In [24]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [71]:
print('Build model...')
print('Load training data...')
df_x_train = pd.read_csv('X_train.csv', header=0, index_col = 0)
df_y_train = pd.read_csv('y_train.csv', header=0, index_col = 0)

x_train, x_test, y_train, y_test = train_test_split(df_x_train, df_y_train, test_size=0.2, random_state=0)

print(y_train)

Build model...
Load training data...
      y
id     
70    1
4347  0
1122  2
4570  1
34    1
...  ..
1033  1
3264  1
1653  1
2607  1
2732  1

[3840 rows x 1 columns]


In [72]:
scaler_norm = MinMaxScaler(feature_range=(-1,1))
scaler_std = StandardScaler()

print('Splitting into training and validation dataset')
X_train = x_train.values
Y_train = y_train['y'].values
X_val = x_test.values
Y_val = y_test['y'].values

X_train = scaler_std.fit_transform(X_train)
X_val = scaler_std.fit_transform(X_val)

X_train = scaler_norm.fit_transform(X_train)
X_val = scaler_norm.fit_transform(X_val)



print('Load testing data...')
df_x_test = pd.read_csv('X_test.csv', header=0, index_col = 0)
X_test = df_x_test.values

X_test = scaler_std.fit_transform(X_test)

X_test = scaler_norm.fit_transform(X_test)

print(X_test)

Splitting into training and validation dataset
Load testing data...
[[ 0.25453034 -0.41639705 -0.09564329 ...  0.33162711 -0.14751304
  -0.16357612]
 [ 0.45045407  0.10414288  0.07623589 ... -0.37743754 -0.07508129
  -0.39041089]
 [ 0.13467647 -0.314837    0.12580921 ...  0.20310091 -0.00304303
  -0.09356141]
 ...
 [-0.06681622 -0.07060448  0.03381337 ... -0.05593252  0.05656129
   0.09909006]
 [-0.02609598 -0.00959005  0.36905202 ... -0.50208144 -0.27695447
  -0.06519337]
 [ 0.24281811  0.18204007  0.20235135 ... -0.5072581  -0.21881468
  -0.29582945]]


In [73]:
# # encode class values as integers
# encoder = LabelEncoder()
# encoder.fit(y)
# encoded_Y = encoder.transform(y)
# # convert integers to dummy variables (i.e. one hot encoded)
# dummy_y = np_utils.to_categorical(encoded_Y)

# onehotencoder = OneHotEncoder(categorical_features = [0])
# x = onehotencoder.fit_transform(x).toarray()


In [74]:
# create model

model = Sequential()
model.add(Dense(1024, input_dim=1000, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2014, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(3, activation='softmax'))
# Compile model

model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='adam', metrics=['accuracy'],
              weighted_metrics=['accuracy'])
earlier = EarlyStopping(monitor = 'val_accuracy_1',mode='max',min_delta=1e-3,patience=50,verbose=1)
checkpointer = ModelCheckpoint(filepath='best.hdf5',verbose=1, save_best_only=True, monitor='val_accuracy_1', mode='max')



In [75]:
cls_weights = class_weight.compute_class_weight('balanced', np.unique(Y_train), 
                                                Y_train)
cls_weight_dict = {0: cls_weights[0], 1: cls_weights[1], 2: cls_weights[2]}
val_sample_weights = class_weight.compute_sample_weight(cls_weight_dict, Y_val)


In [None]:

model.fit(X_train, Y_train, epochs=200, batch_size=100, validation_data=(X_val, Y_val,val_sample_weights), callbacks=[earlier, checkpointer])



Train on 3840 samples, validate on 960 samples
Epoch 1/200
Epoch 00001: val_accuracy_1 improved from -inf to 0.56793, saving model to best.hdf5
Epoch 2/200
Epoch 00002: val_accuracy_1 did not improve from 0.56793
Epoch 3/200
Epoch 00003: val_accuracy_1 did not improve from 0.56793
Epoch 4/200
Epoch 00004: val_accuracy_1 did not improve from 0.56793
Epoch 5/200
Epoch 00005: val_accuracy_1 improved from 0.56793 to 0.57031, saving model to best.hdf5
Epoch 6/200
Epoch 00006: val_accuracy_1 did not improve from 0.57031
Epoch 7/200
Epoch 00007: val_accuracy_1 improved from 0.57031 to 0.57366, saving model to best.hdf5
Epoch 8/200
Epoch 00008: val_accuracy_1 improved from 0.57366 to 0.67527, saving model to best.hdf5
Epoch 9/200
Epoch 00009: val_accuracy_1 did not improve from 0.67527
Epoch 10/200
Epoch 00010: val_accuracy_1 did not improve from 0.67527
Epoch 11/200
Epoch 00011: val_accuracy_1 did not improve from 0.67527
Epoch 12/200
Epoch 00012: val_accuracy_1 did not improve from 0.67527
E

Epoch 23/200
Epoch 00023: val_accuracy_1 did not improve from 0.69147
Epoch 24/200
Epoch 00024: val_accuracy_1 did not improve from 0.69147
Epoch 25/200
Epoch 00025: val_accuracy_1 did not improve from 0.69147
Epoch 26/200
Epoch 00026: val_accuracy_1 did not improve from 0.69147
Epoch 27/200
Epoch 00027: val_accuracy_1 did not improve from 0.69147
Epoch 28/200
Epoch 00028: val_accuracy_1 did not improve from 0.69147
Epoch 29/200
Epoch 00029: val_accuracy_1 did not improve from 0.69147
Epoch 30/200
Epoch 00030: val_accuracy_1 did not improve from 0.69147
Epoch 31/200
Epoch 00031: val_accuracy_1 did not improve from 0.69147
Epoch 32/200
Epoch 00032: val_accuracy_1 improved from 0.69147 to 0.69206, saving model to best.hdf5
Epoch 33/200
Epoch 00033: val_accuracy_1 did not improve from 0.69206
Epoch 34/200
Epoch 00034: val_accuracy_1 improved from 0.69206 to 0.69234, saving model to best.hdf5
Epoch 35/200
Epoch 00035: val_accuracy_1 did not improve from 0.69234
Epoch 36/200
Epoch 00036: va

Epoch 46/200
Epoch 00046: val_accuracy_1 did not improve from 0.70035
Epoch 47/200
Epoch 00047: val_accuracy_1 did not improve from 0.70035
Epoch 48/200
Epoch 00048: val_accuracy_1 did not improve from 0.70035
Epoch 49/200
Epoch 00049: val_accuracy_1 did not improve from 0.70035
Epoch 50/200
Epoch 00050: val_accuracy_1 did not improve from 0.70035
Epoch 51/200
Epoch 00051: val_accuracy_1 did not improve from 0.70035
Epoch 52/200
Epoch 00052: val_accuracy_1 did not improve from 0.70035
Epoch 53/200
Epoch 00053: val_accuracy_1 did not improve from 0.70035
Epoch 54/200
Epoch 00054: val_accuracy_1 did not improve from 0.70035
Epoch 55/200
Epoch 00055: val_accuracy_1 did not improve from 0.70035
Epoch 56/200
Epoch 00056: val_accuracy_1 did not improve from 0.70035
Epoch 57/200
Epoch 00057: val_accuracy_1 did not improve from 0.70035
Epoch 58/200
Epoch 00058: val_accuracy_1 did not improve from 0.70035
Epoch 59/200
Epoch 00059: val_accuracy_1 did not improve from 0.70035
Epoch 60/200
Epoch 0

Epoch 69/200
Epoch 00069: val_accuracy_1 did not improve from 0.71086
Epoch 70/200
Epoch 00070: val_accuracy_1 did not improve from 0.71086
Epoch 71/200
Epoch 00071: val_accuracy_1 did not improve from 0.71086
Epoch 72/200
Epoch 00072: val_accuracy_1 did not improve from 0.71086
Epoch 73/200
Epoch 00073: val_accuracy_1 did not improve from 0.71086
Epoch 74/200
Epoch 00074: val_accuracy_1 did not improve from 0.71086
Epoch 75/200
Epoch 00075: val_accuracy_1 did not improve from 0.71086
Epoch 76/200
Epoch 00076: val_accuracy_1 did not improve from 0.71086
Epoch 77/200
Epoch 00077: val_accuracy_1 did not improve from 0.71086
Epoch 78/200
Epoch 00078: val_accuracy_1 did not improve from 0.71086
Epoch 79/200
Epoch 00079: val_accuracy_1 did not improve from 0.71086
Epoch 80/200
Epoch 00080: val_accuracy_1 did not improve from 0.71086
Epoch 81/200
Epoch 00081: val_accuracy_1 did not improve from 0.71086
Epoch 82/200
Epoch 00082: val_accuracy_1 did not improve from 0.71086
Epoch 83/200

In [67]:
model.load_weights('best.hdf5')
Y_test = model.predict_classes(X_test)
print(Y_test)


f = open("best.csv", "w")
f.write("id,y\n")
for i,x in enumerate(Y_test):
    f.write("{},{}\n".format(i,x))
f.close()
        

[1 0 1 ... 1 0 0]


In [None]:

from sklearn.metrics import balanced_accuracy_score
BMAC = balanced_accuracy_score(y_true, y_pred)