In [1]:
# Imports
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import TensorBoard, EarlyStopping
from tensorflow import keras
import tensorflow as tf
from scipy import stats
import pandas as pd
import datetime, os
import numpy as np

Using TensorFlow backend.


In [2]:
# Load the preprocessed CSV we made earlier; it is read from
# data/p/<file_name>.csv, where <file_name> is typed in by the user.
print("File name: ")
file_name = input()

dataFrame = pd.read_csv(r'data/p/' + file_name + '.csv')

# A CSV that was written together with its row index comes back with that
# index as an "Unnamed: N" column. It carries no signal, and leaving it in
# would turn the row number into a model feature, so drop any such column.
is_saved_index = dataFrame.columns.astype(str).str.startswith('Unnamed')
dataFrame = dataFrame.loc[:, ~is_saved_index]
dataFrame.head()

File name: 
Class_23_attributes


Unnamed: 0,LB,AC.1,FM.1,UC.1,DL.1,DS.1,DP.1,ASTV,MSTV,ALTV,...,Min,Max,Nmax,Nzeros,Mode,Mean,Median,Variance,Tendency,CLASS
0,0.259259,0.0,0.0,0.0,0.0,0.0,0.0,0.813333,0.044118,0.472527,...,0.110092,0.034483,0.111111,0.0,0.472441,0.587156,0.40367,0.271375,1.0,9
1,0.481481,0.315789,0.0,0.4,0.2,0.0,0.0,0.066667,0.279412,0.0,...,0.165138,0.655172,0.333333,0.1,0.637795,0.577982,0.577982,0.04461,0.5,6
2,0.5,0.157895,0.0,0.533333,0.2,0.0,0.0,0.053333,0.279412,0.0,...,0.165138,0.655172,0.277778,0.1,0.637795,0.568807,0.559633,0.048327,0.5,6
3,0.518519,0.157895,0.0,0.533333,0.2,0.0,0.0,0.053333,0.323529,0.0,...,0.027523,0.413793,0.611111,0.0,0.606299,0.559633,0.550459,0.048327,1.0,6
4,0.481481,0.368421,0.0,0.533333,0.0,0.0,0.0,0.053333,0.323529,0.0,...,0.027523,0.413793,0.5,0.0,0.606299,0.577982,0.559633,0.040892,1.0,2


In [3]:
# Every column except one (the target) is counted as a feature.
num_features = len(dataFrame.columns) - 1
print(f"We have {num_features} features")

We have 21 features


In [4]:
# Split the frame into the label vector y and the feature matrix X.
target_name = input("Target column: ")

# Select instead of pop(): pop() removes the column from dataFrame in place,
# so running this cell a second time would fail with a KeyError.
# Labels are shifted down by one so that the lowest class becomes 0
# (assumes the file stores 1-based class labels -- TODO confirm).
y = dataFrame[target_name] - 1

# NOTE(review): every remaining column is treated as a feature; if the file
# holds a second label column it ends up in X -- verify against the CSV.
X = np.array(dataFrame.drop(columns=[target_name]))
X

NSP


array([[0.        , 0.        , 0.        , ..., 0.40366972, 0.27137546,
        1.        ],
       [0.31578947, 0.        , 0.4       , ..., 0.57798165, 0.04460967,
        0.5       ],
       [0.15789474, 0.        , 0.53333333, ..., 0.55963303, 0.04832714,
        0.5       ],
       ...,
       [0.05263158, 0.        , 0.46666667, ..., 0.68807339, 0.01486989,
        1.        ],
       [0.05263158, 0.        , 0.4       , ..., 0.67889908, 0.01486989,
        1.        ],
       [0.10526316, 0.004158  , 0.53333333, ..., 0.62385321, 0.00371747,
        0.5       ]])

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder


This model is used for the three-class NSP classification (classes N, S and P).

In [26]:
# Split the data, build the classifier and train it with early stopping.

SEED = 42        # fixed seed so the train / validation / test split is repeatable
NUM_CLASSES = 3  # width of the one-hot labels and of the softmax output layer

# 10% of the data is held out for testing; 20% of the remainder is used for
# validation during training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=SEED)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=True, random_state=SEED)

# One-hot encode the labels. num_classes is pinned: without it the width is
# inferred from the largest label present, so a split that happens to miss the
# highest class would produce a matrix that no longer matches the output layer.
y_train = keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_val = keras.utils.to_categorical(y_val, num_classes=NUM_CLASSES)

# Everything below goes through the single `keras` namespace imported from
# tensorflow, so the layers, regularizers, optimizer and callbacks all come
# from the same Keras implementation as the model itself.
model = keras.Sequential([
    # The input width is taken from the data actually fed to the model.
    keras.layers.Dense(20, input_dim=X_train.shape[1], activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10,
                       activation='relu',
                       kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.Dense(10,
                       activation='relu',
                       activity_regularizer=keras.regularizers.l1(0.01)),
    keras.layers.Dense(5, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

opt = keras.optimizers.RMSprop(learning_rate=0.0005, rho=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# TensorBoard logs: each run writes to its own timestamped sub-directory of
# logs/, so earlier runs are kept side by side.
log_dir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
)

# Callbacks: TensorBoard logging plus early stopping once the validation loss
# has not improved by at least min_delta for `patience` consecutive epochs.
tensorboard_callback = keras.callbacks.TensorBoard(log_dir, histogram_freq=1)
earlystop_callback = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                   min_delta=0.0001,
                                                   mode='min',
                                                   patience=5)

# Train the model; epochs is only an upper bound, early stopping normally
# ends the run sooner.
history = model.fit(x=X_train,
                    y=y_train,
                    epochs=1000,
                    batch_size=50,
                    validation_data=(X_val, y_val),
                    callbacks=[tensorboard_callback, earlystop_callback],
                    verbose=0)

In [27]:
# Evaluate the trained model on the held-out test set.

# Sequential.predict_classes() is deprecated and missing from newer TensorFlow
# releases; taking the argmax over the softmax output gives the same class
# index per sample.
y_pred = np.argmax(model.predict(X_test), axis=-1)

# Pin the label set so the per-class score arrays always have three entries,
# even when a class is absent from both y_test and y_pred; otherwise the
# [2] lookups in the table below can raise IndexError.
# (assumes the classes are encoded as 0, 1, 2 -- TODO confirm)
class_labels = [0, 1, 2]

# accuracy: (tp + tn) / (p + n)
accuracy = accuracy_score(y_test, y_pred)
# precision tp / (tp + fp)
precision = np.around(
    precision_score(y_test, y_pred, labels=class_labels, average=None), decimals=3)
# recall: tp / (tp + fn)
recall = np.around(
    recall_score(y_test, y_pred, labels=class_labels, average=None), decimals=3)
# f1: 2 tp / (2 tp + fp + fn)
f1 = np.around(
    f1_score(y_test, y_pred, labels=class_labels, average=None), decimals=3)

model.summary()
print("\n")
print("-"*18)
print('Accuracy: %f' % accuracy)
print("-"*40)
print("| Class:\t  N\t  S\t  P\t|")
print("| Precision:\t", precision[0], "\t", precision[1], "\t", precision[2], "\t|")
print("| Recall:\t", recall[0], "\t", recall[1], "\t", recall[2], "\t|")
print("| F1:\t\t", f1[0], "\t", f1[1], "\t", f1[2], "\t|")
print("-"*40)
#del model

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_45 (Dense)             (None, 20)                420       
_________________________________________________________________
batch_normalization_9 (Batch (None, 20)                80        
_________________________________________________________________
dense_46 (Dense)             (None, 10)                210       
_________________________________________________________________
dense_47 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_48 (Dense)             (None, 5)                 55        
_________________________________________________________________
dropout_9 (Dropout)          (None, 5)                 0         
_________________________________________________________________
dense_49 (Dense)             (None, 3)                

In [None]:
# Remove the `model` name from the kernel namespace.
# NOTE(review): once this has run, any cell that still uses `model`
# (for example the model.save(...) cell) raises NameError until the
# training cell is executed again.
del model

In [None]:
# Load the TensorBoard notebook extension (%reload_ext is used so the cell can
# be re-run when the extension is already loaded), then open the dashboard on
# the logs/ directory that the training cell writes its run logs to.
%reload_ext tensorboard
%tensorboard --logdir logs/

In [None]:
# Display the class indices predicted for the test set.
y_pred


In [12]:
# Save the trained model as models/<name>.h5, where <name> is typed in by the
# user. The target directory is created first because saving fails when it
# does not exist yet.
os.makedirs('models', exist_ok=True)
model.save('models/' + input("Model name: ") + '.h5')

Overall_best
