In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import roc_auc_score, mean_squared_error, r2_score, accuracy_score, confusion_matrix, classification_report, plot_confusion_matrix, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler 
import sys
sys.path.insert(0, '../src/features/')
from custom_metric import *


# Show every column when a DataFrame is displayed.
# Use the canonical option name: "display.max.columns" is not a registered
# key and only resolved through pandas' regex-based partial matching, which
# can break if another option ever matches the same pattern.
pd.set_option("display.max_columns", None)

In [None]:
# Load the cleaned dataset; the path is relative to the notebook's directory.
data = pd.read_csv(filepath_or_buffer='../data/clean_data.csv')

# Display the available column names as the cell output.
data.columns

## Neural Network (MLP) 1 - Multiclass
#### Features = 'capacity', 'vib1', 'vib2', 'amp_uni', 'mic1', 'mic2'
#### Value = 'type_cat'

In [None]:
# Columns removed before modelling. Per the section header, the intent is to
# keep only 'capacity', 'vib1', 'vib2', 'amp_uni', 'mic1', 'mic2' plus the
# target (assumes the CSV contains no other columns - TODO confirm).
drop_list = [
    # other labels / encodings that are not the multiclass target
    'type',
    'capacity_cat',
    'pump_cat',
    'anomaly',
    'anomaly_cat',
    'anomaly_binary',
    # timestamp columns
    'datetime',
    'date',
    'time',
    # per-axis vibration columns
    'vib1_x',
    'vib1_y',
    'vib1_z',
    'vib2_x',
    'vib2_y',
    'vib2_z',
    # remaining non-feature columns
    'amp1',
    'amp2',
    'pump',
    'anomaly_binary_cat',
]

# Target column for the multiclass model.
value = 'type_cat'

In [None]:
# Version 1 - baseline: default MLPClassifier on the unscaled features.

# Keep only the feature columns plus the target.
df = data.drop(columns=drop_list)

# Split into feature matrix and target vector.
X = df.drop(columns=value)
y = df[value]

# Hold out 20% of the rows for validation; the seed is fixed so the split is
# reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.20, random_state=100
)

# Train the model.
model = MLPClassifier(random_state=100, max_iter=800)
model.fit(X_train, y_train)

# Predict once per split and reuse the predictions for every metric below
# (the validation set was previously predicted a second time before the
# classification report).
predict_train = model.predict(X_train)
predict_val = model.predict(X_val)

# Accuracy. Note: the "test dataset" label refers to the validation split.
accuracy_train = accuracy_score(y_train, predict_train)
accuracy_val = accuracy_score(y_val, predict_val)
print("Accuracies")
print('accuracy_score on train dataset : ', accuracy_train)
print('accuracy_score on test dataset : ', accuracy_val)
print('\n')

# Per-class precision / recall / F1 on the validation split.
print('Classification report')
print(classification_report(y_val, predict_val))
print('\n')

# Row-normalised confusion matrix (each true-class row sums to 1).
# NOTE: plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed
# in 1.2; ConfusionMatrixDisplay.from_estimator is its replacement.
matrix = plot_confusion_matrix(model, X_val, y_val, cmap=plt.cm.Blues, normalize='true')
matrix.ax_.set_title('Confusion Matrix')
fig = plt.gcf()
fig.set_size_inches(7, 7)
plt.show()


In [None]:
# Version 2 - same default MLP, trained on standardised features.

# Re-create the split from the unscaled X / y so this cell can be re-run.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.20, random_state=100
)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

# Train the model.
model2 = MLPClassifier(random_state=100, max_iter=1500)
model2.fit(X_train, y_train)

# Predictions for both splits, reused by all metrics below.
predict_train = model2.predict(X_train)
predict_val = model2.predict(X_val)

# Accuracy (the "test dataset" label refers to the validation split).
accuracy_train = accuracy_score(y_train, predict_train)
accuracy_val = accuracy_score(y_val, predict_val)
print("Accuracies")
print('accuracy_score on train dataset : ', accuracy_train)
print('accuracy_score on test dataset : ', accuracy_val)
print('\n')

# Per-class metrics on the validation split.
print('Classification report')
print(classification_report(y_val, predict_val))
print('\n')

# Row-normalised confusion matrix.
matrix = plot_confusion_matrix(
    model2, X_val, y_val, cmap=plt.cm.Blues, normalize='true'
)
matrix.ax_.set_title('Confusion Matrix')
fig = plt.gcf()
fig.set_size_inches(7, 7)
plt.show()



In [None]:
# Display the full parameter set of model2 (the MLP trained on scaled data),
# which is the estimator passed to the grid search below.
model2.get_params()

In [None]:
# Hyperparameter tuning for the multiclass MLP.

# Candidate values.
solver = ['lbfgs', 'sgd', 'adam']
alpha = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 10]
learning_rate = ['constant', 'invscaling', 'adaptive']

# MLPClassifier only consults `learning_rate` when solver='sgd'. Crossing it
# with every solver refits identical lbfgs/adam models three times each, so
# the grid is split in two: `learning_rate` is searched for sgd only.
# Consequence: best_params_ has no 'learning_rate' key when a non-sgd solver
# wins; the estimator then keeps its own setting, which that solver ignores.
search_params = [
    {'solver': [s for s in solver if s != 'sgd'],
     'alpha': alpha},
    {'solver': ['sgd'],
     'alpha': alpha,
     'learning_rate': learning_rate},
]

# 10 random 80/20 splits of the training data, fixed seed for reproducibility.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=100)

# Search using the project's custom metric (higher is better) as the score.
model3 = GridSearchCV(
    model2,
    search_params,
    cv=cv,
    n_jobs=-1,
    scoring=make_scorer(cedric_metric_nonbinary, greater_is_better=True),
)

model3.fit(X_train, y_train)

best_params = model3.best_params_

print('Best Parameters: ', model3.best_params_)

print('Best score: ', model3.best_score_)

In [None]:
# Train the tuned multiclass model and cross-validate it.

# Scale the full feature matrix into a NEW name. Overwriting X in place made
# this cell non-idempotent: re-running it fed already-scaled data through the
# scaler again and silently changed every score below.
X_scaled = scaler.transform(X)

# NOTE(review): the hyperparameters are hard-coded, presumably copied from the
# grid-search output above - confirm they match model3.best_params_.
model4 = MLPClassifier(random_state = 100, max_iter=1500, alpha=0.1, learning_rate='constant', solver='lbfgs')
model4.fit(X_train, y_train)

# 10 random 80/20 splits over the whole (scaled) dataset.
# cross_val_score fits clones of the estimator, so model4 itself stays as
# fitted on X_train above and is what the next cell evaluates.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=100)

# Default scoring for a classifier: accuracy.
scores = cross_val_score(model4, X_scaled, y, cv=cv, n_jobs=-1)
print('Cross Validation accuracy scores: %s' % scores)
print('Cross Validation accuracy: %.3f +/- %.3f' % (np.mean(scores),np.std(scores)))
print('\n')

# Same splits, scored with the project's custom multiclass metric.
scores = cross_val_score(model4, X_scaled, y, cv=cv, n_jobs=-1, scoring=make_scorer(cedric_metric_nonbinary, greater_is_better=True))
print('Cross Validation custom metric scores: %s' % scores)
print('Cross Validation custom metric: %.3f +/- %.3f' % (np.mean(scores),np.std(scores)))

In [None]:
# Evaluate the tuned multiclass model (model4) on the validation split.
predict_val = model4.predict(X_val)

# Per-class precision / recall / F1.
print('Classification report')
print(classification_report(y_val, predict_val))
print('\n')

# Row-normalised confusion matrix.
matrix = plot_confusion_matrix(
    model4, X_val, y_val, cmap=plt.cm.Blues, normalize='true'
)
matrix.ax_.set_title('Confusion Matrix')
fig = plt.gcf()
fig.set_size_inches(7, 7)
plt.show()

## Neural Network (MLP) 2 - Binary
#### Features = 'capacity', 'vib1', 'vib2', 'amp_uni', 'mic1', 'mic2'
#### Value = 'anomaly_binary'

In [None]:
# Columns removed before modelling the binary target. Per the section header,
# the intent is to keep only 'capacity', 'vib1', 'vib2', 'amp_uni', 'mic1',
# 'mic2' plus the target (assumes the CSV has no other columns - TODO confirm).
drop_list = [
    # other labels / encodings that are not the binary target
    'type',
    'capacity_cat',
    'pump_cat',
    'anomaly',
    'anomaly_cat',
    'type_cat',
    # timestamp columns
    'datetime',
    'date',
    'time',
    # per-axis vibration columns
    'vib1_x',
    'vib1_y',
    'vib1_z',
    'vib2_x',
    'vib2_y',
    'vib2_z',
    # remaining non-feature columns
    'amp1',
    'amp2',
    'pump',
    'anomaly_binary_cat',
]

# Target column for the binary model.
value = 'anomaly_binary'

In [None]:
# Binary baseline: default MLP on standardised features.

# Keep only the feature columns plus the binary target.
df = data.drop(columns=drop_list)

# Split into feature matrix and target vector.
X = df.drop(columns=value)
y = df[value]

# Hold out 20% of the rows for validation, with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.20, random_state=100
)

# Re-fit the scaler object created in the earlier scaling cell on this
# training split, then scale both splits and the full feature matrix.
# X is rebuilt from `data` at the top of this cell, so re-running is safe.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X = scaler.transform(X)

# Train the model.
model5 = MLPClassifier(random_state=100, max_iter=1500)
model5.fit(X_train, y_train)

# Predictions for both splits, reused by all metrics below.
predict_train = model5.predict(X_train)
predict_val = model5.predict(X_val)

# Accuracy (the "test dataset" label refers to the validation split).
accuracy_train = accuracy_score(y_train, predict_train)
accuracy_val = accuracy_score(y_val, predict_val)
print("Accuracies")
print('accuracy_score on train dataset : ', accuracy_train)
print('accuracy_score on test dataset : ', accuracy_val)
print('\n')

# Per-class metrics on the validation split.
print('Classification report')
print(classification_report(y_val, predict_val))
print('\n')

# Row-normalised confusion matrix.
matrix = plot_confusion_matrix(
    model5, X_val, y_val, cmap=plt.cm.Blues, normalize='true'
)
matrix.ax_.set_title('Confusion Matrix')
fig = plt.gcf()
fig.set_size_inches(7, 7)
plt.show()




In [None]:
# Cross-validate the binary baseline (model5) on the full scaled dataset.

# 10 random 80/20 splits with a fixed seed.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=100)

# Default scoring for a classifier: accuracy.
scores = cross_val_score(model5, X, y, cv=cv, n_jobs=-1)
print('Cross Validation accuracy scores: %s' % scores)
print('Cross Validation accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))
print('\n')

# Same splits, scored with the project's custom binary metric.
scores = cross_val_score(
    model5, X, y,
    cv=cv,
    n_jobs=-1,
    scoring=make_scorer(cedric_metric_binary, greater_is_better=True),
)
print('Cross Validation custom metric scores: %s' % scores)
print('Cross Validation custom metric: %.3f +/- %.3f' % (scores.mean(), scores.std()))

In [None]:
# Hyperparameter tuning for the binary MLP.

# Candidate values.
solver = ['lbfgs', 'sgd', 'adam']
alpha = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 10]
learning_rate = ['constant', 'invscaling', 'adaptive']

# MLPClassifier only consults `learning_rate` when solver='sgd'. Crossing it
# with every solver refits identical lbfgs/adam models three times each, so
# the grid is split in two: `learning_rate` is searched for sgd only.
# Consequence: best_params_ has no 'learning_rate' key when a non-sgd solver
# wins; the estimator then keeps its own setting, which that solver ignores.
search_params = [
    {'solver': [s for s in solver if s != 'sgd'],
     'alpha': alpha},
    {'solver': ['sgd'],
     'alpha': alpha,
     'learning_rate': learning_rate},
]

# 10 random 80/20 splits of the training data, fixed seed for reproducibility.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=100)

# Search using the project's custom binary metric (higher is better).
model6 = GridSearchCV(
    model5,
    search_params,
    cv=cv,
    n_jobs=-1,
    scoring=make_scorer(cedric_metric_binary, greater_is_better=True),
)

model6.fit(X_train, y_train)

best_params = model6.best_params_

print('Best Parameters: ', model6.best_params_)

print('Best score: ', model6.best_score_)

In [None]:
# Train the tuned binary model and cross-validate it.

# NOTE(review): these hard-coded hyperparameters are identical to the ones
# used for the multiclass model - confirm they match model6.best_params_.
model7 = MLPClassifier(
    random_state=100,
    max_iter=1500,
    alpha=0.1,
    learning_rate='constant',
    solver='lbfgs',
)
model7.fit(X_train, y_train)

# 10 random 80/20 splits over the full scaled dataset, fixed seed.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=100)

# Default scoring for a classifier: accuracy.
scores = cross_val_score(model7, X, y, cv=cv, n_jobs=-1)
print('Cross Validation accuracy scores: %s' % scores)
print('Cross Validation accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))
print('\n')

# Same splits, scored with the project's custom binary metric.
scores = cross_val_score(
    model7, X, y,
    cv=cv,
    n_jobs=-1,
    scoring=make_scorer(cedric_metric_binary, greater_is_better=True),
)
print('Cross Validation custom metric scores: %s' % scores)
print('Cross Validation custom metric: %.3f +/- %.3f' % (scores.mean(), scores.std()))

In [None]:
# Evaluate the tuned binary model (model7) on the validation split.
predict_val = model7.predict(X_val)

# Per-class precision / recall / F1.
print('Classification report')
print(classification_report(y_val, predict_val))
print('\n')

# Row-normalised confusion matrix.
matrix = plot_confusion_matrix(
    model7, X_val, y_val, cmap=plt.cm.Blues, normalize='true'
)
matrix.ax_.set_title('Confusion Matrix')
fig = plt.gcf()
fig.set_size_inches(7, 7)
plt.show()