In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
df=pd.read_csv('hypodata_numeric_scaled.csv')

In [4]:
df.head()

Unnamed: 0,age,TSH,T3,TT4,T4U,FTI,classes
0,0.430108,0.002709,0.232227,0.287383,0.458564,0.272265,0
1,0.236559,0.008567,0.184834,0.233645,0.38011,0.276814,0
2,0.483871,0.00204,0.187204,0.25,0.331492,0.300254,0
3,0.741935,0.000324,0.175355,0.404206,0.38011,0.276814,0
4,0.741935,0.001496,0.109005,0.13785,0.309392,0.173028,0


In [5]:
df.columns

Index(['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI', 'classes'], dtype='object')

In [6]:
X=df[['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']]

In [7]:
y=df['classes']

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.33, random_state=101)

In [10]:
from sklearn.neural_network import MLPClassifier

In [11]:
clf = MLPClassifier(solver='lbfgs', alpha=0.01, hidden_layer_sizes=(8,8), random_state=101)

In [12]:
nn_model=clf.fit(X_train, y_train)

In [13]:
pred=nn_model.predict(X_cv)

In [14]:
from sklearn.metrics import classification_report

In [15]:
clf.score(X_train, y_train)

0.9701492537313433

In [16]:
print("Training data confusion_matrix\n\n", classification_report(y_train, clf.predict(X_train)))

Training data confusion_matrix

              precision    recall  f1-score   support

          0       0.99      0.98      0.99      1724
          1       0.76      0.78      0.77        40
          2       0.75      0.84      0.79       110
          3       0.00      0.00      0.00         2

avg / total       0.97      0.97      0.97      1876



  'precision', 'predicted', average, warn_for)


In [17]:
print("Cross-validation data confusion_matrix\n\n", classification_report(y_cv, pred))

Cross-validation data confusion_matrix

              precision    recall  f1-score   support

          0       0.99      0.98      0.99       856
          1       0.89      0.67      0.76        24
          2       0.62      0.89      0.73        44

avg / total       0.97      0.97      0.97       924



In [18]:
clf2 = MLPClassifier(solver='lbfgs', alpha=0.01, hidden_layer_sizes=(12,8), random_state=101)

In [19]:
nn_model2=clf2.fit(X_train, y_train)

In [20]:
pred2=nn_model2.predict(X_cv)

In [21]:
clf2.score(X_train, y_train)

0.9664179104477612

In [22]:
print("Training data classifier 2 confusion_matrix\n\n", classification_report(y_train, clf2.predict(X_train)))

Training data classifier 2 confusion_matrix

              precision    recall  f1-score   support

          0       0.99      0.98      0.99      1724
          1       0.73      0.55      0.63        40
          2       0.70      0.85      0.77       110
          3       0.00      0.00      0.00         2

avg / total       0.97      0.97      0.97      1876



  'precision', 'predicted', average, warn_for)


In [23]:
print("Cross-validation data classifier 2 confusion_matrix\n\n", classification_report(y_cv, pred2))
# model2 outperfoms model1 in both training and cross-validation data set with high f1-scores and other scores

Cross-validation data classifier 2 confusion_matrix

              precision    recall  f1-score   support

          0       0.99      0.98      0.99       856
          1       0.93      0.58      0.72        24
          2       0.56      0.84      0.67        44

avg / total       0.97      0.96      0.96       924



In [24]:
clf3 = MLPClassifier(solver='lbfgs', alpha=0.01, hidden_layer_sizes=(16,16), random_state=101)

In [25]:
nn_model3=clf3.fit(X_train, y_train)

In [26]:
pred3=nn_model3.predict(X_cv)

In [27]:
clf3.score(X_train, y_train)

0.9749466950959488

In [28]:
print("Training data classifier 3 confusion_matrix\n\n", classification_report(y_train, clf3.predict(X_train)))

Training data classifier 3 confusion_matrix

              precision    recall  f1-score   support

          0       0.99      0.98      0.99      1724
          1       0.80      0.90      0.85        40
          2       0.78      0.88      0.83       110
          3       0.00      0.00      0.00         2

avg / total       0.98      0.97      0.98      1876



  'precision', 'predicted', average, warn_for)


In [29]:
print("Cross-validation data classifier 3 confusion_matrix\n\n", classification_report(y_cv, pred3))
# model2 outperfoms model1 and 3 in both training and cross-validation data set with high f1-scores and other scores.

Cross-validation data classifier 3 confusion_matrix

              precision    recall  f1-score   support

          0       0.99      0.97      0.98       856
          1       0.95      0.75      0.84        24
          2       0.57      0.84      0.68        44

avg / total       0.97      0.96      0.96       924



In [30]:
## save model weights 
W0 = clf.coefs_[0]

In [31]:
W1 = clf.coefs_[1]

In [32]:
W2 = clf.coefs_[2]

In [33]:
# %load classification_report_dataframe.py


# In[3]:


def classification_report_df(report_in):
    rep = report_in.split('\n')
    column_names = ['class']
    report_out =[]
    for i in rep[0].split():
        column_names.append(i)
    for row in rep[2:6]:
        report_out.append(row.split())
    if rep[6]!='':
        report_out.append(rep[6].split('      '))
    elif rep[6]=='' and rep[7]!='':
        report_out.append(rep[7].split('      '))
        
    report_df = pd.DataFrame(report_out)
    report_df.columns = column_names
    return report_df 

def f1_score_class_1_2(df, i=1):
    return (df.loc[i,"f1-score"]*df.loc[i,"support"]+df.loc[i+1,"f1-score"]*df.loc[i+1,"support"])/(df.loc[i,"support"]+df.loc[i+1,"support"])  

In [35]:
classification_report_df(classification_report(y_cv, pred3)).to_csv("Neural_network_numeric_data classifier3 confusion_matrix on cv_data.csv", index=False)

In [36]:
nn_cv3 = pd.read_csv("Neural_network_numeric_data classifier3 confusion_matrix on cv_data.csv")

In [37]:
nn_cv3
# There are no class 3 examples in cross-validation data, only 2 cases in training data.

Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.99,0.97,0.98,856.0
1,1,0.95,0.75,0.84,24.0
2,2,0.57,0.84,0.68,44.0
3,,,,,
4,avg / total,0.97,0.96,0.96,924.0


In [38]:
f1_score_class_1_2(nn_cv3, i=1)

0.7364705882352941

In [39]:
# Save weights for future classification interfence 
np.savetxt('neural_network_numeric_data classification weight0.csv', W0, delimiter=',')

In [40]:
np.savetxt('neural_network_numeric_data classification weight1.csv', W1, delimiter=',')

In [41]:
np.savetxt('neural_network_numeric_data classification weight2.csv', W2, delimiter=',')

In [43]:
from sklearn.externals import joblib
joblib.dump(clf3, 'neural_network_numeric_data_model3.pkl')

['neural_network_numeric_data_model3.pkl']