In [26]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
# Load the iris data set as a (features, targets) pair.
X, y = load_iris(return_X_y=True)

# Filter out low-variance features (variance threshold 0.1).
# NOTE(review): the selector is fitted on the full data set, i.e. before the
# train/test split performed in the next cell -- consider fitting it on the
# training partition only.
from sklearn.feature_selection import VarianceThreshold
selection = VarianceThreshold(threshold=0.1)
X = selection.fit(X).transform(X)
X.shape


(150, 4)

In [27]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The split is stratified on the
# labels and seeded so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_train.shape, X_test.shape



((120, 4), (30, 4))

In [28]:
#K nearest neighbors model

# Fit a k-nearest-neighbours classifier (k = 3) on the training partition.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict both partitions so training and test scores can be compared.
y_train_pred, y_test_pred = knn.predict(X_train), knn.predict(X_test)

# Training performance: accuracy, Matthews correlation coefficient, weighted F1.
knn_train_accuracy = accuracy_score(y_train, y_train_pred)
knn_train_mcc = matthews_corrcoef(y_train, y_train_pred)
knn_train_F1 = f1_score(y_train, y_train_pred, average='weighted')

# Test performance: the same three metrics on the held-out partition.
knn_test_accuracy = accuracy_score(y_test, y_test_pred)
knn_test_mcc = matthews_corrcoef(y_test, y_test_pred)
knn_test_F1 = f1_score(y_test, y_test_pred, average='weighted')

# Report: one line per argument, training block first, then test block.
print(
    'Model Performance for Training Data',
    '1. Accuracy: %s' % knn_train_accuracy,
    '2. MCC: %s' % knn_train_mcc,
    '3. F1_score: %s' % knn_train_F1,
    '- - - - - - - - - - - - -',
    'Model Performance for Test Data',
    '1. Accuracy: %s' % knn_test_accuracy,
    '2. MCC: %s' % knn_test_mcc,
    '3. F1_score: %s' % knn_test_F1,
    sep='\n',
)


Model Performance for Training Data
1. Accuracy: 0.9583333333333334
2. MCC: 0.9375976715114386
3. F1_score: 0.9583268218992551
- - - - - - - - - - - - -
Model Performance for Test Data
1. Accuracy: 1.0
2. MCC: 1.0
3. F1_score: 1.0


In [29]:
#Support Vector Machine

from sklearn.svm import SVC

# Support vector classifier with C=1 and gamma=2 (kernel left at SVC's default).
svm_rbf = SVC(C=1, gamma=2)
svm_rbf.fit(X_train, y_train)

# Predict both partitions so training and test scores can be compared.
y_train_pred, y_test_pred = svm_rbf.predict(X_train), svm_rbf.predict(X_test)

# Training performance: accuracy, Matthews correlation coefficient, weighted F1.
svm_rbf_train_accuracy = accuracy_score(y_train, y_train_pred)
svm_rbf_train_mcc = matthews_corrcoef(y_train, y_train_pred)
svm_rbf_train_F1 = f1_score(y_train, y_train_pred, average='weighted')

# Test performance: the same three metrics on the held-out partition.
svm_rbf_test_accuracy = accuracy_score(y_test, y_test_pred)
svm_rbf_test_mcc = matthews_corrcoef(y_test, y_test_pred)
svm_rbf_test_F1 = f1_score(y_test, y_test_pred, average='weighted')

# Report: one line per argument, training block first, then test block.
print(
    'Model Performance for Training Data',
    '1. Accuracy: %s' % svm_rbf_train_accuracy,
    '2. MCC: %s' % svm_rbf_train_mcc,
    '3. F1_score: %s' % svm_rbf_train_F1,
    '- - - - - - - - - - - - -',
    'Model Performance for Test Data',
    '1. Accuracy: %s' % svm_rbf_test_accuracy,
    '2. MCC: %s' % svm_rbf_test_mcc,
    '3. F1_score: %s' % svm_rbf_test_F1,
    sep='\n',
)


Model Performance for Training Data
1. Accuracy: 0.9916666666666667
2. MCC: 0.9876028806587153
3. F1_score: 0.9916653643798509
- - - - - - - - - - - - -
Model Performance for Test Data
1. Accuracy: 0.9666666666666667
2. MCC: 0.9515873026942034
3. F1_score: 0.9665831244778613


In [30]:
#Decision Tree Model

from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to depth 5.
# random_state is pinned because the tree permutes the candidate features at
# every split; without a fixed seed, ties between equally good splits can be
# resolved differently from run to run. 42 matches the train/test split seed.
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
dt.fit(X_train, y_train)

# Predict both partitions so training and test scores can be compared.
y_train_pred = dt.predict(X_train)
y_test_pred = dt.predict(X_test)


# Training performance: accuracy, Matthews correlation coefficient, weighted F1.
dt_train_accuracy = accuracy_score(y_train, y_train_pred)
dt_train_mcc = matthews_corrcoef(y_train, y_train_pred)
dt_train_F1 = f1_score(y_train, y_train_pred, average='weighted')


# Test performance: the same three metrics on the held-out partition.
dt_test_accuracy = accuracy_score(y_test, y_test_pred)
dt_test_mcc = matthews_corrcoef(y_test, y_test_pred)
dt_test_F1 = f1_score(y_test, y_test_pred, average='weighted')

# Print results
print('Model Performance for Training Data')
print('1. Accuracy: %s' %dt_train_accuracy)
print('2. MCC: %s' %dt_train_mcc)
print('3. F1_score: %s' %dt_train_F1)
print('- - - - - - - - - - - - -')
print('Model Performance for Test Data')
print('1. Accuracy: %s' %dt_test_accuracy)
print('2. MCC: %s' %dt_test_mcc)
print('3. F1_score: %s' %dt_test_F1)

Model Performance for Training Data
1. Accuracy: 1.0
2. MCC: 1.0
3. F1_score: 1.0
- - - - - - - - - - - - -
Model Performance for Test Data
1. Accuracy: 0.9666666666666667
2. MCC: 0.9515873026942034
3. F1_score: 0.9665831244778613


In [31]:
#Random forest

from sklearn.ensemble import RandomForestClassifier

# Random forest of 10 trees.
# random_state is pinned so the bootstrap samples and per-split feature
# sampling -- and therefore the scores reported below -- are reproducible on
# Restart & Run All. 42 matches the train/test split seed.
rf = RandomForestClassifier(n_estimators=10, random_state=42)
rf.fit(X_train, y_train)

# Predict both partitions so training and test scores can be compared.
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)

# Training performance: accuracy, Matthews correlation coefficient, weighted F1.
rf_train_accuracy = accuracy_score(y_train, y_train_pred)
rf_train_mcc = matthews_corrcoef(y_train, y_train_pred)
rf_train_F1 = f1_score(y_train, y_train_pred, average='weighted')


# Test performance: the same three metrics on the held-out partition.
rf_test_accuracy = accuracy_score(y_test, y_test_pred)
rf_test_mcc = matthews_corrcoef(y_test, y_test_pred)
rf_test_F1 = f1_score(y_test, y_test_pred, average='weighted')

# Print results
print('Model Performance for Training Data')
print('1. Accuracy: %s' %rf_train_accuracy)
print('2. MCC: %s' %rf_train_mcc)
print('3. F1_score: %s' %rf_train_F1)
print('- - - - - - - - - - - - -')
print('Model Performance for Test Data')
print('1. Accuracy: %s' %rf_test_accuracy)
print('2. MCC: %s' %rf_test_mcc)
print('3. F1_score: %s' %rf_test_F1)


Model Performance for Training Data
1. Accuracy: 1.0
2. MCC: 1.0
3. F1_score: 1.0
- - - - - - - - - - - - -
Model Performance for Test Data
1. Accuracy: 0.9333333333333333
2. MCC: 0.9
3. F1_score: 0.9333333333333333


In [32]:
#Neural Network

from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with L2 penalty alpha=1 and up to 1000 iterations.
# random_state is pinned so weight initialisation and mini-batch sampling --
# and therefore the scores reported below -- are reproducible on
# Restart & Run All. 42 matches the train/test split seed.
mlp = MLPClassifier(alpha=1, max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)

# Predict both partitions so training and test scores can be compared.
y_train_pred = mlp.predict(X_train)
y_test_pred = mlp.predict(X_test)

# Training performance: accuracy, Matthews correlation coefficient, weighted F1.
mlp_train_accuracy = accuracy_score(y_train, y_train_pred)
mlp_train_mcc = matthews_corrcoef(y_train, y_train_pred)
mlp_train_F1 = f1_score(y_train, y_train_pred, average='weighted')


# Test performance: the same three metrics on the held-out partition.
mlp_test_accuracy = accuracy_score(y_test, y_test_pred)
mlp_test_mcc = matthews_corrcoef(y_test, y_test_pred)
mlp_test_F1 = f1_score(y_test, y_test_pred, average='weighted')

# Print results
print('Model Performance for Training Data')
print('1. Accuracy: %s' %mlp_train_accuracy)
print('2. MCC: %s' %mlp_train_mcc)
print('3. F1_score: %s' %mlp_train_F1)
print('- - - - - - - - - - - - -')
print('Model Performance for Test Data')
print('1. Accuracy: %s' %mlp_test_accuracy)
print('2. MCC: %s' %mlp_test_mcc)
print('3. F1_score: %s' %mlp_test_F1)


Model Performance for Training Data
1. Accuracy: 0.9833333333333333
2. MCC: 0.9754065040827025
3. F1_score: 0.9833229101521785
- - - - - - - - - - - - -
Model Performance for Test Data
1. Accuracy: 1.0
2. MCC: 1.0
3. F1_score: 1.0


In [33]:
#Stacked Models

#Define estimators
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# Base learners for the ensemble: the five classifiers built in the earlier
# cells, each paired with a short name.
estimator_list = [
    ('knn', knn),
    ('svm_rbf', svm_rbf),
    ('dt', dt),
    ('rf', rf),
    ('mlp', mlp),
]

# Stacked ensemble whose final estimator is a logistic regression.
stack_model = StackingClassifier(
    estimators=estimator_list,
    final_estimator=LogisticRegression(),
)
stack_model.fit(X_train, y_train)

# Predict both partitions so training and test scores can be compared.
y_train_pred, y_test_pred = stack_model.predict(X_train), stack_model.predict(X_test)

# Training performance: accuracy, Matthews correlation coefficient, weighted F1.
stack_model_train_accuracy = accuracy_score(y_train, y_train_pred)
stack_model_train_mcc = matthews_corrcoef(y_train, y_train_pred)
stack_model_train_F1 = f1_score(y_train, y_train_pred, average='weighted')

# Test performance: the same three metrics on the held-out partition.
stack_model_test_accuracy = accuracy_score(y_test, y_test_pred)
stack_model_test_mcc = matthews_corrcoef(y_test, y_test_pred)
stack_model_test_F1 = f1_score(y_test, y_test_pred, average='weighted')

# Report: one line per argument, training block first, then test block.
print(
    'Model Performance for Training Data',
    '1. Accuracy: %s' % stack_model_train_accuracy,
    '2. MCC: %s' % stack_model_train_mcc,
    '3. F1_score: %s' % stack_model_train_F1,
    '- - - - - - - - - - - - -',
    'Model Performance for Test Data',
    '1. Accuracy: %s' % stack_model_test_accuracy,
    '2. MCC: %s' % stack_model_test_mcc,
    '3. F1_score: %s' % stack_model_test_F1,
    sep='\n',
)

Model Performance for Training Data
1. Accuracy: 0.9916666666666667
2. MCC: 0.9876028806587153
3. F1_score: 0.9916653643798509
- - - - - - - - - - - - -
Model Performance for Test Data
1. Accuracy: 1.0
2. MCC: 1.0
3. F1_score: 1.0


In [34]:
#Training Performance Scores

# Training-set scores gathered per metric, keyed by model name.
# All three mappings list the models in the same order.
accuracy_train_list = dict([
    ('knn', knn_train_accuracy),
    ('svm_rbf', svm_rbf_train_accuracy),
    ('dt', dt_train_accuracy),
    ('rf', rf_train_accuracy),
    ('mlp', mlp_train_accuracy),
    ('stack', stack_model_train_accuracy),
])

mcc_train_list = dict([
    ('knn', knn_train_mcc),
    ('svm_rbf', svm_rbf_train_mcc),
    ('dt', dt_train_mcc),
    ('rf', rf_train_mcc),
    ('mlp', mlp_train_mcc),
    ('stack', stack_model_train_mcc),
])

F1_train_list = dict([
    ('knn', knn_train_F1),
    ('svm_rbf', svm_rbf_train_F1),
    ('dt', dt_train_F1),
    ('rf', rf_train_F1),
    ('mlp', mlp_train_F1),
    ('stack', stack_model_train_F1),
])

In [35]:
import pandas as pd

# One single-column frame per metric, indexed by model name.
accuracy_df, mcc_df, F1_df = (
    pd.DataFrame.from_dict(scores, orient='index', columns=[metric])
    for metric, scores in (
        ('Accuracy', accuracy_train_list),
        ('MCC', mcc_train_list),
        ('F1', F1_train_list),
    )
)

# Place the metric columns side by side, then attach a caption for display.
# Note: after the last assignment `df` is a Styler, not a DataFrame.
df = pd.concat([accuracy_df, mcc_df, F1_df], axis=1)
df = df.style.set_caption('Training Performance Scores')
df

Unnamed: 0,Accuracy,MCC,F1
knn,0.958333,0.937598,0.958327
svm_rbf,0.991667,0.987603,0.991665
dt,1.0,1.0,1.0
rf,1.0,1.0,1.0
mlp,0.983333,0.975407,0.983323
stack,0.991667,0.987603,0.991665


In [36]:
#Test Performance Scores

# Test-set scores gathered per metric, keyed by model name.
# All three mappings list the models in the same order.
accuracy_test_list = dict([
    ('knn', knn_test_accuracy),
    ('svm_rbf', svm_rbf_test_accuracy),
    ('dt', dt_test_accuracy),
    ('rf', rf_test_accuracy),
    ('mlp', mlp_test_accuracy),
    ('stack', stack_model_test_accuracy),
])

mcc_test_list = dict([
    ('knn', knn_test_mcc),
    ('svm_rbf', svm_rbf_test_mcc),
    ('dt', dt_test_mcc),
    ('rf', rf_test_mcc),
    ('mlp', mlp_test_mcc),
    ('stack', stack_model_test_mcc),
])

F1_test_list = dict([
    ('knn', knn_test_F1),
    ('svm_rbf', svm_rbf_test_F1),
    ('dt', dt_test_F1),
    ('rf', rf_test_F1),
    ('mlp', mlp_test_F1),
    ('stack', stack_model_test_F1),
])

In [39]:
# One single-column frame per metric, indexed by model name.
# These rebind accuracy_df / mcc_df / F1_df / df from the training-score cell.
accuracy_df, mcc_df, F1_df = (
    pd.DataFrame.from_dict(scores, orient='index', columns=[metric])
    for metric, scores in (
        ('Accuracy', accuracy_test_list),
        ('MCC', mcc_test_list),
        ('F1', F1_test_list),
    )
)

# Place the metric columns side by side, then attach a caption for display.
# Note: after the last assignment `df` is a Styler, not a DataFrame.
df = pd.concat([accuracy_df, mcc_df, F1_df], axis=1)
df = df.style.set_caption('Testing Performance Scores')
df

Unnamed: 0,Accuracy,MCC,F1
knn,1.0,1.0,1.0
svm_rbf,0.966667,0.951587,0.966583
dt,0.966667,0.951587,0.966583
rf,0.933333,0.9,0.933333
mlp,1.0,1.0,1.0
stack,1.0,1.0,1.0
