In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler, RobustScaler
import warnings
warnings.simplefilter("ignore")

In [4]:
featureNames = ["seq", "mcg", "gvh", "alm", "mit", "erl", "pox", "vac", "nuc", "loc"]
yeastData = pd.read_csv("yeast.data", sep=" ", names=featureNames)
kFold = KFold(n_splits=2, shuffle=False)
yeastAttrib = yeastData.iloc[:,1:9].values  # fix column indexes
yeastTarget = yeastData["loc"].values

titles = ("GradientBoost", "KNN", "Gaussian", "Random Forest", "MLP") # add more
models = (GradientBoostingClassifier(n_estimators=100, max_features=None, max_depth=2, random_state=5),
            KNeighborsClassifier(),
            GaussianNB(),
            RandomForestClassifier(),
             MLPClassifier())
scaler = MinMaxScaler() 
scaler.fit(yeastAttrib)
yeastAttribMM = scaler.transform(yeastAttrib)
scaler = RobustScaler() 
scaler.fit(yeastAttrib)
yeastAttribRS = scaler.transform(yeastAttrib)

In [5]:
fold = 1
for train_index, test_index in kFold.split(yeastAttrib):
    print(f"------------"
          f"Fold {fold}")
    modelno = 1
    train_data, train_target = yeastAttrib[train_index], yeastTarget[train_index]
    test_data, test_target = yeastAttrib[test_index], yeastTarget[test_index]
    train_dataMM = yeastAttribMM[train_index]
    train_dataRS = yeastAttribRS[train_index]
    test_dataMM = yeastAttribMM[test_index]
    test_dataRS = yeastAttribRS[test_index]
    for model, title in zip(models, titles):
        clf = model.fit(train_data, train_target)
        prediction = clf.predict(test_data)
        print(f"{title}")
        print(classification_report(test_target, prediction))
        print(f"Confusion Matrix: \n {confusion_matrix(test_target, prediction)}")
        
        clf = model.fit(train_dataMM, train_target)
        prediction = clf.predict(test_dataMM)
        print(f"{title}")
        print(classification_report(test_target, prediction))
        print(f"Confusion Matrix: \n {confusion_matrix(test_target, prediction)}")

        clf = model.fit(train_dataRS, train_target)
        prediction = clf.predict(test_dataRS)
        print(f"{title}")
        print(classification_report(test_target, prediction))
        print(f"Confusion Matrix: \n {confusion_matrix(test_target, prediction)}")
    fold += 1

------------Fold 1
GradientBoost
             precision    recall  f1-score   support

        CYT       0.45      0.61      0.52       198
        ERL       0.00      0.00      0.00         5
        EXC       0.43      0.32      0.36        19
        ME1       0.52      0.65      0.58        17
        ME2       0.48      0.39      0.43        33
        ME3       0.72      0.89      0.80        74
        MIT       0.82      0.42      0.55       180
        NUC       0.50      0.53      0.52       197
        POX       0.55      0.43      0.48        14
        VAC       0.00      0.00      0.00         5

avg / total       0.58      0.54      0.54       742

Confusion Matrix: 
 [[121   0   0   0   1   6   7  62   0   1]
 [  0   0   2   0   2   0   0   0   0   1]
 [  5   0   6   3   3   0   0   1   0   1]
 [  0   0   2  11   2   1   0   0   1   0]
 [  2   0   2   5  13   3   1   3   2   2]
 [  0   0   0   0   0  66   0   8   0   0]
 [ 61   0   2   2   2   8  75  27   2   1]
 [ 74  

MLP
             precision    recall  f1-score   support

        CYT       0.39      0.72      0.51       198
        ERL       0.00      0.00      0.00         5
        EXC       1.00      0.16      0.27        19
        ME1       0.35      0.82      0.49        17
        ME2       0.00      0.00      0.00        33
        ME3       0.69      0.80      0.74        74
        MIT       0.73      0.39      0.51       180
        NUC       0.57      0.43      0.49       197
        POX       0.75      0.43      0.55        14
        VAC       0.00      0.00      0.00         5

avg / total       0.55      0.51      0.49       742

Confusion Matrix: 
 [[142   0   0   1   0   3   7  45   0   0]
 [  1   0   0   1   0   2   1   0   0   0]
 [  8   0   3   8   0   0   0   0   0   0]
 [  0   0   0  14   0   1   2   0   0   0]
 [  7   0   0  13   0   5   5   3   0   0]
 [  2   0   0   0   0  59   1  12   0   0]
 [ 92   0   0   3   0   8  70   5   2   0]
 [ 98   0   0   0   0   6   8  85   

MLP
             precision    recall  f1-score   support

        CYT       0.52      0.56      0.54       265
        EXC       0.00      0.00      0.00        16
        ME1       0.33      0.04      0.07        27
        ME2       0.13      0.28      0.18        18
        ME3       0.80      0.67      0.73        89
        MIT       0.28      0.61      0.38        64
        NUC       0.59      0.50      0.54       232
        POX       1.00      0.50      0.67         6
        VAC       0.00      0.00      0.00        25

avg / total       0.51      0.50      0.50       742

Confusion Matrix: 
 [[149   0   0   2   0  55  59   0   0]
 [  0   0   0   5   0  11   0   0   0]
 [  0   0   1  24   1   1   0   0   0]
 [  2   0   2   5   4   2   3   0   0]
 [ 17   0   0   1  60   2   9   0   0]
 [ 16   0   0   0   2  39   7   0   0]
 [ 92   0   0   0   2  22 116   0   0]
 [  0   0   0   1   0   2   0   3   0]
 [ 12   0   0   1   6   5   1   0   0]]
MLP
             precision    recall  