In [158]:
# Import packages
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from collections import Counter
from parse import preprocess

In [159]:
# Function to show classification report for Cross Validation
def classification_report_with_accuracy_score(y_true, y_pred):
    print(classification_report(y_true, y_pred)) # print classification report
    return accuracy_score(y_true, y_pred) # return accuracy score

In [160]:
# Pre-parse the dataset
data = preprocess("rawfile_blood.csv")


####################################################################
Number of Rows of Dataframe:
1123
Number of Columns of Dataframe:
59

####################################################################
Threshold for number of NULLs in a column: 0.1095
Number of Columns before Parsing for Too Many NULLs in a column:
59
Number of Columns after Parsing for Too Many NULLs in a column:
51

Columns Removed:
B1_b5
B4_a1
B4_a3
B4_a4
B4_a6
B4_b1
B4_b3
B5_a1

####################################################################
Number of Rows before Parsing NULLs in data:
1123
Number of Rows after Parsing NULLs in data:
1007

####################################################################
Number of Columns after dropping A1_2, B1_b4, B2_c3, B4_b2 for inconsistent data types:
47


In [161]:
# Initialise counters for each condition
frail = 0
frail_mci = 0
mci = 0
prefrail_mci = 0
prefrail = 0
robust = 0

# Count rows of data for each condition
for i in range(0, len(data)):
	if data.at[i, 'condition'] == 'frail':
		frail += 1
	elif data.at[i, 'condition'] == 'frail_mci':
		frail_mci += 1
	elif data.at[i, 'condition'] == 'mci':
		mci += 1
	elif data.at[i, 'condition'] == 'prefrail_mci':
		prefrail_mci += 1
	elif data.at[i, 'condition'] == 'prefrail':
		prefrail += 1
	elif data.at[i, 'condition'] == 'robust':
		robust += 1
        
# Display number of rows (frequency) for each condition (label)
print("\n####################################################################")
print("Labels with frequencies:")
print("Frail:", frail)
print("Frail + MCI:", frail_mci)
print("MCI:", mci)
print("Prefrail + MCI:", prefrail_mci)
print("Prefrail:", prefrail)
print("Robust:", robust)


####################################################################
Labels with frequencies:
Frail: 7
Frail + MCI: 76
MCI: 133
Prefrail + MCI: 231
Prefrail: 221
Robust: 339


In [162]:
data.head()

Unnamed: 0,mtag,condition,A1_1,A2_1,A3_1,B1_a,B1_a1,B1_a2,B1_a3,B1_a4,...,B2_d6,B2_d7,B2_d8,B2_d9,B3,B4_a2,B4_a5,B5_a2,B5_a3,B6
0,ME02646,frail,196,24,46.5,121,3.93,0.37,95,31,...,7,12,13,6,0.2,6.0,1.011,1.14,4.1,5.9
1,ME03109,frail,200,23,55.6,142,4.82,0.42,87,30,...,7,20,17,26,3.1,5.0,1.011,3.25,4.6,8.5
2,ME06997,frail,441,20,76.8,105,4.54,0.41,90,30,...,5,16,19,15,1.4,7.0,1.023,2.14,4.0,6.4
3,ME07149,frail,265,16,47.2,122,4.53,0.39,86,27,...,8,24,19,21,2.1,5.5,1.012,1.06,4.7,6.1
4,ME07700,frail,425,14,31.3,124,4.44,0.38,85,28,...,6,20,23,23,6.0,5.5,1.013,1.95,3.8,5.8


In [163]:
data.columns

Index(['mtag', 'condition', 'A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6'],
      dtype='object')

In [164]:
c = data['condition'].value_counts()
condition = c.index
c

robust          339
prefrail_mci    231
prefrail        221
mci             133
frail_mci        76
frail             7
Name: condition, dtype: int64

In [165]:
for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)

data.head(3)

Unnamed: 0,mtag,condition,A1_1,A2_1,A3_1,B1_a,B1_a1,B1_a2,B1_a3,B1_a4,...,B2_d6,B2_d7,B2_d8,B2_d9,B3,B4_a2,B4_a5,B5_a2,B5_a3,B6
0,ME02646,5,196,24,46.5,121,3.93,0.37,95,31,...,7,12,13,6,0.2,6.0,1.011,1.14,4.1,5.9
1,ME03109,5,200,23,55.6,142,4.82,0.42,87,30,...,7,20,17,26,3.1,5.0,1.011,3.25,4.6,8.5
2,ME06997,5,441,20,76.8,105,4.54,0.41,90,30,...,5,16,19,15,1.4,7.0,1.023,2.14,4.0,6.4


In [166]:
data.tail()

Unnamed: 0,mtag,condition,A1_1,A2_1,A3_1,B1_a,B1_a1,B1_a2,B1_a3,B1_a4,...,B2_d6,B2_d7,B2_d8,B2_d9,B3,B4_a2,B4_a5,B5_a2,B5_a3,B6
1002,MV00454,0,220,19,67.5,138,4.66,0.42,91,30,...,20,10,17,8,6.6,7.0,1.015,1.29,4.5,6.2
1003,MV00456,0,334,18,51.0,139,4.63,0.42,91,30,...,16,22,35,40,1.0,6.0,1.015,1.88,3.9,5.6
1004,MV00460,0,418,17,61.0,122,4.18,0.38,90,29,...,19,20,23,15,0.4,6.5,1.005,3.58,4.0,5.6
1005,MV00502,0,393,18,43.1,136,4.57,0.43,94,30,...,13,11,22,23,0.7,7.0,1.009,0.92,4.1,6.0
1006,MV00510,0,371,24,55.9,127,4.41,0.4,90,29,...,13,14,16,12,7.5,8.0,1.017,2.45,4.5,6.2


In [167]:
data.columns

Index(['mtag', 'condition', 'A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6'],
      dtype='object')

In [168]:
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

In [169]:
X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

In [170]:
# Summarise the new class distribution
counter = Counter(y)
print(counter)

Counter({0: 339, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7})


In [171]:
# Undersample the majority class
# Define undersample strategy

# 75% of majority class
sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

X, y = undersample.fit_resample(X, y)

In [172]:
# Summarise the new class distribution
counter = Counter(y)
print(counter) 

Counter({0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7})


In [173]:
y.shape

(922,)

In [174]:
X.shape

(922, 45)

In [175]:
# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

In [176]:
# Summarise the new class distribution
counter = Counter(y)
print(counter)

Counter({0: 254, 1: 254, 2: 254, 3: 254, 4: 254, 5: 254})


In [177]:
y.shape

(1524,)

In [178]:
X.shape

(1524, 45)

In [179]:
# Test 1: Using the entire dataset as both the train and test sets without splitting into separate train and test sets

In [180]:
# Logistics Regression

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

log_model = LogisticRegression()
log_model.fit(X, y)
print("Logistic Regression:", log_model.score(X, y).round(3))

# Linear Discriminant Analysis

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X, y)
print("Linear Discriminant Analysis:", lda_model.score(X, y).round(3))

# K-Nearest Neigbors

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

knn_model = KNeighborsClassifier()
knn_model.fit(X, y)
print("K-Nearest Neigbors:", knn_model.score(X, y).round(3))

# Classification and Regression Trees

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

cart_model = DecisionTreeClassifier()
cart_model.fit(X, y)
print("Classification and Regression Trees:", cart_model.score(X, y).round(3))

# Gaussian Naive Bayes

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

gnb_model = GaussianNB()
gnb_model.fit(X, y)
print("Gaussian Naive Bayes:", gnb_model.score(X, y).round(3))

# Support Vector Machines

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

svm_model = SVC(gamma = 'auto')
svm_model.fit(X, y)
print("Support Vector Machines:", svm_model.score(X, y).round(3))

# Random Forest Classifier

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

rfc_model = RandomForestClassifier()
rfc_model.fit(X, y)
print("Random Forest Classifier:", rfc_model.score(X, y).round(3))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Logistic Regression: 0.434
Linear Discriminant Analysis: 0.51
K-Nearest Neigbors: 0.763
Classification and Regression Trees: 1.0
Gaussian Naive Bayes: 0.465
Support Vector Machines: 1.0
Random Forest Classifier: 1.0


In [181]:
# Test 2: Splitting the dataset into separate train and test sets

In [182]:
# Logistic Regression

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

log_model = LogisticRegression()
log_model.fit(X_train, y_train)
print("Logistic Regression:", log_model.score(X_test, y_test).round(3))

# Linear Discriminant Analysis

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X_train, y_train)
print("Linear Discriminant Analysis:", lda_model.score(X_test, y_test).round(3))

# K-Nearest Neigbors

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
print("K-Nearest Neigbors:", knn_model.score(X_test, y_test).round(3))

# Classification and Regression Trees

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

cart_model = DecisionTreeClassifier()
cart_model.fit(X_train, y_train)
print("Classification and Regression Trees:", cart_model.score(X_test, y_test).round(3))

# Gaussian Naive Bayes

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

gnb_model = GaussianNB()
gnb_model.fit(X_train, y_train)
print("Gaussian Naive Bayes:", gnb_model.score(X_test, y_test).round(3))

# Support Vector Machines

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

svm_model = SVC(gamma = 'auto')
svm_model.fit(X_train, y_train)
print("Support Vector Machines:", svm_model.score(X_test, y_test).round(3))

# Random Forest Classifier

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

rfc_model = RandomForestClassifier()
rfc_model.fit(X_train, y_train)
print("Random Forest Classifier:", rfc_model.score(X_test, y_test).round(3))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Logistic Regression: 0.383
Linear Discriminant Analysis: 0.464
K-Nearest Neigbors: 0.558
Classification and Regression Trees: 0.545
Gaussian Naive Bayes: 0.459
Support Vector Machines: 0.501
Random Forest Classifier: 0.725


In [183]:
# Logistic Regression

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

log_model = LogisticRegression()
log_model.fit(X, y)
scores = cross_val_score(log_model, X, y, cv=10)
print("Logistic Regression: %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

# Linear Discriminant Analysis

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X, y)
scores = cross_val_score(lda_model, X, y, cv=10)
print("Linear Discriminant Analysis: %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

# K-Nearest Neigbors

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

knn_model = KNeighborsClassifier()
knn_model.fit(X, y)
scores = cross_val_score(knn_model, X, y, cv=10)
print("K-Nearest Neighbors: %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

# Classification and Regression Trees

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

cart_model = DecisionTreeClassifier()
cart_model.fit(X, y)
scores = cross_val_score(cart_model, X, y, cv=10)
print("Classification and Regression Trees: %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

# Gaussian Naive Bayes

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

gnb_model = GaussianNB()
gnb_model.fit(X, y)
scores = cross_val_score(gnb_model, X, y, cv=10)
print("Gaussian Naive Bayes: %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

# Support Vector Machines

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

svm_model = SVC(gamma = 'auto')
svm_model.fit(X, y)
scores = cross_val_score(svm_model, X, y, cv=10)
print("Support Vector Machines: %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

# Random Forest Classifier

c = data['condition'].value_counts()
condition = c.index

for i in range(len(condition)):
    data['condition'].replace(condition[i], i, inplace = True)
    
y = data['condition']

features = ['A1_1', 'A2_1', 'A3_1', 'B1_a', 'B1_a1', 'B1_a2',
       'B1_a3', 'B1_a4', 'B1_a5', 'B1_a6', 'B1_b', 'B1_b1', 'B1_b2', 'B1_b3',
       'B1_c', 'B1_d', 'B2_a1', 'B2_a2', 'B2_a3', 'B2_a4', 'B2_a5', 'B2_b1',
       'B2_b2', 'B2_b3', 'B2_c1', 'B2_c2', 'B2_c4', 'B2_c5', 'B2_c6', 'B2_c7',
       'B2_d1', 'B2_d2', 'B2_d3', 'B2_d4', 'B2_d5', 'B2_d6', 'B2_d7', 'B2_d8',
       'B2_d9', 'B3', 'B4_a2', 'B4_a5', 'B5_a2', 'B5_a3', 'B6']
X_old = data[features]

X = X_old
# X = StandardScaler().fit_transform(X_old)
# X = MinMaxScaler().fit_transform(X_old)

# Undersample the majority class
# Define undersample strategy

# 75% of majority class
# sampling_strategy = {0: 254, 1: 231, 2: 221, 3: 133, 4: 76, 5: 7}

# 50% of majority class
# sampling_strategy = {0: 170, 1: 170, 2: 170, 3: 133, 4: 76, 5: 7}

# 25% of majority class
# sampling_strategy = {0: 85, 1: 85, 2: 85, 3: 85, 4: 76, 5: 7}


# undersample = RandomUnderSampler(sampling_strategy=sampling_strategy)

# 50% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0413)

# 25% of majority class
# undersample = RandomUnderSampler(sampling_strategy=0.0826)

# X, y = undersample.fit_resample(X, y)

# Transform the dataset using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

rfc_model = RandomForestClassifier()
rfc_model.fit(X, y)
scores = cross_val_score(rfc_model, X, y, cv=10)
print("Random Forest Classifier: %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Logistic Regression: 0.40 accuracy with a standard deviation of 0.03
Linear Discriminant Analysis: 0.45 accuracy with a standard deviation of 0.06
K-Nearest Neighbors: 0.61 accuracy with a standard deviation of 0.06
Classification and Regression Trees: 0.56 accuracy with a standard deviation of 0.05
Gaussian Naive Bayes: 0.45 accuracy with a standard deviation of 0.04
Support Vector Machines: 0.52 accuracy with a standard deviation of 0.03
Random Forest Classifier: 0.75 accuracy with a standard deviation of 0.08


In [184]:
# Logistic Regression
scores = cross_val_score(log_model, X, y, cv=10, scoring=make_scorer(classification_report_with_accuracy_score))
print(scores)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


              precision    recall  f1-score   support

           0       0.27      0.44      0.34        34
           1       0.44      0.12      0.19        34
           2       0.26      0.18      0.21        34
           3       0.29      0.26      0.28        34
           4       0.52      0.38      0.44        34
           5       0.56      1.00      0.72        34

    accuracy                           0.40       204
   macro avg       0.39      0.40      0.36       204
weighted avg       0.39      0.40      0.36       204



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


              precision    recall  f1-score   support

           0       0.26      0.26      0.26        34
           1       0.17      0.12      0.14        34
           2       0.20      0.12      0.15        34
           3       0.26      0.29      0.28        34
           4       0.33      0.29      0.31        34
           5       0.58      1.00      0.73        34

    accuracy                           0.35       204
   macro avg       0.30      0.35      0.31       204
weighted avg       0.30      0.35      0.31       204



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


              precision    recall  f1-score   support

           0       0.28      0.21      0.24        34
           1       0.20      0.15      0.17        34
           2       0.38      0.15      0.21        34
           3       0.39      0.35      0.37        34
           4       0.30      0.50      0.38        34
           5       0.59      0.94      0.73        34

    accuracy                           0.38       204
   macro avg       0.36      0.38      0.35       204
weighted avg       0.36      0.38      0.35       204



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


              precision    recall  f1-score   support

           0       0.29      0.21      0.24        34
           1       0.29      0.12      0.17        34
           2       0.18      0.18      0.18        34
           3       0.14      0.15      0.14        34
           4       0.33      0.38      0.35        34
           5       0.60      1.00      0.75        34

    accuracy                           0.34       204
   macro avg       0.30      0.34      0.30       204
weighted avg       0.30      0.34      0.30       204

              precision    recall  f1-score   support

           0       0.26      0.27      0.26        33
           1       0.21      0.15      0.17        34
           2       0.19      0.09      0.12        34
           3       0.38      0.35      0.36        34
           4       0.32      0.44      0.37        34
           5       0.69      1.00      0.82        34

    accuracy                           0.38       203
   macro avg       0.34

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


              precision    recall  f1-score   support

           0       0.19      0.15      0.17        34
           1       0.21      0.18      0.19        34
           2       0.36      0.27      0.31        33
           3       0.28      0.24      0.25        34
           4       0.38      0.35      0.36        34
           5       0.54      1.00      0.70        34

    accuracy                           0.36       203
   macro avg       0.33      0.36      0.33       203
weighted avg       0.33      0.36      0.33       203

              precision    recall  f1-score   support

           0       0.30      0.41      0.35        34
           1       0.09      0.06      0.07        33
           2       0.27      0.24      0.25        34
           3       0.38      0.32      0.35        34
           4       0.46      0.35      0.40        34
           5       0.69      1.00      0.82        34

    accuracy                           0.40       203
   macro avg       0.37

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


              precision    recall  f1-score   support

           0       0.32      0.35      0.34        34
           1       0.16      0.09      0.11        34
           2       0.25      0.26      0.26        34
           3       0.29      0.21      0.25        33
           4       0.47      0.44      0.45        34
           5       0.62      1.00      0.76        34

    accuracy                           0.39       203
   macro avg       0.35      0.39      0.36       203
weighted avg       0.35      0.39      0.36       203

              precision    recall  f1-score   support

           0       0.20      0.15      0.17        34
           1       0.25      0.12      0.16        34
           2       0.30      0.35      0.32        34
           3       0.42      0.32      0.37        34
           4       0.37      0.39      0.38        33
           5       0.56      1.00      0.72        34

    accuracy                           0.39       203
   macro avg       0.35

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [185]:
# Linear Discriminant Analysis
scores = cross_val_score(lda_model, X, y, cv=10, scoring=make_scorer(classification_report_with_accuracy_score))
print(scores)

              precision    recall  f1-score   support

           0       0.33      0.47      0.39        34
           1       0.38      0.24      0.29        34
           2       0.24      0.29      0.27        34
           3       0.37      0.32      0.34        34
           4       0.40      0.24      0.30        34
           5       0.66      0.85      0.74        34

    accuracy                           0.40       204
   macro avg       0.40      0.40      0.39       204
weighted avg       0.40      0.40      0.39       204

              precision    recall  f1-score   support

           0       0.38      0.38      0.38        34
           1       0.26      0.26      0.26        34
           2       0.28      0.32      0.30        34
           3       0.28      0.21      0.24        34
           4       0.32      0.26      0.29        34
           5       0.76      0.94      0.84        34

    accuracy                           0.40       204
   macro avg       0.38

In [186]:
# K-Nearest Neigbors
knn_model = KNeighborsClassifier()
knn_model.fit(X, y)
scores = cross_val_score(knn_model, X, y, cv=10, scoring=make_scorer(classification_report_with_accuracy_score))
print(scores)

              precision    recall  f1-score   support

           0       0.45      0.29      0.36        34
           1       0.44      0.35      0.39        34
           2       0.38      0.29      0.33        34
           3       0.53      0.56      0.54        34
           4       0.68      0.88      0.77        34
           5       0.69      1.00      0.82        34

    accuracy                           0.56       204
   macro avg       0.53      0.56      0.54       204
weighted avg       0.53      0.56      0.54       204

              precision    recall  f1-score   support

           0       0.25      0.24      0.24        34
           1       0.42      0.32      0.37        34
           2       0.38      0.26      0.31        34
           3       0.50      0.53      0.51        34
           4       0.63      0.76      0.69        34
           5       0.76      1.00      0.86        34

    accuracy                           0.52       204
   macro avg       0.49

In [187]:
# Classification and Regression Trees
cart_model = DecisionTreeClassifier()
cart_model.fit(X, y)
scores = cross_val_score(cart_model, X, y, cv=10, scoring=make_scorer(classification_report_with_accuracy_score))
print(scores)

              precision    recall  f1-score   support

           0       0.25      0.32      0.28        34
           1       0.47      0.44      0.45        34
           2       0.36      0.26      0.31        34
           3       0.52      0.50      0.51        34
           4       0.62      0.74      0.68        34
           5       1.00      0.88      0.94        34

    accuracy                           0.52       204
   macro avg       0.54      0.52      0.53       204
weighted avg       0.54      0.52      0.53       204

              precision    recall  f1-score   support

           0       0.27      0.24      0.25        34
           1       0.42      0.38      0.40        34
           2       0.38      0.38      0.38        34
           3       0.50      0.50      0.50        34
           4       0.49      0.59      0.53        34
           5       0.97      0.97      0.97        34

    accuracy                           0.51       204
   macro avg       0.50

In [188]:
# Gaussian Naive Bayes
gnb_model = GaussianNB()
gnb_model.fit(X, y)
scores = cross_val_score(gnb_model, X, y, cv=10, scoring=make_scorer(classification_report_with_accuracy_score))
print(scores)

              precision    recall  f1-score   support

           0       0.31      0.38      0.34        34
           1       0.45      0.26      0.33        34
           2       0.25      0.24      0.24        34
           3       0.33      0.56      0.41        34
           4       0.69      0.32      0.44        34
           5       0.94      1.00      0.97        34

    accuracy                           0.46       204
   macro avg       0.49      0.46      0.46       204
weighted avg       0.49      0.46      0.46       204

              precision    recall  f1-score   support

           0       0.43      0.38      0.41        34
           1       0.26      0.24      0.25        34
           2       0.21      0.18      0.19        34
           3       0.22      0.32      0.26        34
           4       0.33      0.26      0.30        34
           5       0.94      1.00      0.97        34

    accuracy                           0.40       204
   macro avg       0.40

In [189]:
# Support Vector Machines
svm_model = SVC(gamma = 'auto')
svm_model.fit(X, y)
scores = cross_val_score(svm_model, X, y, cv=10, scoring=make_scorer(classification_report_with_accuracy_score))
print(scores)

              precision    recall  f1-score   support

           0       0.26      1.00      0.41        34
           1       1.00      0.24      0.38        34
           2       1.00      0.15      0.26        34
           3       1.00      0.41      0.58        34
           4       1.00      0.53      0.69        34
           5       1.00      0.82      0.90        34

    accuracy                           0.52       204
   macro avg       0.88      0.52      0.54       204
weighted avg       0.88      0.52      0.54       204

              precision    recall  f1-score   support

           0       0.26      1.00      0.41        34
           1       1.00      0.35      0.52        34
           2       1.00      0.18      0.30        34
           3       1.00      0.29      0.45        34
           4       1.00      0.50      0.67        34
           5       1.00      0.79      0.89        34

    accuracy                           0.52       204
   macro avg       0.88

In [190]:
# Random Forest Classifier
rfc_model = RandomForestClassifier()
rfc_model.fit(X, y)
scores = cross_val_score(rfc_model, X, y, cv=10, scoring=make_scorer(classification_report_with_accuracy_score))
print(scores)

              precision    recall  f1-score   support

           0       0.41      0.59      0.48        34
           1       0.78      0.41      0.54        34
           2       0.55      0.47      0.51        34
           3       0.71      0.79      0.75        34
           4       0.89      0.94      0.91        34
           5       1.00      1.00      1.00        34

    accuracy                           0.70       204
   macro avg       0.72      0.70      0.70       204
weighted avg       0.72      0.70      0.70       204

              precision    recall  f1-score   support

           0       0.55      0.50      0.52        34
           1       0.61      0.65      0.63        34
           2       0.45      0.41      0.43        34
           3       0.69      0.71      0.70        34
           4       0.81      0.88      0.85        34
           5       1.00      1.00      1.00        34

    accuracy                           0.69       204
   macro avg       0.68

In [191]:
# # Calculating accuracy metrics for LDA
# lda_model.fit(X_train, y_train)
# lda_pred = lda_model.predict(X_test)

# print('Accuracy Metrics for LDA:\n')
# print(accuracy_score(y_test, lda_pred).round(5), '\n')
# print(confusion_matrix(y_test, lda_pred), '\n')
# print(classification_report(y_test, lda_pred))