In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Pipeline creation
from sklearn.pipeline import Pipeline

#Data processing modules
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score, cross_val_predict

#importing various Ml classifiers
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

#Importing Metric modules
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score, f1_score




In [2]:
# Locations of the extracted-feature CSV files (one for healthy scans, one for tumor scans)
healthy_features_path, tumor_features_path = (
    'featuresvalues_new.csv',
    'tumorfeaturesvalues_new.csv',
)

In [3]:
# Load the healthy-pancreas feature table and split it positionally:
# every column except the last goes to X_healthy, the last column to y_healthy.
data = pd.read_csv(filepath_or_buffer=healthy_features_path)
X_healthy, y_healthy = data.iloc[:, :-1], data.iloc[:, -1]

In [4]:
# Load the tumor-pancreas feature table and split it positionally:
# every column except the last goes to X_tumor, the last column to y_tumor.
data_tumor = pd.read_csv(filepath_or_buffer=tumor_features_path)
X_tumor, y_tumor = data_tumor.iloc[:, :-1], data_tumor.iloc[:, -1]

In [6]:
# data_tumor

In [46]:
# Sanity check: number of entries in the healthy label vector
y_healthy.shape

(79,)

In [47]:
# Concat Features
# Stack the label vectors tumor-first, then healthy. Using -1 lets NumPy infer
# the row counts instead of hard-coding them, so the cell keeps working when
# either CSV changes size.
z = y_tumor.values.reshape(-1, 1)
h = y_healthy.values.reshape(-1, 1)
modified_array = np.vstack((z, h))
# Row vector of shape (1, n_samples); a later cell turns it into a column.
y = modified_array.reshape(1, -1)

In [48]:
# Stack the feature matrices in the same tumor-first order used for the labels.
# No hard-coded shapes: np.vstack itself raises if the two tables disagree on
# the number of feature columns.
z1 = X_tumor.values
h1 = X_healthy.values
modified_array = np.vstack((z1, h1))
# Keep X as (n_samples, n_features). Reshaping to (n_features, n_samples) is
# NOT a transpose -- it would scramble rows until reshaped back.
X = modified_array

In [49]:
# One row per sample: take the sample count from the labels and let NumPy
# infer the number of feature columns instead of hard-coding both.
n_samples = y.size
X = X.reshape(n_samples, -1)
y = y.reshape(n_samples, 1)

In [50]:
# Append the label column to the right of the feature columns, then wrap the
# combined array in a DataFrame (default integer column names).
modified_data = np.concatenate((X, y), axis=1)
df = pd.DataFrame(data=modified_data)


In [51]:
# Display the combined table: feature columns followed by the label in the last column
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,98,99,100,101,102,103,104,105,106,107
0,0.774209,0.0,0.0,62.498825,44.687500,59.296875,67.012091,67.012091,1699.716187,41.225667,...,0.015991,6.061205,0.142939,796.772754,1.078497,0.008652,41.108638,0.030069,0.687439,1.0
1,0.687649,0.0,0.0,81.423081,49.500000,69.265625,88.906661,88.906661,2395.702515,46.603172,...,0.002551,6.455984,0.306062,55.559027,0.106842,0.005106,1100.218702,0.037558,16.107521,1.0
2,0.587004,0.0,0.0,45.459287,22.859375,34.890625,45.023553,45.023553,754.095968,21.371957,...,0.019730,5.525098,0.255166,46.659327,0.577877,0.019061,59.980274,0.049368,1.508778,1.0
3,0.687467,0.0,0.0,65.250762,36.609375,55.171875,62.805323,62.805323,1471.019491,36.718732,...,0.004378,6.009932,0.261595,60.821255,0.217363,0.006768,542.147860,0.016580,8.667461,1.0
4,0.742163,0.0,0.0,66.731863,44.687500,65.484375,71.129800,71.129800,1790.234782,43.648946,...,0.018017,5.749565,0.381538,10.353508,1.813233,0.004777,93.984282,0.066585,0.363591,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,0.236020,0.0,0.0,138.920782,71.875000,38.281250,131.205815,131.205815,2650.858561,32.788112,...,0.002361,7.235649,0.271580,257.247524,0.050817,0.005500,1250.957739,0.054629,19.374934,0.0
356,0.193854,0.0,0.0,162.396489,70.312500,26.367188,141.827003,141.827003,2212.206523,31.481219,...,0.027410,5.661709,0.243142,131.112058,2.948380,0.003997,73.166907,0.056923,0.501311,0.0
357,0.335641,0.0,0.0,100.225046,54.687500,48.828125,88.307389,88.307389,1618.226369,33.639606,...,0.012035,6.291807,0.299544,35.919704,0.329676,0.007996,423.064167,0.035987,4.055273,0.0
358,0.532818,0.0,0.0,54.400939,34.179688,41.015625,51.563980,51.563980,652.949015,28.985823,...,0.024009,5.611265,0.248266,48.664024,0.268894,0.027179,92.863090,0.062765,3.732317,0.0


In [52]:
#Pipeline Creation
#Pipeline contains a MinMaxScaler and a classifier

def create_pipeline(classifier):
    """Wrap ``classifier`` in a two-step scikit-learn pipeline.

    Args:
        classifier: Estimator to use as the final step.

    Returns:
        A ``Pipeline`` whose steps are ``"scaler"`` (a fresh ``MinMaxScaler``)
        followed by ``"classifier"`` (the estimator passed in).
    """
    return Pipeline(
        steps=[
            ("scaler", MinMaxScaler()),
            ("classifier", classifier),
        ]
    )

In [53]:
# 5 fold CV evaluation

def calculate_metrics(pipeline, X, y, n_splits=5, random_state=42):
    """Cross-validate ``pipeline`` and report five classification metrics.

    All metrics are computed in a single ``cross_validate`` run, so the
    pipeline is fitted once per fold and every metric is measured on the same
    fitted models. Calling ``cross_val_score`` once per metric would refit the
    pipeline for each metric.

    Args:
        pipeline: Estimator (or pipeline) to evaluate.
        X: Feature matrix, one row per sample.
        y: Binary target vector aligned with ``X``.
        n_splits: Number of folds (default 5).
        random_state: Seed for the shuffled ``KFold`` split (default 42).

    Returns:
        Tuple ``(accuracy, precision, roc_auc, recall, f1)`` holding the mean
        of each metric over the folds, rounded to 5 decimals.

    Note:
        The split is a plain shuffled ``KFold`` and is not stratified by class.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not bring in cross_validate.
    from sklearn.model_selection import cross_validate

    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # (printed label, scorer name) in the order they are printed and returned.
    reported = (
        ("Accuracy", "accuracy"),
        ("Precision", "precision"),
        ("ROC-AUC", "roc_auc"),
        ("Recall", "recall"),
        ("F1", "f1"),
    )

    cv_results = cross_validate(
        pipeline, X, y, cv=kfold, scoring=[scorer for _, scorer in reported]
    )

    mean_scores = []
    for label, scorer in reported:
        # cross_validate stores per-fold test scores under "test_<scorer>".
        fold_scores = cv_results[f"test_{scorer}"]
        mean_score = round(np.mean(fold_scores), 5)

        # Print the cross-validation scores
        print(f"Printing {label} scores: ", fold_scores)
        print(f"Mean {label}: ", mean_score, '\n')

        mean_scores.append(mean_score)

    return tuple(mean_scores)

    


In [54]:
# Decision Tree
# Per-repeat mean CV scores, one entry per iteration
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

# The estimator does not change between repeats, so build it once.
decision_tree = DecisionTreeClassifier(criterion="gini", random_state=42)
pipeline_decision_tree = create_pipeline(decision_tree)

for it in range(1, iterations + 1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the repeat number: every repeat gets a different
    # row order, but re-running the notebook reproduces the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].to_numpy()
    y1 = df_shuffled.iloc[:, -1].to_numpy()

    # Unpack into `f1`, not `f1_score`, so the function imported from
    # sklearn.metrics is not overwritten.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_decision_tree, X1, y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.97222222 0.98611111 0.97222222 0.95833333 0.97222222]
Mean Accuracy:  0.97222 

Printing Precision scores:  [0.96491228 1.         1.         0.96226415 1.        ]
Mean Precision:  0.98544 

Printing ROC-AUC scores:  [0.94117647 0.99152542 0.98305085 0.94038462 0.98214286]
Mean ROC-AUC:  0.96766 

Printing Recall scores:  [1.         0.98305085 0.96610169 0.98076923 0.96428571]
Mean Recall:  0.97884 

Printing F1 scores:  [0.98214286 0.99145299 0.98275862 0.97142857 0.98181818]
Mean F1:  0.98192 

Printing values for iteration 2

Printing Accuracy scores:  [0.94444444 0.97222222 0.95833333 0.97222222 0.94444444]
Mean Accuracy:  0.95833 

Printing Precision scores:  [0.93103448 1.         0.98076923 1.         0.98214286]
Mean Precision:  0.97879 

Printing ROC-AUC scores:  [0.88888889 0.98181818 0.95481629 0.98360656 0.93842365]
Mean ROC-AUC:  0.94951 

Printing Recall scores:  [1.         0.96363636 0.96226415 0.96721311 

In [55]:
print("Printing values for 10 iterations Decision Tree")

# (printed label, per-repeat scores) in the order the summary is reported
metric_runs = (
    ("Accuracy", accuracy_list),
    ("Precision", precision_list),
    ("Recall", recall_list),
    ("ROC-AUC", roc_auc_list),
    ("F1-Score", f1_score_list),
)

summary = {}
for label, runs in metric_runs:
    # Arithmetic mean and standard deviation (np.std default, ddof=0)
    summary[label] = (sum(runs) / len(runs), np.std(runs))
    mean_value, std_value = summary[label]
    # Report both values up to 4 decimal points
    print(f"Mean {label}: {mean_value: .4f}")
    print(f"Std {label}: {std_value: .4f}")

# Expose the individual results under their usual names
mean_accuracy, std_accuracy = summary["Accuracy"]
mean_precision, std_precision = summary["Precision"]
mean_recall, std_recall = summary["Recall"]
mean_roc_auc, std_roc_auc = summary["ROC-AUC"]
mean_f1_score, std_f1_score = summary["F1-Score"]


Printing values for 10 iterations Decision Tree
Mean Accuracy:  0.9661
Std Accuracy:  0.0082
Mean Precision:  0.9807
Std Precision:  0.0031
Mean Recall:  0.9757
Std Recall:  0.0090
Mean ROC-AUC:  0.9538
Std ROC-AUC:  0.0105
Mean F1-Score:  0.9780
Std F1-Score:  0.0054


In [30]:
# AdaBoost
# Per-repeat mean CV scores, one entry per iteration
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

# The estimator does not change between repeats, so build it once.
adaboost = AdaBoostClassifier(n_estimators=50, random_state=42)
pipeline_adaboost = create_pipeline(adaboost)

for it in range(1, iterations + 1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the repeat number: every repeat gets a different
    # row order, but re-running the notebook reproduces the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].to_numpy()
    y1 = df_shuffled.iloc[:, -1].to_numpy()

    # Unpack into `f1`, not `f1_score`, so the function imported from
    # sklearn.metrics is not overwritten.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_adaboost, X1, y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.98611111 0.98611111 0.97222222 0.97222222 1.        ]
Mean Accuracy:  0.98333 

Printing Precision scores:  [0.98181818 1.         0.98076923 0.98461538 1.        ]
Mean Precision:  0.98944 

Printing ROC-AUC scores:  [0.96604938 0.99766082 0.99615385 0.98681319 1.        ]
Mean ROC-AUC:  0.98934 

Printing Recall scores:  [1.         0.98245614 0.98076923 0.98461538 1.        ]
Mean Recall:  0.98957 

Printing F1 scores:  [0.99082569 0.99115044 0.98076923 0.98461538 1.        ]
Mean F1:  0.98947 

Printing values for iteration 2

Printing Accuracy scores:  [0.98611111 0.97222222 0.95833333 0.95833333 0.97222222]
Mean Accuracy:  0.96944 

Printing Precision scores:  [1.         0.98387097 0.96       0.94915254 0.98214286]
Mean Precision:  0.97503 

Printing ROC-AUC scores:  [1.         0.98870968 0.99556344 0.96540179 0.99888393]
Mean ROC-AUC:  0.98971 

Printing Recall scores:  [0.98275862 0.98387097 0.97959184 1.         

In [31]:
print("Printing values for 10 iterations AdaBoost")

# (printed label, per-repeat scores) in the order the summary is reported
metric_runs = (
    ("Accuracy", accuracy_list),
    ("Precision", precision_list),
    ("Recall", recall_list),
    ("ROC-AUC", roc_auc_list),
    ("F1-Score", f1_score_list),
)

summary = {}
for label, runs in metric_runs:
    # Arithmetic mean and standard deviation (np.std default, ddof=0)
    summary[label] = (sum(runs) / len(runs), np.std(runs))
    mean_value, std_value = summary[label]
    # Report both values up to 4 decimal points
    print(f"Mean {label}: {mean_value: .4f}")
    print(f"Std {label}: {std_value: .4f}")

# Expose the individual results under their usual names
mean_accuracy, std_accuracy = summary["Accuracy"]
mean_precision, std_precision = summary["Precision"]
mean_recall, std_recall = summary["Recall"]
mean_roc_auc, std_roc_auc = summary["ROC-AUC"]
mean_f1_score, std_f1_score = summary["F1-Score"]


Printing values for 10 iterations AdaBoost
Mean Accuracy:  0.9756
Std Accuracy:  0.0067
Mean Precision:  0.9800
Std Precision:  0.0065
Mean Recall:  0.9890
Std Recall:  0.0057
Mean ROC-AUC:  0.9892
Std ROC-AUC:  0.0019
Mean F1-Score:  0.9844
Std F1-Score:  0.0046


In [32]:
# XGBoost
# Per-repeat mean CV scores, one entry per iteration
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

# The estimator does not change between repeats, so build it once.
xgb_classifier = XGBClassifier(n_estimators=100, learning_rate=0.01, random_state=42)
pipeline_xgb = create_pipeline(xgb_classifier)

for it in range(1, iterations + 1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the repeat number: every repeat gets a different
    # row order, but re-running the notebook reproduces the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].to_numpy()
    y1 = df_shuffled.iloc[:, -1].to_numpy()

    # Unpack into `f1`, not `f1_score`, so the function imported from
    # sklearn.metrics is not overwritten.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_xgb, X1, y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.97222222 0.94444444 0.97222222 0.98611111 0.97222222]
Mean Accuracy:  0.96944 

Printing Precision scores:  [0.96428571 0.93333333 0.96551724 0.98181818 0.96825397]
Mean Precision:  0.96264 

Printing ROC-AUC scores:  [1.         0.99553571 0.984375   1.         0.94038748]
Mean ROC-AUC:  0.98406 

Printing Recall scores:  [1. 1. 1. 1. 1.]
Mean Recall:  1.0 

Printing F1 scores:  [0.98181818 0.96551724 0.98245614 0.99082569 0.98387097]
Mean F1:  0.9809 

Printing values for iteration 2

Printing Accuracy scores:  [0.95833333 0.98611111 0.98611111 0.95833333 0.97222222]
Mean Accuracy:  0.97222 

Printing Precision scores:  [0.95       0.98333333 0.98245614 0.94642857 0.96551724]
Mean Precision:  0.96555 

Printing ROC-AUC scores:  [0.95380117 0.99608866 0.98883929 0.99503476 0.98883929]
Mean ROC-AUC:  0.98452 

Printing Recall scores:  [1. 1. 1. 1. 1.]
Mean Recall:  1.0 

Printing F1 scores:  [0.97435897 0.99159664 0.9911504

In [33]:
print("Printing values for 10 iterations XGBoost")

# (printed label, per-repeat scores) in the order the summary is reported
metric_runs = (
    ("Accuracy", accuracy_list),
    ("Precision", precision_list),
    ("Recall", recall_list),
    ("ROC-AUC", roc_auc_list),
    ("F1-Score", f1_score_list),
)

summary = {}
for label, runs in metric_runs:
    # Arithmetic mean and standard deviation (np.std default, ddof=0)
    summary[label] = (sum(runs) / len(runs), np.std(runs))
    mean_value, std_value = summary[label]
    # Report both values up to 4 decimal points
    print(f"Mean {label}: {mean_value: .4f}")
    print(f"Std {label}: {std_value: .4f}")

# Expose the individual results under their usual names
mean_accuracy, std_accuracy = summary["Accuracy"]
mean_precision, std_precision = summary["Precision"]
mean_recall, std_recall = summary["Recall"]
mean_roc_auc, std_roc_auc = summary["ROC-AUC"]
mean_f1_score, std_f1_score = summary["F1-Score"]


Printing values for 10 iterations XGBoost
Mean Accuracy:  0.9728
Std Accuracy:  0.0030
Mean Precision:  0.9673
Std Precision:  0.0027
Mean Recall:  0.9989
Std Recall:  0.0032
Mean ROC-AUC:  0.9841
Std ROC-AUC:  0.0052
Mean F1-Score:  0.9827
Std F1-Score:  0.0018


In [34]:
# Random Forest
# Per-repeat mean CV scores, one entry per iteration
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

# The estimator does not change between repeats, so build it once.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
pipeline_rf = create_pipeline(rf_classifier)

for it in range(1, iterations + 1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the repeat number: every repeat gets a different
    # row order, but re-running the notebook reproduces the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].to_numpy()
    y1 = df_shuffled.iloc[:, -1].to_numpy()

    # Unpack into `f1`, not `f1_score`, so the function imported from
    # sklearn.metrics is not overwritten.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_rf, X1, y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.95833333 1.         0.97222222 0.94444444 0.95833333]
Mean Accuracy:  0.96667 

Printing Precision scores:  [0.94230769 1.         1.         0.96721311 0.98113208]
Mean Precision:  0.97813 

Printing ROC-AUC scores:  [0.98890861 1.         0.99876847 0.97168405 0.99691358]
Mean ROC-AUC:  0.99125 

Printing Recall scores:  [1.         1.         0.96551724 0.96721311 0.96296296]
Mean Recall:  0.97914 

Printing F1 scores:  [0.97029703 1.         0.98245614 0.96721311 0.97196262]
Mean F1:  0.97839 

Printing values for iteration 2

Printing Accuracy scores:  [0.95833333 0.97222222 0.93055556 0.98611111 1.        ]
Mean Accuracy:  0.96944 

Printing Precision scores:  [0.96428571 0.96610169 0.96296296 1.         1.        ]
Mean Precision:  0.97867 

Printing ROC-AUC scores:  [0.97860963 0.99181287 0.99037433 1.         1.        ]
Mean ROC-AUC:  0.99216 

Printing Recall scores:  [0.98181818 1.         0.94545455 0.98333333 

In [35]:
print("Printing values for 10 iterations Random Forest")

# (printed label, per-repeat scores) in the order the summary is reported
metric_runs = (
    ("Accuracy", accuracy_list),
    ("Precision", precision_list),
    ("Recall", recall_list),
    ("ROC-AUC", roc_auc_list),
    ("F1-Score", f1_score_list),
)

summary = {}
for label, runs in metric_runs:
    # Arithmetic mean and standard deviation (np.std default, ddof=0)
    summary[label] = (sum(runs) / len(runs), np.std(runs))
    mean_value, std_value = summary[label]
    # Report both values up to 4 decimal points
    print(f"Mean {label}: {mean_value: .4f}")
    print(f"Std {label}: {std_value: .4f}")

# Expose the individual results under their usual names
mean_accuracy, std_accuracy = summary["Accuracy"]
mean_precision, std_precision = summary["Precision"]
mean_recall, std_recall = summary["Recall"]
mean_roc_auc, std_roc_auc = summary["ROC-AUC"]
mean_f1_score, std_f1_score = summary["F1-Score"]


Printing values for 10 iterations Random Forest
Mean Accuracy:  0.9703
Std Accuracy:  0.0018
Mean Precision:  0.9788
Std Precision:  0.0004
Mean Recall:  0.9833
Std Recall:  0.0023
Mean ROC-AUC:  0.9894
Std ROC-AUC:  0.0017
Mean F1-Score:  0.9808
Std F1-Score:  0.0013


In [36]:
# SVC
# Per-repeat mean CV scores, one entry per iteration
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

# The estimator does not change between repeats, so build it once.
svc_classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
pipeline_svc = create_pipeline(svc_classifier)

for it in range(1, iterations + 1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the repeat number: every repeat gets a different
    # row order, but re-running the notebook reproduces the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].to_numpy()
    y1 = df_shuffled.iloc[:, -1].to_numpy()

    # Unpack into `f1`, not `f1_score`, so the function imported from
    # sklearn.metrics is not overwritten.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_svc, X1, y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.98611111 0.98611111 0.97222222 0.97222222 0.97222222]
Mean Accuracy:  0.97778 

Printing Precision scores:  [1.         0.98214286 0.96551724 0.96610169 0.96721311]
Mean Precision:  0.97619 

Printing ROC-AUC scores:  [1.         0.98716578 0.99776786 1.         0.94132986]
Mean ROC-AUC:  0.98525 

Printing Recall scores:  [0.98148148 1.         1.         1.         1.        ]
Mean Recall:  0.9963 

Printing F1 scores:  [0.99065421 0.99099099 0.98245614 0.98275862 0.98333333]
Mean F1:  0.98604 

Printing values for iteration 2

Printing Accuracy scores:  [0.98611111 0.98611111 0.98611111 0.95833333 0.97222222]
Mean Accuracy:  0.97778 

Printing Precision scores:  [0.98148148 0.98333333 0.98333333 0.96296296 0.96610169]
Mean Precision:  0.97544 

Printing ROC-AUC scores:  [0.9980139  1.         1.         0.98609732 0.94502924]
Mean ROC-AUC:  0.98583 

Printing Recall scores:  [1.         1.         1.         0.98113208 1

In [37]:
print("Printing values for 10 iterations SVC")

# (printed label, per-repeat scores) in the order the summary is reported
metric_runs = (
    ("Accuracy", accuracy_list),
    ("Precision", precision_list),
    ("Recall", recall_list),
    ("ROC-AUC", roc_auc_list),
    ("F1-Score", f1_score_list),
)

summary = {}
for label, runs in metric_runs:
    # Arithmetic mean and standard deviation (np.std default, ddof=0)
    summary[label] = (sum(runs) / len(runs), np.std(runs))
    mean_value, std_value = summary[label]
    # Report both values up to 4 decimal points
    print(f"Mean {label}: {mean_value: .4f}")
    print(f"Std {label}: {std_value: .4f}")

# Expose the individual results under their usual names
mean_accuracy, std_accuracy = summary["Accuracy"]
mean_precision, std_precision = summary["Precision"]
mean_recall, std_recall = summary["Recall"]
mean_roc_auc, std_roc_auc = summary["ROC-AUC"]
mean_f1_score, std_f1_score = summary["F1-Score"]


Printing values for 10 iterations SVC
Mean Accuracy:  0.9772
Std Accuracy:  0.0017
Mean Precision:  0.9748
Std Precision:  0.0022
Mean Recall:  0.9963
Std Recall:  0.0002
Mean ROC-AUC:  0.9876
Std ROC-AUC:  0.0029
Mean F1-Score:  0.9854
Std F1-Score:  0.0011


In [38]:
# KNN
# Per-repeat mean CV scores, one entry per iteration
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

# The estimator does not change between repeats, so build it once.
knn = KNeighborsClassifier(n_neighbors=6)
pipeline_knn = create_pipeline(knn)

for it in range(1, iterations + 1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the repeat number: every repeat gets a different
    # row order, but re-running the notebook reproduces the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].to_numpy()
    y1 = df_shuffled.iloc[:, -1].to_numpy()

    # Unpack into `f1`, not `f1_score`, so the function imported from
    # sklearn.metrics is not overwritten.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_knn, X1, y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.875      0.97222222 0.94444444 0.94444444 0.97222222]
Mean Accuracy:  0.94167 

Printing Precision scores:  [0.85714286 0.98461538 0.96491228 0.98076923 0.96551724]
Mean Precision:  0.95059 

Printing ROC-AUC scores:  [0.94631766 0.92087912 0.99005848 0.99176955 0.99888393]
Mean ROC-AUC:  0.96958 

Printing Recall scores:  [0.97959184 0.98461538 0.96491228 0.94444444 1.        ]
Mean Recall:  0.97471 

Printing F1 scores:  [0.91428571 0.98461538 0.96491228 0.96226415 0.98245614]
Mean F1:  0.96171 

Printing values for iteration 2

Printing Accuracy scores:  [0.95833333 0.94444444 0.98611111 0.95833333 0.94444444]
Mean Accuracy:  0.95833 

Printing Precision scores:  [0.96428571 0.96610169 0.98275862 0.94827586 0.96363636]
Mean Precision:  0.96501 

Printing ROC-AUC scores:  [0.96417112 0.94980443 0.9994152  0.96791444 0.96042781]
Mean ROC-AUC:  0.96835 

Printing Recall scores:  [0.98181818 0.96610169 1.         1.         

In [39]:
print("Printing values for 10 iterations KNN")

# (printed label, per-repeat scores) in the order the summary is reported
metric_runs = (
    ("Accuracy", accuracy_list),
    ("Precision", precision_list),
    ("Recall", recall_list),
    ("ROC-AUC", roc_auc_list),
    ("F1-Score", f1_score_list),
)

summary = {}
for label, runs in metric_runs:
    # Arithmetic mean and standard deviation (np.std default, ddof=0)
    summary[label] = (sum(runs) / len(runs), np.std(runs))
    mean_value, std_value = summary[label]
    # Report both values up to 4 decimal points
    print(f"Mean {label}: {mean_value: .4f}")
    print(f"Std {label}: {std_value: .4f}")

# Expose the individual results under their usual names
mean_accuracy, std_accuracy = summary["Accuracy"]
mean_precision, std_precision = summary["Precision"]
mean_recall, std_recall = summary["Recall"]
mean_roc_auc, std_roc_auc = summary["ROC-AUC"]
mean_f1_score, std_f1_score = summary["F1-Score"]


Printing values for 10 iterations KNN
Mean Accuracy:  0.9589
Std Accuracy:  0.0072
Mean Precision:  0.9673
Std Precision:  0.0067
Mean Recall:  0.9805
Std Recall:  0.0037
Mean ROC-AUC:  0.9720
Std ROC-AUC:  0.0034
Mean F1-Score:  0.9736
Std F1-Score:  0.0050


In [40]:
# Naive Bayes
# Per-repeat mean CV scores, one entry per iteration
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

# The estimator does not change between repeats, so build it once.
naive_bayes = GaussianNB()
pipeline_naive_bayes = create_pipeline(naive_bayes)

for it in range(1, iterations + 1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the repeat number: every repeat gets a different
    # row order, but re-running the notebook reproduces the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].to_numpy()
    y1 = df_shuffled.iloc[:, -1].to_numpy()

    # Unpack into `f1`, not `f1_score`, so the function imported from
    # sklearn.metrics is not overwritten.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_naive_bayes, X1, y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.94444444 0.94444444 0.95833333 0.90277778 0.91666667]
Mean Accuracy:  0.93333 

Printing Precision scores:  [1.         0.94736842 0.98181818 0.93103448 0.93220339]
Mean Precision:  0.95848 

Printing ROC-AUC scores:  [0.99107143 0.90374332 0.97098214 0.89766082 0.88011696]
Mean ROC-AUC:  0.92871 

Printing Recall scores:  [0.92857143 0.98181818 0.96428571 0.94736842 0.96491228]
Mean Recall:  0.95739 

Printing F1 scores:  [0.96296296 0.96428571 0.97297297 0.93913043 0.94827586]
Mean F1:  0.95753 

Printing values for iteration 2

Printing Accuracy scores:  [0.93055556 0.875      0.91666667 0.97222222 0.94444444]
Mean Accuracy:  0.92778 

Printing Precision scores:  [0.96491228 0.90384615 0.95       1.         0.94915254]
Mean Precision:  0.95358 

Printing ROC-AUC scores:  [0.90763547 0.92903828 0.97222222 0.97379679 0.92397661]
Mean ROC-AUC:  0.94133 

Printing Recall scores:  [0.94827586 0.92156863 0.95       0.96363636 

In [41]:
print("Printing values for 10 iterations Naive Bayes")

# (printed label, per-repeat scores) in the order the summary is reported
metric_runs = (
    ("Accuracy", accuracy_list),
    ("Precision", precision_list),
    ("Recall", recall_list),
    ("ROC-AUC", roc_auc_list),
    ("F1-Score", f1_score_list),
)

summary = {}
for label, runs in metric_runs:
    # Arithmetic mean and standard deviation (np.std default, ddof=0)
    summary[label] = (sum(runs) / len(runs), np.std(runs))
    mean_value, std_value = summary[label]
    # Report both values up to 4 decimal points
    print(f"Mean {label}: {mean_value: .4f}")
    print(f"Std {label}: {std_value: .4f}")

# Expose the individual results under their usual names
mean_accuracy, std_accuracy = summary["Accuracy"]
mean_precision, std_precision = summary["Precision"]
mean_recall, std_recall = summary["Recall"]
mean_roc_auc, std_roc_auc = summary["ROC-AUC"]
mean_f1_score, std_f1_score = summary["F1-Score"]


Printing values for 10 iterations Naive Bayes
Mean Accuracy:  0.9319
Std Accuracy:  0.0050
Mean Precision:  0.9599
Std Precision:  0.0035
Mean Recall:  0.9532
Std Recall:  0.0059
Mean ROC-AUC:  0.9413
Std ROC-AUC:  0.0082
Mean F1-Score:  0.9562
Std F1-Score:  0.0033
