In [65]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Pipeline creation
from sklearn.pipeline import Pipeline

#Data processing modules
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score, cross_val_predict

#Importing various ML classifiers
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

#Importing Metric modules
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score, f1_score




In [66]:
# Locations of the extracted-feature CSV files (healthy pancreas, tumor pancreas).
# Both are relative paths, so they resolve against the notebook's working directory.
healthy_features_path, tumor_features_path = (
    'featuresvalues_new2D_automated.csv',
    'tumorfeaturesvalues_new2D_automated_70.csv',
)

In [67]:
# Load the healthy-pancreas feature table.
# Every column except the last is treated as a feature; the last column is the label.
data = pd.read_csv(healthy_features_path)
X_healthy, y_healthy = data.iloc[:, :-1], data.iloc[:, -1]

In [68]:
# Load the tumor-pancreas feature table.
# Every column except the last is treated as a feature; the last column is the label.
data_tumor = pd.read_csv(tumor_features_path)
X_tumor, y_tumor = data_tumor.iloc[:, :-1], data_tumor.iloc[:, -1]

In [69]:
# data_tumor

In [70]:
# Shape of the healthy label vector (last CSV column); its length is the number of healthy rows
y_healthy.shape

(89,)

In [71]:
# Concatenate the labels: tumor rows first, then healthy rows.
# Sizes are inferred with -1 instead of being hard-coded, so the cell keeps
# working when the number of rows in either CSV changes.
z = y_tumor.values.reshape(-1, 1)      # tumor labels as a column vector
h = y_healthy.values.reshape(-1, 1)    # healthy labels as a column vector
modified_array = np.vstack((z, h))     # (n_tumor + n_healthy, 1)
y = modified_array.reshape(1, -1)      # row vector; reshaped to a column in a later cell

In [72]:
# Shape of the tumor feature matrix: (rows, feature columns)
X_tumor.shape

(256, 102)

In [73]:
# Stack the feature matrices row-wise: tumor samples first, then healthy samples.
# No hard-coded sizes: np.vstack raises a ValueError by itself if the two tables
# do not have the same number of feature columns.
z1 = X_tumor.values
h1 = X_healthy.values
modified_array = np.vstack((z1, h1))

# Keep X as (n_samples, n_features). Reshaping it to (n_features, n_samples) is not
# a transpose; it would scramble rows and columns until it is reshaped back again.
X = modified_array

In [74]:
# Final layout: X is (n_samples, n_features) and y is an (n_samples, 1) column.
# The sample count is taken from y instead of being hard-coded, and the feature
# count is inferred by NumPy (-1), so the cell adapts to the size of the input CSVs.
X = X.reshape(y.size, -1)
y = y.reshape(-1, 1)

In [75]:
# Append the label column to the right of the feature matrix and wrap the result
# in a DataFrame (default integer column names; the label is the last column).
modified_data = np.hstack([X, y])
df = pd.DataFrame(data=modified_data)


In [80]:
#Pipeline Creation
#Pipeline contains a MinMaxScaler and a classifier

def create_pipeline(classifier):
    """Wrap ``classifier`` in a two-step scikit-learn pipeline.

    The first step, named "scaler", is a fresh MinMaxScaler; the second step,
    named "classifier", is the estimator passed in.

    Returns the resulting Pipeline object.
    """
    return Pipeline(steps=[
        ("scaler", MinMaxScaler()),
        ("classifier", classifier),
    ])

In [81]:
# 5 fold CV evaluation

def calculate_metrics(pipeline,X,y):
    """Evaluate ``pipeline`` with shuffled 5-fold cross-validation and report five metrics.

    All metrics are computed from a single cross-validation run (one fit per fold)
    rather than one full run per metric. The folds come from
    ``KFold(n_splits=5, shuffle=True, random_state=42)``, so every metric is
    measured on the same splits.

    Parameters
    ----------
    pipeline : estimator passed to scikit-learn's cross-validation.
    X : feature matrix.
    y : labels; the "precision", "recall", "f1" and "roc_auc" scorers are used
        with their defaults.

    Returns
    -------
    tuple
        Mean over the folds, rounded to 5 decimals, in the order
        (accuracy, precision, roc_auc, recall, f1).

    Side effects
    ------------
    Prints the per-fold scores and their mean for every metric.
    """
    # Imported here so the function does not depend on the notebook's import cell
    # already listing cross_validate.
    from sklearn.model_selection import cross_validate

    kfold = KFold(n_splits=5, shuffle=True, random_state=42)

    # (scorer name, label used in the printed report), in the order of the returned tuple.
    metrics = (
        ("accuracy", "Accuracy"),
        ("precision", "Precision"),
        ("roc_auc", "ROC-AUC"),
        ("recall", "Recall"),
        ("f1", "F1"),
    )

    # One pass over the folds scores every metric on the same fitted models.
    cv_results = cross_validate(
        pipeline, X, y, cv=kfold, scoring=[scorer for scorer, _ in metrics]
    )

    mean_scores = []
    for scorer, label in metrics:
        fold_scores = cv_results[f"test_{scorer}"]
        mean_score = round(np.mean(fold_scores), 5)

        # Print the cross-validation scores
        print(f"Printing {label} scores: ", fold_scores)
        print(f"Mean {label}: ", mean_score, '\n')

        mean_scores.append(mean_score)

    return tuple(mean_scores)

    


In [82]:
# Decision Tree: repeated, reshuffled 5-fold cross-validation.
# Each list collects one cross-validated mean per iteration.
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

for it in range(1,iterations+1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the iteration number: iterations still differ from each
    # other, but re-running the cell reproduces exactly the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].values
    y1 = df_shuffled.iloc[:, -1].values

    decision_tree = DecisionTreeClassifier(criterion = "gini",random_state=42)
    pipeline_decision_tree = create_pipeline(decision_tree)
    # The F1 mean is bound to `f1`; calling it `f1_score` would overwrite the
    # sklearn.metrics.f1_score function imported at the top of the notebook.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_decision_tree,X1,y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.85507246 0.91304348 0.97101449 0.8115942  0.88405797]
Mean Accuracy:  0.88696 

Printing Precision scores:  [0.92156863 0.95744681 1.         0.91111111 0.88      ]
Mean Precision:  0.93403 

Printing ROC-AUC scores:  [0.81839623 0.90918367 0.98275862 0.80473684 0.84782609]
Mean ROC-AUC:  0.87258 

Printing Recall scores:  [0.88679245 0.91836735 0.96551724 0.82       0.95652174]
Mean Recall:  0.90944 

Printing F1 scores:  [0.90384615 0.9375     0.98245614 0.86315789 0.91666667]
Mean F1:  0.92073 

Printing values for iteration 2

Printing Accuracy scores:  [0.82608696 0.92753623 0.94202899 0.92753623 0.86956522]
Mean Accuracy:  0.89855 

Printing Precision scores:  [0.9        0.96       0.93877551 0.94827586 0.93478261]
Mean Precision:  0.93637 

Printing ROC-AUC scores:  [0.78563348 0.91503268 0.92117988 0.85745614 0.86377551]
Mean ROC-AUC:  0.86862 

Printing Recall scores:  [0.86538462 0.94117647 0.9787234  0.96491228 

In [83]:
# Aggregate the per-iteration Decision Tree results collected in the *_list variables.
print("Printing values for 10 iterations Decision Tree")

# Accuracy: arithmetic mean and NumPy standard deviation, reported with 4 decimals
mean_accuracy, std_accuracy = sum(accuracy_list) / len(accuracy_list), np.std(accuracy_list)
print(f"Mean Accuracy: {mean_accuracy : .4f}")
print(f"Std Accuracy: {std_accuracy : .4f}")

# Precision
mean_precision, std_precision = sum(precision_list) / len(precision_list), np.std(precision_list)
print(f"Mean Precision: {mean_precision : .4f}")
print(f"Std Precision: {std_precision : .4f}")

# Recall
mean_recall, std_recall = sum(recall_list) / len(recall_list), np.std(recall_list)
print(f"Mean Recall: {mean_recall : .4f}")
print(f"Std Recall: {std_recall : .4f}")

# ROC-AUC
mean_roc_auc, std_roc_auc = sum(roc_auc_list) / len(roc_auc_list), np.std(roc_auc_list)
print(f"Mean ROC-AUC: {mean_roc_auc : .4f}")
print(f"Std ROC-AUC: {std_roc_auc : .4f}")

# F1-score
mean_f1_score, std_f1_score = sum(f1_score_list) / len(f1_score_list), np.std(f1_score_list)
print(f"Mean F1-Score: {mean_f1_score : .4f}")
print(f"Std F1-Score: {std_f1_score : .4f}")


Printing values for 10 iterations Decision Tree
Mean Accuracy:  0.8907
Std Accuracy:  0.0124
Mean Precision:  0.9331
Std Precision:  0.0074
Mean Recall:  0.9192
Std Recall:  0.0124
Mean ROC-AUC:  0.8655
Std ROC-AUC:  0.0139
Mean F1-Score:  0.9254
Std F1-Score:  0.0086


In [84]:
# AdaBoost: repeated, reshuffled 5-fold cross-validation.
# Each list collects one cross-validated mean per iteration.
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

for it in range(1,iterations+1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the iteration number: iterations still differ from each
    # other, but re-running the cell reproduces exactly the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].values
    y1 = df_shuffled.iloc[:, -1].values

    adaboost = AdaBoostClassifier(n_estimators=50, random_state=42)
    pipeline_adaboost = create_pipeline(adaboost)
    # The F1 mean is bound to `f1`; calling it `f1_score` would overwrite the
    # sklearn.metrics.f1_score function imported at the top of the notebook.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_adaboost,X1,y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.86956522 0.95652174 0.97101449 0.95652174 0.94202899]
Mean Accuracy:  0.93913 

Printing Precision scores:  [0.90909091 0.98076923 0.98181818 0.96078431 0.95454545]
Mean Precision:  0.9574 

Printing ROC-AUC scores:  [0.95308642 0.96108491 0.94415584 0.99368421 0.97909091]
Mean ROC-AUC:  0.96622 

Printing Recall scores:  [0.92592593 0.96226415 0.98181818 0.98       0.95454545]
Mean Recall:  0.96091 

Printing F1 scores:  [0.91743119 0.97142857 0.98181818 0.97029703 0.95454545]
Mean F1:  0.9591 

Printing values for iteration 2

Printing Accuracy scores:  [0.98550725 0.89855072 0.91304348 0.94202899 0.91304348]
Mean Accuracy:  0.93043 

Printing Precision scores:  [1.         0.91071429 0.91836735 0.95918367 0.95918367]
Mean Precision:  0.94949 

Printing ROC-AUC scores:  [1.         0.93632075 0.98259188 0.99285714 0.96840959]
Mean ROC-AUC:  0.97604 

Printing Recall scores:  [0.98214286 0.96226415 0.95744681 0.95918367 0.

In [85]:
# Aggregate the per-iteration AdaBoost results collected in the *_list variables.
print("Printing values for 10 iterations AdaBoost")

# Accuracy: arithmetic mean and NumPy standard deviation, reported with 4 decimals
mean_accuracy, std_accuracy = sum(accuracy_list) / len(accuracy_list), np.std(accuracy_list)
print(f"Mean Accuracy: {mean_accuracy : .4f}")
print(f"Std Accuracy: {std_accuracy : .4f}")

# Precision
mean_precision, std_precision = sum(precision_list) / len(precision_list), np.std(precision_list)
print(f"Mean Precision: {mean_precision : .4f}")
print(f"Std Precision: {std_precision : .4f}")

# Recall
mean_recall, std_recall = sum(recall_list) / len(recall_list), np.std(recall_list)
print(f"Mean Recall: {mean_recall : .4f}")
print(f"Std Recall: {std_recall : .4f}")

# ROC-AUC
mean_roc_auc, std_roc_auc = sum(roc_auc_list) / len(roc_auc_list), np.std(roc_auc_list)
print(f"Mean ROC-AUC: {mean_roc_auc : .4f}")
print(f"Std ROC-AUC: {std_roc_auc : .4f}")

# F1-score
mean_f1_score, std_f1_score = sum(f1_score_list) / len(f1_score_list), np.std(f1_score_list)
print(f"Mean F1-Score: {mean_f1_score : .4f}")
print(f"Std F1-Score: {std_f1_score : .4f}")


Printing values for 10 iterations AdaBoost
Mean Accuracy:  0.9330
Std Accuracy:  0.0068
Mean Precision:  0.9472
Std Precision:  0.0072
Mean Recall:  0.9636
Std Recall:  0.0052
Mean ROC-AUC:  0.9698
Std ROC-AUC:  0.0051
Mean F1-Score:  0.9550
Std F1-Score:  0.0046


In [86]:
# XGBoost: repeated, reshuffled 5-fold cross-validation.
# Each list collects one cross-validated mean per iteration.
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

for it in range(1,iterations+1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the iteration number: iterations still differ from each
    # other, but re-running the cell reproduces exactly the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].values
    y1 = df_shuffled.iloc[:, -1].values

    xgb_classifier = XGBClassifier(n_estimators=100, learning_rate=0.01, random_state=42)
    pipeline_xgb = create_pipeline(xgb_classifier)
    # The F1 mean is bound to `f1`; calling it `f1_score` would overwrite the
    # sklearn.metrics.f1_score function imported at the top of the notebook.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_xgb,X1,y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.91304348 0.92753623 0.92753623 0.88405797 0.92753623]
Mean Accuracy:  0.91594 

Printing Precision scores:  [0.91836735 0.92857143 0.96153846 0.94       0.91071429]
Mean Precision:  0.93184 

Printing ROC-AUC scores:  [0.95841393 0.96933962 0.9321934  0.96153846 0.96949891]
Mean ROC-AUC:  0.9582 

Printing Recall scores:  [0.95744681 0.98113208 0.94339623 0.90384615 1.        ]
Mean Recall:  0.95716 

Printing F1 scores:  [0.9375     0.95412844 0.95238095 0.92156863 0.95327103]
Mean F1:  0.94377 

Printing values for iteration 2

Printing Accuracy scores:  [0.94202899 0.84057971 0.91304348 0.91304348 0.88405797]
Mean Accuracy:  0.89855 

Printing Precision scores:  [0.96363636 0.8245614  0.88888889 0.96666667 0.85714286]
Mean Precision:  0.90018 

Printing ROC-AUC scores:  [0.92532468 0.97718254 0.99206349 0.7983871  0.98747764]
Mean ROC-AUC:  0.93609 

Printing Recall scores:  [0.96363636 0.97916667 1.         0.93548387 0

In [87]:
# Aggregate the per-iteration XGBoost results collected in the *_list variables.
print("Printing values for 10 iterations XGBoost")

# Accuracy: arithmetic mean and NumPy standard deviation, reported with 4 decimals
mean_accuracy, std_accuracy = sum(accuracy_list) / len(accuracy_list), np.std(accuracy_list)
print(f"Mean Accuracy: {mean_accuracy : .4f}")
print(f"Std Accuracy: {std_accuracy : .4f}")

# Precision
mean_precision, std_precision = sum(precision_list) / len(precision_list), np.std(precision_list)
print(f"Mean Precision: {mean_precision : .4f}")
print(f"Std Precision: {std_precision : .4f}")

# Recall
mean_recall, std_recall = sum(recall_list) / len(recall_list), np.std(recall_list)
print(f"Mean Recall: {mean_recall : .4f}")
print(f"Std Recall: {std_recall : .4f}")

# ROC-AUC
mean_roc_auc, std_roc_auc = sum(roc_auc_list) / len(roc_auc_list), np.std(roc_auc_list)
print(f"Mean ROC-AUC: {mean_roc_auc : .4f}")
print(f"Std ROC-AUC: {std_roc_auc : .4f}")

# F1-score
mean_f1_score, std_f1_score = sum(f1_score_list) / len(f1_score_list), np.std(f1_score_list)
print(f"Mean F1-Score: {mean_f1_score : .4f}")
print(f"Std F1-Score: {std_f1_score : .4f}")


Printing values for 10 iterations XGBoost
Mean Accuracy:  0.9125
Std Accuracy:  0.0071
Mean Precision:  0.9188
Std Precision:  0.0096
Mean Recall:  0.9686
Std Recall:  0.0058
Mean ROC-AUC:  0.9529
Std ROC-AUC:  0.0070
Mean F1-Score:  0.9422
Std F1-Score:  0.0046


In [88]:
# Random Forest: repeated, reshuffled 5-fold cross-validation.
# Each list collects one cross-validated mean per iteration.
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

for it in range(1,iterations+1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the iteration number: iterations still differ from each
    # other, but re-running the cell reproduces exactly the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].values
    y1 = df_shuffled.iloc[:, -1].values

    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    pipeline_rf = create_pipeline(rf_classifier)
    # The F1 mean is bound to `f1`; calling it `f1_score` would overwrite the
    # sklearn.metrics.f1_score function imported at the top of the notebook.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_rf,X1,y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.91304348 0.95652174 0.85507246 0.89855072 0.94202899]
Mean Accuracy:  0.91304 

Printing Precision scores:  [0.95744681 0.94117647 0.86440678 0.94339623 0.94444444]
Mean Precision:  0.93017 

Printing ROC-AUC scores:  [0.93877551 0.96180556 0.87323113 0.94506173 0.99604072]
Mean ROC-AUC:  0.94298 

Printing Recall scores:  [0.91836735 1.         0.96226415 0.92592593 0.98076923]
Mean Recall:  0.95747 

Printing F1 scores:  [0.9375     0.96969697 0.91071429 0.93457944 0.96226415]
Mean F1:  0.94295 

Printing values for iteration 2

Printing Accuracy scores:  [0.82608696 0.94202899 0.91304348 0.88405797 0.88405797]
Mean Accuracy:  0.88986 

Printing Precision scores:  [0.85964912 0.94230769 0.89795918 0.89285714 0.96153846]
Mean Precision:  0.91086 

Printing ROC-AUC scores:  [0.89976415 0.98       0.97175926 0.9321267  0.94642857]
Mean ROC-AUC:  0.94602 

Printing Recall scores:  [0.9245283  0.98       0.97777778 0.96153846 

In [89]:
# Aggregate the per-iteration Random Forest results collected in the *_list variables.
print("Printing values for 10 iterations Random Forest")

# Accuracy: arithmetic mean and NumPy standard deviation, reported with 4 decimals
mean_accuracy, std_accuracy = sum(accuracy_list) / len(accuracy_list), np.std(accuracy_list)
print(f"Mean Accuracy: {mean_accuracy : .4f}")
print(f"Std Accuracy: {std_accuracy : .4f}")

# Precision
mean_precision, std_precision = sum(precision_list) / len(precision_list), np.std(precision_list)
print(f"Mean Precision: {mean_precision : .4f}")
print(f"Std Precision: {std_precision : .4f}")

# Recall
mean_recall, std_recall = sum(recall_list) / len(recall_list), np.std(recall_list)
print(f"Mean Recall: {mean_recall : .4f}")
print(f"Std Recall: {std_recall : .4f}")

# ROC-AUC
mean_roc_auc, std_roc_auc = sum(roc_auc_list) / len(roc_auc_list), np.std(roc_auc_list)
print(f"Mean ROC-AUC: {mean_roc_auc : .4f}")
print(f"Std ROC-AUC: {std_roc_auc : .4f}")

# F1-score
mean_f1_score, std_f1_score = sum(f1_score_list) / len(f1_score_list), np.std(f1_score_list)
print(f"Mean F1-Score: {mean_f1_score : .4f}")
print(f"Std F1-Score: {std_f1_score : .4f}")


Printing values for 10 iterations Random Forest
Mean Accuracy:  0.8997
Std Accuracy:  0.0091
Mean Precision:  0.9218
Std Precision:  0.0085
Mean Recall:  0.9464
Std Recall:  0.0061
Mean ROC-AUC:  0.9484
Std ROC-AUC:  0.0045
Mean F1-Score:  0.9332
Std F1-Score:  0.0060


In [90]:
# SVC: repeated, reshuffled 5-fold cross-validation.
# Each list collects one cross-validated mean per iteration.
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

for it in range(1,iterations+1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the iteration number: iterations still differ from each
    # other, but re-running the cell reproduces exactly the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].values
    y1 = df_shuffled.iloc[:, -1].values

    svc_classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
    pipeline_svc = create_pipeline(svc_classifier)
    # The F1 mean is bound to `f1`; calling it `f1_score` would overwrite the
    # sklearn.metrics.f1_score function imported at the top of the notebook.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_svc,X1,y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.91304348 0.88405797 0.91304348 0.86956522 0.85507246]
Mean Accuracy:  0.88696 

Printing Precision scores:  [0.96428571 0.88888889 0.92727273 0.86792453 0.8627451 ]
Mean Precision:  0.90222 

Printing ROC-AUC scores:  [0.90125392 0.93473684 0.94929245 0.93452381 0.95261122]
Mean ROC-AUC:  0.93448 

Printing Recall scores:  [0.93103448 0.96       0.96226415 0.95833333 0.93617021]
Mean Recall:  0.94956 

Printing F1 scores:  [0.94736842 0.92307692 0.94444444 0.91089109 0.89795918]
Mean F1:  0.92475 

Printing values for iteration 2

Printing Accuracy scores:  [0.86956522 0.85507246 0.85507246 0.89855072 0.85507246]
Mean Accuracy:  0.86667 

Printing Precision scores:  [0.92592593 0.89655172 0.83333333 0.90196078 0.88679245]
Mean Precision:  0.88891 

Printing ROC-AUC scores:  [0.94285714 0.87637363 0.9489603  0.96130952 0.92701525]
Mean ROC-AUC:  0.9313 

Printing Recall scores:  [0.90909091 0.92857143 0.97826087 0.95833333 0

In [91]:
# Aggregate the per-iteration SVC results collected in the *_list variables.
print("Printing values for 10 iterations SVC")

# Accuracy: arithmetic mean and NumPy standard deviation, reported with 4 decimals
mean_accuracy, std_accuracy = sum(accuracy_list) / len(accuracy_list), np.std(accuracy_list)
print(f"Mean Accuracy: {mean_accuracy : .4f}")
print(f"Std Accuracy: {std_accuracy : .4f}")

# Precision
mean_precision, std_precision = sum(precision_list) / len(precision_list), np.std(precision_list)
print(f"Mean Precision: {mean_precision : .4f}")
print(f"Std Precision: {std_precision : .4f}")

# Recall
mean_recall, std_recall = sum(recall_list) / len(recall_list), np.std(recall_list)
print(f"Mean Recall: {mean_recall : .4f}")
print(f"Std Recall: {std_recall : .4f}")

# ROC-AUC
mean_roc_auc, std_roc_auc = sum(roc_auc_list) / len(roc_auc_list), np.std(roc_auc_list)
print(f"Mean ROC-AUC: {mean_roc_auc : .4f}")
print(f"Std ROC-AUC: {std_roc_auc : .4f}")

# F1-score
mean_f1_score, std_f1_score = sum(f1_score_list) / len(f1_score_list), np.std(f1_score_list)
print(f"Mean F1-Score: {mean_f1_score : .4f}")
print(f"Std F1-Score: {std_f1_score : .4f}")


Printing values for 10 iterations SVC
Mean Accuracy:  0.8754
Std Accuracy:  0.0093
Mean Precision:  0.8943
Std Precision:  0.0069
Mean Recall:  0.9444
Std Recall:  0.0069
Mean ROC-AUC:  0.9357
Std ROC-AUC:  0.0045
Mean F1-Score:  0.9179
Std F1-Score:  0.0062


In [92]:
# KNN: repeated, reshuffled 5-fold cross-validation.
# Each list collects one cross-validated mean per iteration.
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

for it in range(1,iterations+1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the iteration number: iterations still differ from each
    # other, but re-running the cell reproduces exactly the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].values
    y1 = df_shuffled.iloc[:, -1].values

    knn = KNeighborsClassifier(n_neighbors=6)
    pipeline_knn = create_pipeline(knn)
    # The F1 mean is bound to `f1`; calling it `f1_score` would overwrite the
    # sklearn.metrics.f1_score function imported at the top of the notebook.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_knn,X1,y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.89855072 0.84057971 0.82608696 0.91304348 0.8115942 ]
Mean Accuracy:  0.85797 

Printing Precision scores:  [0.92307692 0.86       0.94       1.         0.86792453]
Mean Precision:  0.9182 

Printing ROC-AUC scores:  [0.97276688 0.92359768 0.9010989  0.94368421 0.83766968]
Mean ROC-AUC:  0.91576 

Printing Recall scores:  [0.94117647 0.91489362 0.83928571 0.88       0.88461538]
Mean Recall:  0.89199 

Printing F1 scores:  [0.93203883 0.88659794 0.88679245 0.93617021 0.87619048]
Mean F1:  0.90356 

Printing values for iteration 2

Printing Accuracy scores:  [0.85507246 0.84057971 0.79710145 0.91304348 0.88405797]
Mean Accuracy:  0.85797 

Printing Precision scores:  [0.86666667 0.92307692 0.9375     0.94444444 0.9       ]
Mean Precision:  0.91434 

Printing ROC-AUC scores:  [0.92844365 0.89545455 0.9010989  0.97407407 0.9280754 ]
Mean ROC-AUC:  0.92543 

Printing Recall scores:  [0.90697674 0.87272727 0.80357143 0.94444444 0

In [93]:
# Aggregate the per-iteration KNN results collected in the *_list variables.
print("Printing values for 10 iterations KNN")

# Accuracy: arithmetic mean and NumPy standard deviation, reported with 4 decimals
mean_accuracy, std_accuracy = sum(accuracy_list) / len(accuracy_list), np.std(accuracy_list)
print(f"Mean Accuracy: {mean_accuracy : .4f}")
print(f"Std Accuracy: {std_accuracy : .4f}")

# Precision
mean_precision, std_precision = sum(precision_list) / len(precision_list), np.std(precision_list)
print(f"Mean Precision: {mean_precision : .4f}")
print(f"Std Precision: {std_precision : .4f}")

# Recall
mean_recall, std_recall = sum(recall_list) / len(recall_list), np.std(recall_list)
print(f"Mean Recall: {mean_recall : .4f}")
print(f"Std Recall: {std_recall : .4f}")

# ROC-AUC
mean_roc_auc, std_roc_auc = sum(roc_auc_list) / len(roc_auc_list), np.std(roc_auc_list)
print(f"Mean ROC-AUC: {mean_roc_auc : .4f}")
print(f"Std ROC-AUC: {std_roc_auc : .4f}")

# F1-score
mean_f1_score, std_f1_score = sum(f1_score_list) / len(f1_score_list), np.std(f1_score_list)
print(f"Mean F1-Score: {mean_f1_score : .4f}")
print(f"Std F1-Score: {std_f1_score : .4f}")


Printing values for 10 iterations KNN
Mean Accuracy:  0.8684
Std Accuracy:  0.0077
Mean Precision:  0.9310
Std Precision:  0.0111
Mean Recall:  0.8889
Std Recall:  0.0066
Mean ROC-AUC:  0.9213
Std ROC-AUC:  0.0063
Mean F1-Score:  0.9082
Std F1-Score:  0.0048


In [94]:
# Naive Bayes: repeated, reshuffled 5-fold cross-validation.
# Each list collects one cross-validated mean per iteration.
accuracy_list = []
precision_list = []
recall_list = []
roc_auc_list = []
f1_score_list = []

iterations = 10

for it in range(1,iterations+1):
    print(f"Printing values for iteration {it}\n")
    # Seed the shuffle with the iteration number: iterations still differ from each
    # other, but re-running the cell reproduces exactly the same numbers.
    df_shuffled = df.sample(frac=1, random_state=it)

    # Extracting values from the shuffled DataFrame in form of NumPy array
    X1 = df_shuffled.iloc[:, :-1].values
    y1 = df_shuffled.iloc[:, -1].values

    naive_bayes = GaussianNB()
    pipeline_naive_bayes = create_pipeline(naive_bayes)
    # The F1 mean is bound to `f1`; calling it `f1_score` would overwrite the
    # sklearn.metrics.f1_score function imported at the top of the notebook.
    acc, precision, roc_auc, recall, f1 = calculate_metrics(pipeline_naive_bayes,X1,y1)
    accuracy_list.append(acc)
    precision_list.append(precision)
    roc_auc_list.append(roc_auc)
    recall_list.append(recall)
    f1_score_list.append(f1)


Printing values for iteration 1

Printing Accuracy scores:  [0.69565217 0.71014493 0.71014493 0.65217391 0.71014493]
Mean Accuracy:  0.69565 

Printing Precision scores:  [0.95652174 0.97297297 0.91891892 0.96666667 0.95      ]
Mean Precision:  0.95302 

Printing ROC-AUC scores:  [0.90035273 0.88246753 0.80010893 0.8178733  0.82211538]
Mean ROC-AUC:  0.84458 

Printing Recall scores:  [0.52380952 0.65454545 0.66666667 0.55769231 0.67857143]
Mean Recall:  0.61626 

Printing F1 scores:  [0.67692308 0.7826087  0.77272727 0.70731707 0.79166667]
Mean F1:  0.74625 

Printing values for iteration 2

Printing Accuracy scores:  [0.68115942 0.79710145 0.62318841 0.72463768 0.62318841]
Mean Accuracy:  0.68986 

Printing Precision scores:  [0.875      0.97368421 0.96774194 0.94736842 1.        ]
Mean Precision:  0.95276 

Printing ROC-AUC scores:  [0.81143667 0.91105263 0.89220779 0.87087264 0.80882353]
Mean ROC-AUC:  0.85888 

Printing Recall scores:  [0.60869565 0.74       0.54545455 0.67924528 

In [95]:
# Aggregate the per-iteration Naive Bayes results collected in the *_list variables.
print("Printing values for 10 iterations Naive Bayes")

# Accuracy: arithmetic mean and NumPy standard deviation, reported with 4 decimals
mean_accuracy, std_accuracy = sum(accuracy_list) / len(accuracy_list), np.std(accuracy_list)
print(f"Mean Accuracy: {mean_accuracy : .4f}")
print(f"Std Accuracy: {std_accuracy : .4f}")

# Precision
mean_precision, std_precision = sum(precision_list) / len(precision_list), np.std(precision_list)
print(f"Mean Precision: {mean_precision : .4f}")
print(f"Std Precision: {std_precision : .4f}")

# Recall
mean_recall, std_recall = sum(recall_list) / len(recall_list), np.std(recall_list)
print(f"Mean Recall: {mean_recall : .4f}")
print(f"Std Recall: {std_recall : .4f}")

# ROC-AUC
mean_roc_auc, std_roc_auc = sum(roc_auc_list) / len(roc_auc_list), np.std(roc_auc_list)
print(f"Mean ROC-AUC: {mean_roc_auc : .4f}")
print(f"Std ROC-AUC: {std_roc_auc : .4f}")

# F1-score
mean_f1_score, std_f1_score = sum(f1_score_list) / len(f1_score_list), np.std(f1_score_list)
print(f"Mean F1-Score: {mean_f1_score : .4f}")
print(f"Std F1-Score: {std_f1_score : .4f}")


Printing values for 10 iterations Naive Bayes
Mean Accuracy:  0.6994
Std Accuracy:  0.0100
Mean Precision:  0.9502
Std Precision:  0.0053
Mean Recall:  0.6275
Std Recall:  0.0144
Mean ROC-AUC:  0.8549
Std ROC-AUC:  0.0083
Mean F1-Score:  0.7532
Std F1-Score:  0.0097
