In [78]:
#imports
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import LeaveOneOut

# Data processing

Load csv files

In [79]:
df_prostate = pd.read_csv('prostate.csv')
df_renal = pd.read_csv('renal.csv')
df_throat = pd.read_csv('throat.csv')
df_normal= pd.read_csv('normal.csv')

Combine normal and cancer entries, have an equal number of cancer and normal

In [80]:
def create_combined_df(cancer_df, normal_df, cancer_type):
    cancer_count = len(cancer_df)
    #sample until we get an equal number of normal entries (might need to do sampling with replacement depending on the count)
    normal_sample = normal_df.sample(n=cancer_count, random_state=42, replace=False)

    #combine the dataframes
    combined_df = pd.concat([cancer_df, normal_sample], ignore_index=True)
    return combined_df


In [81]:
#create combined dataframes
combined_prostate = create_combined_df(df_prostate, df_normal, 'prostate')
combined_renal = create_combined_df(df_renal, df_normal, 'renal')
combined_throat = create_combined_df(df_throat, df_normal, 'throat')

Make a copy of the dataframe for the first round of classification

In [82]:
df_prostate_cancer_classification = combined_prostate.copy()
df_renal_cancer_classification = combined_renal.copy()
df_throat_cancer_classification = combined_throat.copy()

Drop `type`since we want to predict cancer type and not subtype

In [83]:
df_prostate_cancer_classification.drop(columns=['type'], inplace=True)
df_renal_cancer_classification.drop(columns=['type'], inplace=True)
df_throat_cancer_classification.drop(columns=['type'], inplace=True)

Processing for the second round of classification

In [84]:
#make df for subtype
df_prostate_subtype_classification = combined_prostate.copy()
df_renal_subtype_classification = combined_renal.copy()
df_throat_subtype_classification = combined_throat.copy()

In [85]:
#drop cancer type since we only want subtype
df_prostate_subtype_classification.drop(columns=['cancer_type'], inplace=True)
df_renal_subtype_classification.drop(columns=['cancer_type'], inplace=True)
df_throat_subtype_classification.drop(columns=['cancer_type'], inplace=True)

In [95]:
#Scale and split for train and val
scaler = StandardScaler()

X_prostate_subtype = df_prostate_subtype_classification.drop(columns=['type'])
X_prostate_subtype = scaler.fit_transform(X_prostate_subtype)

y_prostate_subtype = df_prostate_subtype_classification['type']
X_train_prostate_subtype, X_val_prostate_subtype, y_train_prostate_subtype, y_val_prostate_subtype = train_test_split(X_prostate_subtype, y_prostate_subtype, test_size=0.2, random_state=42)

X_renal_subtype = df_renal_subtype_classification.drop(columns=['type'])
X_renal_subtype = scaler.fit_transform(X_renal_subtype)

y_renal_subtype = df_renal_subtype_classification['type']
X_train_renal_subtype, X_val_renal_subtype, y_train_renal_subtype, y_val_renal_subtype = train_test_split(X_renal_subtype, y_renal_subtype, test_size=0.2, random_state=42)

X_throat_subtype = df_throat_subtype_classification.drop(columns=['type'])
X_throat_subtype = scaler.fit_transform(X_throat_subtype)

y_throat_subtype = df_throat_subtype_classification['type']
X_train_throat_subtype, X_val_throat_subtype, y_train_throat_subtype, y_val_throat_subtype = train_test_split(X_throat_subtype, y_throat_subtype, test_size=0.2, random_state=42)

# Classification models for each cancer type

In [None]:
# function for logistic regression for cancer type using LOOCV
def logistic_regression_loocv(X, y):
    loo = LeaveOneOut()
    y_true = []
    y_pred = []
    y_pred_probs = []

    for train_index, test_index in loo.split(X):
        # split into training and testing
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # apply PCA
        pca = PCA(n_components=0.95)
        X_train_PCA = pca.fit_transform(X_train)
        X_test_PCA = pca.transform(X_test)

        # train logistic regression model
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train_PCA, y_train)

        # predict and save results
        y_pred.append(model.predict(X_test_PCA)[0]) 
        y_pred_probs.append(model.predict_proba(X_test_PCA)[0])
        y_true.append(y_test[0])

    return y_true, y_pred, y_pred_probs


Prostate Cancer Classification:
Renal Cancer Classification:
Throat Cancer Classification:
LOOCV Accuracy: 0.94


' \nProstate Cancer Classification:\nLOOCV Accuracy: 0.96\nRenal Cancer Classification:\nLOOCV Accuracy: 0.96\n\n'

In [None]:
# run logistic regression with LOOCV on datasets
def process_data_and_run_loocv(df):
    # preprocess data
    scaler = StandardScaler()
    X = df.drop(columns=['cancer_type']).values  
    X = scaler.fit_transform(X)
    y = df['cancer_type'].values 

    # run model
    y_true, y_pred, y_pred_probs = logistic_regression_loocv(X, y)

    # evaluate and return results
    accuracy = accuracy_score(y_true, y_pred)
    print(f"LOOCV Accuracy: {accuracy:.2f}")
    return y_true, y_pred, y_pred_probs


# run on dataset
print("Prostate Cancer Classification:")
process_data_and_run_loocv(df_prostate_cancer_classification)

print("Renal Cancer Classification:")
process_data_and_run_loocv(df_renal_cancer_classification)

print("Throat Cancer Classification:")
process_data_and_run_loocv(df_throat_cancer_classification)

""" 
Prostate Cancer Classification:
LOOCV Accuracy: 0.96
Renal Cancer Classification:
LOOCV Accuracy: 0.96
Throat Cancer Classification:
LOOCV Accuracy: 0.94

"""

In [None]:
# function for random forest with LOOCV
def random_forest_loocv(X, y):
    loo = LeaveOneOut()
    y_true = []
    y_pred = []
    y_pred_probs = []

    for train_index, test_index in loo.split(X):
        # split into training and testing
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # apply PCA
        pca = PCA(n_components=0.95)
        X_train_PCA = pca.fit_transform(X_train)
        X_test_PCA = pca.transform(X_test)

        # train random forest model
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train_PCA, y_train)

        # predict and save results
        y_pred.append(model.predict(X_test_PCA)[0]) 
        y_pred_probs.append(model.predict_proba(X_test_PCA)[0])
        y_true.append(y_test[0])

    return y_true, y_pred, y_pred_probs

In [None]:
# run random forest with LOOCV on datasets
def process_data_and_run_random_forest_loocv(df):

    # preprocess data
    scaler = StandardScaler()
    X = df.drop(columns=['cancer_type']).values 
    X = scaler.fit_transform(X)
    y = df['cancer_type'].values  

    # run model
    y_true, y_pred, y_pred_probs = random_forest_loocv(X, y)

    # evaluate and return results
    accuracy = accuracy_score(y_true, y_pred)
    print(f"LOOCV Accuracy: {accuracy:.2f}")
    return y_true, y_pred, y_pred_probs


# run on datasets
print("Prostate Cancer Classification with Random Forest:")
process_data_and_run_random_forest_loocv(df_prostate_cancer_classification)

print("Renal Cancer Classification with Random Forest:")
process_data_and_run_random_forest_loocv(df_renal_cancer_classification)

print("Throat Cancer Classification with Random Forest:")
process_data_and_run_random_forest_loocv(df_throat_cancer_classification)

"""  
Prostate Cancer Classification with Random Forest:
LOOCV Accuracy: 0.92
Renal Cancer Classification with Random Forest:
LOOCV Accuracy: 0.95
Throat Cancer Classification with Random Forest:
LOOCV Accuracy: 0.94
"""


Prostate Cancer Classification with Random Forest:
LOOCV Accuracy: 0.92
Renal Cancer Classification with Random Forest:
LOOCV Accuracy: 0.95
Throat Cancer Classification with Random Forest:
LOOCV Accuracy: 0.94


(['throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  'throat',
  't

Run the cancer type classification models

# Classification models for specific cancer subtype

In [None]:
# Decision tree function 
def decision_tree(X_train, y_train, X_val, y_val):
    # apply PCA 
    pca = PCA(n_components=0.95)
    X_train_PCA = pca.fit_transform(X_train)
    X_val_PCA = pca.transform(X_val)

    # train model
    model = DecisionTreeClassifier()
    model.fit(X_train_PCA, y_train)
    y_pred = model.predict(X_val_PCA)

    # evaluate the model
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')  #use weighted in case of multi class classification
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    return y_pred, accuracy, precision, recall, f1


In [None]:
#Naive Bayes
def naive_bayes(X_train, y_train, X_val, y_val):
  #apply PCA 
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  # train the model
  model= GaussianNB()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  # evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [None]:
#KNN
def knn(X_train, y_train, X_val, y_val):
  # apply PCA 
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  # train the model
  model= KNeighborsClassifier()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  # evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [None]:
# Logistic Regression
def logistic_regression_subtype(X_train, y_train, X_val, y_val):
  # apply PCA 
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  # run the model
  model= LogisticRegression()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  # evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [None]:
# Random Forest
def random_forest_subtype(X_train, y_train, X_val, y_val):
  # apply PCA 
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  # train the model
  model= RandomForestClassifier()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  # evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [None]:
# SVM
def svm(X_train, y_train, X_val, y_val):
  # apply PCA 
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  # train the model
  model= LinearSVC()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  # evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


Run the cancer subtype classification models

In [None]:
#function to run all the models and print the metrics
def evaluate_classifier(classifier_function, X_train, y_train, X_val, y_val, classifier_name):
    #prints the metrics and returns them for further use
    
    # run the classifier
    y_pred, accuracy, precision, recall, f1 = classifier_function(X_train, y_train, X_val, y_val)

    # print results
    print(f"Results for {classifier_name}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\n")

    # return metrics for further use
    return {
        "y_pred": y_pred,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }



In [None]:
#run on prostate
results_decision_tree = evaluate_classifier(
    decision_tree,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "Decision Tree"
)

results_naive_bayes = evaluate_classifier(
    naive_bayes,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "Naive Bayes"
)

results_KNN = evaluate_classifier(
    knn,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "KNN"
)

results_logistic_regression = evaluate_classifier(
    logistic_regression_subtype,
    X_train_prostate_subtype, y_train_prostate_subtype,
     X_val_prostate_subtype, y_val_prostate_subtype,
    "Logistic Regression"
)

results_random_forest = evaluate_classifier(
    random_forest_subtype,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "Random Forest"

)

results_SVM = evaluate_classifier(
    svm,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "SVM"
)

"""
Results for Decision Tree:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Naive Bayes:
Accuracy: 0.8421
Precision: 0.8895
Recall: 0.8421
F1 Score: 0.8448


Results for KNN:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for SVM:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480

"""


Results for Decision Tree:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Naive Bayes:
Accuracy: 0.8421
Precision: 0.8895
Recall: 0.8421
F1 Score: 0.8448


Results for KNN:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for SVM:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480




'\nResults for Decision Tree:\nAccuracy: 1.0000\nPrecision: 1.0000\nRecall: 1.0000\nF1 Score: 1.0000\n\n\nResults for Naive Bayes:\nAccuracy: 0.8421\nPrecision: 0.8895\nRecall: 0.8421\nF1 Score: 0.8448\n\n\nResults for KNN:\nAccuracy: 1.0000\nPrecision: 1.0000\nRecall: 1.0000\nF1 Score: 1.0000\n\n\nResults for Logistic Regression:\nAccuracy: 0.9474\nPrecision: 0.9539\nRecall: 0.9474\nF1 Score: 0.9480\n\n\nResults for Random Forest:\nAccuracy: 0.9474\nPrecision: 0.9539\nRecall: 0.9474\nF1 Score: 0.9480\n\n\nResults for SVM:\nAccuracy: 0.9474\nPrecision: 0.9539\nRecall: 0.9474\nF1 Score: 0.9480\n\n'

In [None]:
#run on renal
results_decision_tree = evaluate_classifier(
    decision_tree,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Decision Tree"
)

results_naive_bayes = evaluate_classifier(
    naive_bayes,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Naive Bayes"
)

results_KNN = evaluate_classifier(
    knn,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "KNN"
)

results_logistic_regression = evaluate_classifier(
    logistic_regression_subtype,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Logistic Regression"
)

results_random_forest = evaluate_classifier(
    random_forest_subtype,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Random Forest"
)

results_SVM = evaluate_classifier(
    svm,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "SVM"
)

"""
Results for Decision Tree:
Accuracy: 0.9688
Precision: 0.9705
Recall: 0.9688
F1 Score: 0.9687


Results for Naive Bayes:
Accuracy: 0.9062
Precision: 0.9219
Recall: 0.9062
F1 Score: 0.9060


Results for KNN:
Accuracy: 0.9688
Precision: 0.9707
Recall: 0.9688
F1 Score: 0.9688


Results for Logistic Regression:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375


Results for Random Forest:
Accuracy: 0.9375
Precision: 0.9375
Recall: 0.9375
F1 Score: 0.9375


Results for SVM:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375

"""

Results for Decision Tree:
Accuracy: 0.9688
Precision: 0.9705
Recall: 0.9688
F1 Score: 0.9687


Results for Naive Bayes:
Accuracy: 0.9062
Precision: 0.9219
Recall: 0.9062
F1 Score: 0.9060


Results for KNN:
Accuracy: 0.9688
Precision: 0.9707
Recall: 0.9688
F1 Score: 0.9688


Results for Logistic Regression:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375


Results for Random Forest:
Accuracy: 0.9375
Precision: 0.9375
Recall: 0.9375
F1 Score: 0.9375


Results for SVM:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375




'\nResults for Decision Tree:\nAccuracy: 0.9375\nPrecision: 0.9375\nRecall: 0.9375\nF1 Score: 0.9375\n\n\nResults for Naive Bayes:\nAccuracy: 0.9062\nPrecision: 0.9219\nRecall: 0.9062\nF1 Score: 0.9060\n\n\nResults for KNN:\nAccuracy: 0.9688\nPrecision: 0.9707\nRecall: 0.9688\nF1 Score: 0.9688\n\n\nResults for Logistic Regression:\nAccuracy: 0.9375\nPrecision: 0.9449\nRecall: 0.9375\nF1 Score: 0.9375\n\n\nResults for Random Forest:\nAccuracy: 0.9688\nPrecision: 0.9707\nRecall: 0.9688\nF1 Score: 0.9688\n\n\nResults for SVM:\nAccuracy: 0.9375\nPrecision: 0.9449\nRecall: 0.9375\nF1 Score: 0.9375\n\n'

In [None]:
#run on throat
results_decision_tree = evaluate_classifier(
    decision_tree,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype, y_val_throat_subtype,
    "Decision Tree"
)

results_naive_bayes = evaluate_classifier(
    naive_bayes,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype, y_val_throat_subtype,
    "Naive Bayes"
)

results_KNN = evaluate_classifier(
    knn,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "KNN"
)

results_logistic_regression = evaluate_classifier(
    logistic_regression_subtype,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "Logistic Regression"
)

results_random_forest = evaluate_classifier(
    random_forest_subtype,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "Random Forest"
)

results_SVM = evaluate_classifier(
    svm,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "SVM"
)

"""
Results for Decision Tree:
Accuracy: 0.9737
Precision: 0.9749
Recall: 0.9737
F1 Score: 0.9734


Results for Naive Bayes:
Accuracy: 0.9211
Precision: 0.9380
Recall: 0.9211
F1 Score: 0.9226


Results for KNN:
Accuracy: 0.9474
Precision: 0.9555
Recall: 0.9474
F1 Score: 0.9482


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9474
Recall: 0.9474
F1 Score: 0.9474


Results for SVM:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460

"""


Results for Decision Tree:
Accuracy: 0.9737
Precision: 0.9749
Recall: 0.9737
F1 Score: 0.9734


Results for Naive Bayes:
Accuracy: 0.9211
Precision: 0.9380
Recall: 0.9211
F1 Score: 0.9226


Results for KNN:
Accuracy: 0.9474
Precision: 0.9555
Recall: 0.9474
F1 Score: 0.9482


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9474
Recall: 0.9474
F1 Score: 0.9474


Results for SVM:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460




'\nResults for Decision Tree:\nAccuracy: 0.9737\nPrecision: 0.9749\nRecall: 0.9737\nF1 Score: 0.9734\n\n\nResults for Naive Bayes:\nAccuracy: 0.9211\nPrecision: 0.9380\nRecall: 0.9211\nF1 Score: 0.9226\n\n\nResults for KNN:\nAccuracy: 0.9474\nPrecision: 0.9555\nRecall: 0.9474\nF1 Score: 0.9482\n\n\nResults for Logistic Regression:\nAccuracy: 0.9474\nPrecision: 0.9522\nRecall: 0.9474\nF1 Score: 0.9460\n\n\nResults for Random Forest:\nAccuracy: 0.9474\nPrecision: 0.9474\nRecall: 0.9474\nF1 Score: 0.9474\n\n\nResults for SVM:\nAccuracy: 0.9474\nPrecision: 0.9522\nRecall: 0.9474\nF1 Score: 0.9460\n\n\n'