In [1]:
#imports
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Data processing

Load csv files

In [2]:
df_prostate = pd.read_csv('prostate.csv')
df_renal = pd.read_csv('renal.csv')
df_throat = pd.read_csv('throat.csv')
df_normal= pd.read_csv('normal.csv')

Combine normal and cancer entries, have an equal number of cancer and normal

In [3]:
def create_combined_df(cancer_df, normal_df, cancer_type):
    cancer_count = len(cancer_df)
    #sample until we get an equal number of normal entries (might need to do sampling with replacement depending on the count)
    normal_sample = normal_df.sample(n=cancer_count, random_state=42, replace=False)

    #combine the dataframes
    combined_df = pd.concat([cancer_df, normal_sample], ignore_index=True)
    return combined_df


In [4]:
#create combined dataframes
combined_prostate = create_combined_df(df_prostate, df_normal, 'prostate')
combined_renal = create_combined_df(df_renal, df_normal, 'renal')
combined_throat = create_combined_df(df_throat, df_normal, 'throat')

Make a copy of the dataframe for the first round of classification

In [5]:
df_prostate_cancer_classification = combined_prostate.copy()
df_renal_cancer_classification = combined_renal.copy()
df_throat_cancer_classification = combined_throat.copy()

Drop `type`since we want to predict cancer type and not subtype

In [6]:
df_prostate_cancer_classification.drop(columns=['type'], inplace=True)
df_renal_cancer_classification.drop(columns=['type'], inplace=True)
df_throat_cancer_classification.drop(columns=['type'], inplace=True)

Scale and split the dataframes

In [7]:
scaler = StandardScaler()
X_prostate_type = df_prostate_cancer_classification.drop(columns=['cancer_type'])
X_prostate_type = scaler.fit_transform(X_prostate_type)

y_prostate_type = df_prostate_cancer_classification['cancer_type']
X_train_prostate_type, X_val_prostate_type, y_train_prostate_type, y_val_prostate_type = train_test_split(X_prostate_type, y_prostate_type, test_size=0.2, random_state=42)

X_renal_type = df_renal_cancer_classification.drop(columns=['cancer_type'])
X_renal_type = scaler.fit_transform(X_renal_type)

y_renal_type = df_renal_cancer_classification['cancer_type']
X_train_renal_type, X_val_renal_type, y_train_renal_type, y_val_renal_type = train_test_split(X_renal_type, y_renal_type, test_size=0.2, random_state=42)

X_throat_type = df_throat_cancer_classification.drop(columns=['cancer_type'])
X_throat_type = scaler.fit_transform(X_throat_type)

y_throat_type = df_throat_cancer_classification['cancer_type']
X_train_throat_type, X_val_throat_type, y_train_throat_type, y_val_throat_type = train_test_split(X_throat_type, y_throat_type, test_size=0.2, random_state=42)

Processing for the second round of classification

In [8]:
#make df for subtype
df_prostate_subtype_classification = combined_prostate.copy()
df_renal_subtype_classification = combined_renal.copy()
df_throat_subtype_classification = combined_throat.copy()

In [9]:
#drop cancer type since we only want subtype
df_prostate_subtype_classification.drop(columns=['cancer_type'], inplace=True)
df_renal_subtype_classification.drop(columns=['cancer_type'], inplace=True)
df_throat_subtype_classification.drop(columns=['cancer_type'], inplace=True)

In [10]:
#Scale and split for train and val
scaler = StandardScaler()

X_prostate_subtype = df_prostate_subtype_classification.drop(columns=['type'])
X_prostate_subtype = scaler.fit_transform(X_prostate_subtype)

y_prostate_subtype = df_prostate_subtype_classification['type']
X_train_prostate_subtype, X_val_prostate_subtype, y_train_prostate_subtype, y_val_prostate_subtype = train_test_split(X_prostate_subtype, y_prostate_subtype, test_size=0.2, random_state=42)

X_renal_subtype = df_renal_subtype_classification.drop(columns=['type'])
X_renal_subtype = scaler.fit_transform(X_renal_subtype)

y_renal_subtype = df_renal_subtype_classification['type']
X_train_renal_subtype, X_val_renal_subtype, y_train_renal_subtype, y_val_renal_subtype = train_test_split(X_renal_subtype, y_renal_subtype, test_size=0.2, random_state=42)

X_throat_subtype = df_throat_subtype_classification.drop(columns=['type'])
X_throat_subtype = scaler.fit_transform(X_throat_subtype)

y_throat_subtype = df_throat_subtype_classification['type']
X_train_throat_subtype, X_val_throat_subtype, y_train_throat_subtype, y_val_throat_subtype = train_test_split(X_throat_subtype, y_throat_subtype, test_size=0.2, random_state=42)

# Classification models for each cancer type

In [11]:
#logistic regression
def logistic_regression(X_train, y_train, X_val, y_val):
  #apply PCA first
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  model= LogisticRegression()
  model.fit(X_train_PCA, y_train)

  y_pred = model.predict(X_val_PCA)
  y_pred_prob = model.predict_proba(X_val_PCA)
  return y_pred, y_pred_prob #return the label and its probability


In [12]:
#random forest
def random_forest(X_train, y_train, X_val, y_val):
  #apply PCA first
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  model= RandomForestClassifier()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)
  y_pred_prob = model.predict_proba(X_val_PCA)
  return y_pred, y_pred_prob #return the label and its probability


Run the cancer type classification models

In [13]:
#logistic regression
#testing it on the first entry of the validation set
y_pred_prostate_type, y_pred_prob_prostate_type = logistic_regression(X_train_prostate_type, y_train_prostate_type, X_val_prostate_type, y_val_prostate_type)
print("prostate: ", y_pred_prostate_type[0], y_pred_prob_prostate_type[0])

y_pred_renal_type, y_pred_prob_renal_type = logistic_regression(X_train_renal_type, y_train_renal_type, X_val_renal_type, y_val_renal_type)
print("renal: ", y_pred_renal_type[0], y_pred_prob_renal_type[0])

y_pred_throat_type, y_pred_prob_throat_type = logistic_regression(X_train_throat_type, y_train_throat_type, X_val_throat_type, y_val_throat_type)
print("throat: ", y_pred_throat_type[0], y_pred_prob_throat_type[0])

"""
prostate:  prostate [1.93153834e-06 9.99998068e-01]
renal:  normal [1.00000000e+00 4.82448525e-10]
throat:  normal [9.99999961e-01 3.88757698e-08]
"""
#confirmed that they correctly predicted the label


prostate:  prostate [1.93153834e-06 9.99998068e-01]
renal:  normal [1.00000000e+00 4.82448525e-10]
throat:  normal [9.99999961e-01 3.88757698e-08]


'\nprostate:  prostate [1.61454653e-05 9.99983855e-01]\nrenal:  normal [9.99999999e-01 1.35605250e-09]\nthroat:  normal [1.00000000e+00 4.57303653e-12]\n'

In [14]:
#random forest
#testing it on the first entry of the validation set
y_pred_prostate_type, y_pred_prob_prostate_type = random_forest(X_train_prostate_type, y_train_prostate_type, X_val_prostate_type, y_val_prostate_type)
print("prostate: ", y_pred_prostate_type[0], y_pred_prob_prostate_type[0])

y_pred_renal_type, y_pred_prob_renal_type = random_forest(X_train_renal_type, y_train_renal_type, X_val_renal_type, y_val_renal_type)
print("renal: ", y_pred_renal_type[0], y_pred_prob_renal_type[0])

y_pred_throat_type, y_pred_prob_throat_type = random_forest(X_train_throat_type, y_train_throat_type, X_val_throat_type, y_val_throat_type)
print("throat: ", y_pred_throat_type[0], y_pred_prob_throat_type[0])

"""
prostate:  prostate [0.21 0.79]
renal:  normal [0.94 0.06]
throat:  normal [0.86 0.14]
"""
#confirmed that they correctly predicted the label


prostate:  prostate [0.21 0.79]
renal:  normal [0.94 0.06]
throat:  normal [0.86 0.14]


'\nprostate:  prostate [0.29 0.71]\nnormal [0.95 0.05]\nnormal [0.97 0.03]\n'

# Make classification models for specific cancer type

In [15]:
# Decision tree function for multi-class classification
def decision_tree(X_train, y_train, X_val, y_val):
    # Apply PCA first
    pca = PCA(n_components=0.95)
    X_train_PCA = pca.fit_transform(X_train)
    X_val_PCA = pca.transform(X_val)

    # Create the model
    model = DecisionTreeClassifier()
    model.fit(X_train_PCA, y_train)
    y_pred = model.predict(X_val_PCA)

    # Evaluate the model
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')  #use weighted in case of multi class classification
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    return y_pred, accuracy, precision, recall, f1


In [16]:
#Naive Bayes
def naive_bayes(X_train, y_train, X_val, y_val):
  #apply PCA first
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  #create the model
  model= GaussianNB()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  #evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [17]:
#KNN
def knn(X_train, y_train, X_val, y_val):
  #apply PCA first
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  #create the model
  model= KNeighborsClassifier()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  #evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [18]:
#Logistic Regression
def logistic_regression_subtype(X_train, y_train, X_val, y_val):
  #apply PCA first
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  #create the model
  model= LogisticRegression()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  #evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [19]:
#Random Forest
def random_forest_subtype(X_train, y_train, X_val, y_val):
  #apply PCA first
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  #create the model
  model= RandomForestClassifier()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  #evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


In [20]:
#SVM
def svm(X_train, y_train, X_val, y_val):
  #apply PCA first
  pca = PCA(n_components=0.95)
  X_train_PCA = pca.fit_transform(X_train)
  X_val_PCA = pca.transform(X_val)

  #create the model
  model= LinearSVC()
  model.fit(X_train_PCA, y_train)
  y_pred = model.predict(X_val_PCA)

  #evaluate the model
  accuracy = accuracy_score(y_val, y_pred)
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')
  return y_pred, accuracy, precision, recall, f1


Run the cancer subtype classification models

In [21]:
#function to run all the models and print the metrics
def evaluate_classifier(classifier_function, X_train, y_train, X_val, y_val, classifier_name):
    #prints the metrics and returns them for further use
    #run the classifier
    y_pred, accuracy, precision, recall, f1 = classifier_function(X_train, y_train, X_val, y_val)

    #print results
    print(f"Results for {classifier_name}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\n")

    #return metrics for further use
    return {
        "y_pred": y_pred,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }



In [22]:
#run on prostate
results_decision_tree = evaluate_classifier(
    decision_tree,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "Decision Tree"
)

results_naive_bayes = evaluate_classifier(
    naive_bayes,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "Naive Bayes"
)

results_KNN = evaluate_classifier(
    knn,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "KNN"
)

results_logistic_regression = evaluate_classifier(
    logistic_regression_subtype,
    X_train_prostate_subtype, y_train_prostate_subtype,
     X_val_prostate_subtype, y_val_prostate_subtype,
    "Logistic Regression"
)

results_random_forest = evaluate_classifier(
    random_forest_subtype,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "Random Forest"

)

results_SVM = evaluate_classifier(
    svm,
    X_train_prostate_subtype, y_train_prostate_subtype,
    X_val_prostate_subtype, y_val_prostate_subtype,
    "SVM"
)

"""
Results for Decision Tree:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Naive Bayes:
Accuracy: 0.8421
Precision: 0.8895
Recall: 0.8421
F1 Score: 0.8448


Results for KNN:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for SVM:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480

"""


Results for Decision Tree:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Naive Bayes:
Accuracy: 0.8421
Precision: 0.8895
Recall: 0.8421
F1 Score: 0.8448


Results for KNN:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480


Results for SVM:
Accuracy: 0.9474
Precision: 0.9539
Recall: 0.9474
F1 Score: 0.9480




'Results for Decision Tree:\nAccuracy: 1.0000\nPrecision: 1.0000\nRecall: 1.0000\nF1 Score: 1.0000\n\n\nResults for Naive Bayes:\nAccuracy: 0.8947\nPrecision: 0.9181\nRecall: 0.8947\nF1 Score: 0.8965\n\n\nResults for KNN:\nAccuracy: 1.0000\nPrecision: 1.0000\nRecall: 1.0000\nF1 Score: 1.0000\n\n\nResults for Logistic Regression:\nAccuracy: 1.0000\nPrecision: 1.0000\nRecall: 1.0000\nF1 Score: 1.0000\n\n\nResults for Random Forest:\nAccuracy: 1.0000\nPrecision: 1.0000\nRecall: 1.0000\nF1 Score: 1.0000\n\n\nResults for SVM:\nAccuracy: 1.0000\nPrecision: 1.0000\nRecall: 1.0000\nF1 Score: 1.0000\n\n'

In [23]:
#run on renal
results_decision_tree = evaluate_classifier(
    decision_tree,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Decision Tree"
)

results_naive_bayes = evaluate_classifier(
    naive_bayes,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Naive Bayes"
)

results_KNN = evaluate_classifier(
    knn,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "KNN"
)

results_logistic_regression = evaluate_classifier(
    logistic_regression_subtype,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Logistic Regression"
)

results_random_forest = evaluate_classifier(
    random_forest_subtype,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "Random Forest"
)

results_SVM = evaluate_classifier(
    svm,
    X_train_renal_subtype, y_train_renal_subtype,
    X_val_renal_subtype, y_val_renal_subtype,
    "SVM"
)

"""
Results for Decision Tree:
Accuracy: 0.9375
Precision: 0.9375
Recall: 0.9375
F1 Score: 0.9375


Results for Naive Bayes:
Accuracy: 0.9062
Precision: 0.9219
Recall: 0.9062
F1 Score: 0.9060


Results for KNN:
Accuracy: 0.9688
Precision: 0.9707
Recall: 0.9688
F1 Score: 0.9688


Results for Logistic Regression:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375


Results for Random Forest:
Accuracy: 0.9688
Precision: 0.9707
Recall: 0.9688
F1 Score: 0.9688


Results for SVM:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375

"""

Results for Decision Tree:
Accuracy: 0.9375
Precision: 0.9375
Recall: 0.9375
F1 Score: 0.9375


Results for Naive Bayes:
Accuracy: 0.9062
Precision: 0.9219
Recall: 0.9062
F1 Score: 0.9060


Results for KNN:
Accuracy: 0.9688
Precision: 0.9707
Recall: 0.9688
F1 Score: 0.9688


Results for Logistic Regression:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375


Results for Random Forest:
Accuracy: 0.9688
Precision: 0.9707
Recall: 0.9688
F1 Score: 0.9688


Results for SVM:
Accuracy: 0.9375
Precision: 0.9449
Recall: 0.9375
F1 Score: 0.9375




'\nResults for Decision Tree:\nAccuracy: 0.9688\nPrecision: 0.9705\nRecall: 0.9688\nF1 Score: 0.9687\n\n\nResults for Naive Bayes:\nAccuracy: 0.9375\nPrecision: 0.9449\nRecall: 0.9375\nF1 Score: 0.9375\n\n\nResults for KNN:\nAccuracy: 0.8750\nPrecision: 0.8801\nRecall: 0.8750\nF1 Score: 0.8740\n\n\nResults for Logistic Regression:\nAccuracy: 0.9375\nPrecision: 0.9441\nRecall: 0.9375\nF1 Score: 0.9370\n\n\nResults for Random Forest:\nAccuracy: 0.9688\nPrecision: 0.9705\nRecall: 0.9688\nF1 Score: 0.9687\n\n\nResults for SVM:\nAccuracy: 0.9375\nPrecision: 0.9441\nRecall: 0.9375\nF1 Score: 0.9370\n\n'

In [24]:
#run on throat
results_decision_tree = evaluate_classifier(
    decision_tree,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype, y_val_throat_subtype,
    "Decision Tree"
)

results_naive_bayes = evaluate_classifier(
    naive_bayes,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype, y_val_throat_subtype,
    "Naive Bayes"
)

results_KNN = evaluate_classifier(
    knn,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "KNN"
)

results_logistic_regression = evaluate_classifier(
    logistic_regression_subtype,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "Logistic Regression"
)

results_random_forest = evaluate_classifier(
    random_forest_subtype,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "Random Forest"
)

results_SVM = evaluate_classifier(
    svm,
    X_train_throat_subtype, y_train_throat_subtype,
    X_val_throat_subtype,  y_val_throat_subtype,
    "SVM"
)

"""
Results for Decision Tree:
Accuracy: 0.9737
Precision: 0.9749
Recall: 0.9737
F1 Score: 0.9734


Results for Naive Bayes:
Accuracy: 0.9211
Precision: 0.9380
Recall: 0.9211
F1 Score: 0.9226


Results for KNN:
Accuracy: 0.9474
Precision: 0.9555
Recall: 0.9474
F1 Score: 0.9482


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9474
Recall: 0.9474
F1 Score: 0.9474


Results for SVM:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460


"""


Results for Decision Tree:
Accuracy: 0.9737
Precision: 0.9749
Recall: 0.9737
F1 Score: 0.9734


Results for Naive Bayes:
Accuracy: 0.9211
Precision: 0.9380
Recall: 0.9211
F1 Score: 0.9226


Results for KNN:
Accuracy: 0.9474
Precision: 0.9555
Recall: 0.9474
F1 Score: 0.9482


Results for Logistic Regression:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460


Results for Random Forest:
Accuracy: 0.9474
Precision: 0.9474
Recall: 0.9474
F1 Score: 0.9474


Results for SVM:
Accuracy: 0.9474
Precision: 0.9522
Recall: 0.9474
F1 Score: 0.9460




'\nResults for Decision Tree:\nAccuracy: 0.9474\nPrecision: 0.9496\nRecall: 0.9474\nF1 Score: 0.9469\n\n\nResults for Naive Bayes:\nAccuracy: 0.9737\nPrecision: 0.9759\nRecall: 0.9737\nF1 Score: 0.9739\n\n\nResults for KNN:\nAccuracy: 0.9737\nPrecision: 0.9759\nRecall: 0.9737\nF1 Score: 0.9739\n\n\nResults for Logistic Regression:\nAccuracy: 0.9737\nPrecision: 0.9759\nRecall: 0.9737\nF1 Score: 0.9739\n\n\nResults for Random Forest:\nAccuracy: 0.9474\nPrecision: 0.9496\nRecall: 0.9474\nF1 Score: 0.9469\n\n\nResults for SVM:\nAccuracy: 0.9737\nPrecision: 0.9759\nRecall: 0.9737\nF1 Score: 0.9739\n\n'