## Performance evaluation
###

In [None]:
from os import path as osp
import numpy as np

# load data
def load_data(data_dir='./data'):
    """Load the train/validation and test arrays stored as ``.npy`` files.

    Args:
        data_dir: Directory that contains ``train_validation_data.npy``,
            ``train_validation_label.npy``, ``test_data.npy`` and
            ``test_label.npy``. Defaults to ``'./data'`` (the location that
            was previously hard-coded), so existing calls keep working.

    Returns:
        Tuple ``(train_val_data, train_val_label, test_data, test_label)``
        holding whatever ``np.load`` returns for the four files, in that order.

    Raises:
        FileNotFoundError: propagated from ``np.load`` when a file is missing.
    """
    file_names = (
        'train_validation_data.npy',
        'train_validation_label.npy',
        'test_data.npy',
        'test_label.npy',
    )
    # Same four loads as before, in the same order, driven by one list of names.
    return tuple(np.load(osp.join(data_dir, file_name)) for file_name in file_names)


train_validation_data, train_validation_label, test_data, test_label = load_data()

# Report the shape of every loaded array between two banner lines.
print('# ========== data info ============ #')
for caption, array in (
    ('train validation data', train_validation_data),
    ('train validation label', train_validation_label),
    ('test data', test_data),
    ('test label', test_label),
):
    print(f'{caption}: {array.shape}')
print('# ================================= #')

train validation data: (1000, 100)
train validation label: (1000,)
test data: (400, 100)
test label: (400,)


In [None]:
#data split for K-fold Cross-validation
from sklearn.model_selection import KFold
import numpy as np

def _balance_and_shuffle(data, label, rng):
    """Down-sample to equal class counts (labels 0 and 1), then shuffle.

    Keeps the first ``n`` samples of class 0 and of class 1, where ``n`` is the
    size of the smaller class, and returns ``(data, labels)`` permuted with
    ``rng``. Samples whose label is neither 0 nor 1 are dropped. The returned
    labels are rebuilt with ``np.zeros`` / ``np.ones`` (float arrays).
    """
    class_0 = data[label == 0]
    class_1 = data[label == 1]
    n_per_class = min(len(class_0), len(class_1))

    balanced_data = np.concatenate([class_0[:n_per_class], class_1[:n_per_class]])
    balanced_label = np.concatenate([np.zeros(n_per_class), np.ones(n_per_class)])

    # Shuffle so the classes are interleaved instead of all 0s followed by all 1s.
    order = rng.permutation(balanced_data.shape[0])
    return balanced_data[order], balanced_label[order]


def train_validation_split(K, train_val_data, train_val_label, random_state=42):
    """Build K class-balanced train/validation splits for cross-validation.

    The samples are partitioned with a shuffled ``KFold``; inside every fold the
    training part and the validation part are each down-sampled to the same
    number of class-0 and class-1 samples and then shuffled.

    Args:
        K: Number of folds.
        train_val_data: Array of samples, indexed along its first axis.
        train_val_label: Array of labels aligned with ``train_val_data``; only
            labels equal to 0 or 1 are kept.
        random_state: Integer seed (or ``None``) used both for the ``KFold``
            partition and for the per-fold shuffling. Previously the shuffling
            drew from the unseeded global ``np.random`` state, so the sample
            order changed between runs even though the partition was fixed.

    Returns:
        Tuple ``(train_datas, train_labels, val_datas, val_labels)``; each item
        is a list with one array per fold.
    """
    train_datas, train_labels, val_datas, val_labels = [], [], [], []

    kf = KFold(n_splits=K, shuffle=True, random_state=random_state)
    # Local generator: reproducible and leaves the global np.random state untouched.
    rng = np.random.RandomState(random_state)

    for train_index, val_index in kf.split(train_val_data):
        fold_train_data, fold_train_label = _balance_and_shuffle(
            train_val_data[train_index], train_val_label[train_index], rng)
        fold_val_data, fold_val_label = _balance_and_shuffle(
            train_val_data[val_index], train_val_label[val_index], rng)

        train_datas.append(fold_train_data)
        train_labels.append(fold_train_label)
        val_datas.append(fold_val_data)
        val_labels.append(fold_val_label)

    return train_datas, train_labels, val_datas, val_labels

In [None]:
# evaluation metrics

def eva_precision(true_label, pred_label, _class):
    """Precision for ``_class``: TP / (TP + FP).

    Returns 0 when no sample was predicted as ``_class``.
    """
    predicted_as_class = pred_label == _class
    true_positives = np.sum(predicted_as_class & (true_label == _class))
    false_positives = np.sum(predicted_as_class & (true_label != _class))

    predicted_total = true_positives + false_positives
    if predicted_total > 0:
        return true_positives / predicted_total
    return 0

def eva_recall(true_label, pred_label, _class):
    """Recall for ``_class``: TP / (TP + FN).

    Returns 0 when no sample truly belongs to ``_class``.
    """
    belongs_to_class = true_label == _class
    true_positives = np.sum((pred_label == _class) & belongs_to_class)
    false_negatives = np.sum((pred_label != _class) & belongs_to_class)

    actual_total = true_positives + false_negatives
    if actual_total > 0:
        return true_positives / actual_total
    return 0

def eva_f1(true_label, pred_label, _class):
    """F1 score for ``_class``: harmonic mean of its precision and recall.

    Returns 0 when precision and recall are both 0.
    """
    p = eva_precision(true_label, pred_label, _class)
    r = eva_recall(true_label, pred_label, _class)

    denominator = p + r
    if denominator > 0:
        return (2 * p * r) / denominator
    return 0

def eva_accuracy(true_label, pred_label):
    """Fraction of samples whose predicted label equals the true label.

    Args:
        true_label: Ground-truth labels (array or sequence).
        pred_label: Predicted labels, aligned with ``true_label``.

    Returns:
        Number of matching labels divided by ``len(true_label)``; 0 when
        ``true_label`` is empty, mirroring the zero-denominator convention of
        the precision / recall / F1 helpers.
    """
    # Coerce to arrays so plain lists are compared element-wise; ``list == list``
    # would collapse to a single bool and silently yield a wrong count.
    true_label = np.asarray(true_label)
    pred_label = np.asarray(pred_label)

    if true_label.size == 0:
        # Avoid 0 / 0 (NaN plus a RuntimeWarning) on empty input.
        return 0

    correct = np.sum(true_label == pred_label)
    return correct / len(true_label)

# Evaluation function
def eva_auroc(true_label, pred_probs):
    """Area under the ROC curve, treating label 1 as positive and 0 as negative.

    The curve is evaluated exactly at every distinct score (a sample is called
    positive when its score is >= the threshold) and always runs from (0, 0)
    to (1, 1). The previous fixed grid of 100 thresholds in [0, 1] only
    approximated the curve and, unless some score reached 1, never produced the
    (0, 0) end point, which truncated the area.

    Args:
        true_label: Ground-truth labels; entries equal to 1 are positives,
            entries equal to 0 are negatives, anything else is ignored.
        pred_probs: Scores aligned with ``true_label``; larger means "more
            likely class 1". Probabilities, hard 0/1 predictions or any other
            real-valued scores are accepted.

    Returns:
        The trapezoidal area under the ROC curve as a float, or 0.0 when there
        are no positives or no negatives (TPR or FPR is then identically 0).
    """
    true_label = np.asarray(true_label).ravel()
    scores = np.asarray(pred_probs, dtype=float).ravel()

    is_positive = true_label == 1
    is_negative = true_label == 0
    n_positive = int(np.sum(is_positive))
    n_negative = int(np.sum(is_negative))
    if n_positive == 0 or n_negative == 0:
        return 0.0

    # Walk the samples from the highest score down; the running counts are the
    # TP / FP obtained when the threshold sits at the current score.
    order = np.argsort(-scores, kind='stable')
    scores = scores[order]
    cumulative_tp = np.cumsum(is_positive[order])
    cumulative_fp = np.cumsum(is_negative[order])

    # Tied scores share a threshold, so keep only the last sample of each run.
    last_of_run = np.r_[np.flatnonzero(np.diff(scores)), scores.size - 1]

    # Prepend the (0, 0) point: a threshold above every score predicts no positives.
    tpr = np.r_[0.0, cumulative_tp[last_of_run] / n_positive]
    fpr = np.r_[0.0, cumulative_fp[last_of_run] / n_negative]

    # Trapezoidal rule written out by hand (np.trapz is deprecated in NumPy 2).
    return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))

def evaluation(true_label, pred_label, _class, pred_probs=None):
    """Collect precision, recall, F1, accuracy and AUROC in one dictionary.

    Args:
        true_label: Ground-truth labels.
        pred_label: Predicted hard labels, aligned with ``true_label``.
        _class: Class for which precision, recall and F1 are reported.
        pred_probs: Optional scores for class 1 (e.g. ``predict_proba(X)[:, 1]``)
            used for the AUROC. When omitted the hard ``pred_label`` values are
            used, as before, which reduces the ROC curve to a single operating
            point.

    Returns:
        Dict with keys ``'precision'``, ``'recall'``, ``'f1'``, ``'accuracy'``
        and ``'auroc'``. Accuracy and AUROC do not depend on ``_class``.
    """
    auroc_scores = pred_label if pred_probs is None else pred_probs

    return {
        'precision': eva_precision(true_label, pred_label, _class),
        'recall': eva_recall(true_label, pred_label, _class),
        'f1': eva_f1(true_label, pred_label, _class),
        'accuracy': eva_accuracy(true_label, pred_label),
        'auroc': eva_auroc(true_label, auroc_scores),
    }
    


In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

K = 5

# Candidate penalties for logistic regression.
hyper_parameters_logistic_regression = {
    'penalty': ['l1', 'l2']
}

# Candidate values of C for the linear SVM.
hyper_parameters_svm = {
    'C': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]
}

# Obtain cross-validation set
train_datas, train_labels, validation_datas, validation_labels = train_validation_split(K, train_validation_data, train_validation_label)


def _search_per_fold(algorithm, display_name, column, param_name, candidates, make_model):
    """Fit one model per (fold, candidate) pair and score it on the validation part.

    Shared by logistic regression and SVM so the two searches cannot drift apart.

    Args:
        algorithm: Value stored under ``'Algorithm'`` in every result record.
        display_name: Algorithm name shown in the progress output.
        column: Record key under which the candidate value is stored.
        param_name: Hyper-parameter name shown in the progress / error output.
        candidates: Iterable of hyper-parameter values to try.
        make_model: Callable mapping a candidate value to an unfitted estimator.

    Returns:
        List of dicts, one per successful fit, with the per-class F1 scores and
        their unweighted mean on the fold's validation data.
    """
    records = []
    folds = zip(train_datas, train_labels, validation_datas, validation_labels)
    for i, (train_data, train_label, validation_data, validation_label) in enumerate(folds):
        print(f'================== {i + 1}-th time validation ==================')

        for value in candidates:
            print(f'Algorithm: [{display_name}]')
            print(f'Hyper-parameters: {param_name}={value}')
            try:
                model = make_model(value).fit(train_data, train_label)
                pred_label = model.predict(validation_data)
                F1_0 = eva_f1(validation_label, pred_label, _class=0)
                F1_1 = eva_f1(validation_label, pred_label, _class=1)
                records.append({
                    'Algorithm': algorithm,
                    column: value,
                    'Fold': i + 1,
                    'F1_Class_0': F1_0,
                    'F1_Class_1': F1_1,
                    'F1_Avg': (F1_0 + F1_1) / 2
                })
            except Exception as e:
                # Best effort: report the failing candidate and keep searching.
                print(f"Error for {param_name}={value}: {e}")
    return records


# Cross-validation loop for Logistic Regression
results_logistic = _search_per_fold(
    algorithm='Logistic Regression',
    display_name='logistic regression',
    column='Penalty',
    param_name='penalty',
    candidates=hyper_parameters_logistic_regression['penalty'],
    make_model=lambda penalty: LogisticRegression(solver='liblinear', penalty=penalty),
)
results_logistic_df = pd.DataFrame(results_logistic)

# Best penalty per fold = the row with the highest average F1 inside that fold.
optimal_parameters_logistic = results_logistic_df.loc[results_logistic_df.groupby('Fold')['F1_Avg'].idxmax()]
print("Optimal penalty settings for logistic regression for each fold:")
print(optimal_parameters_logistic[['Fold', 'Penalty', 'F1_Avg']])

# Cross-validation loop for SVM
results_svm = _search_per_fold(
    algorithm='SVM',
    display_name='SVM',
    column='C',
    param_name='C',
    candidates=hyper_parameters_svm['C'],
    make_model=lambda C: SVC(kernel='linear', C=C),
)
results_svm_df = pd.DataFrame(results_svm)

# Best C per fold = the row with the highest average F1 inside that fold.
optimal_parameters_svm = results_svm_df.loc[results_svm_df.groupby('Fold')['F1_Avg'].idxmax()]
print("Optimal C settings for SVM for each fold:")
print(optimal_parameters_svm[['Fold', 'C', 'F1_Avg']])


Algorithm: [logistic regression]
Hyper-parameters: penalty=l1
Algorithm: [logistic regression]
Hyper-parameters: penalty=l2
Algorithm: [logistic regression]
Hyper-parameters: penalty=l1
Algorithm: [logistic regression]
Hyper-parameters: penalty=l2
Algorithm: [logistic regression]
Hyper-parameters: penalty=l1
Algorithm: [logistic regression]
Hyper-parameters: penalty=l2
Algorithm: [logistic regression]
Hyper-parameters: penalty=l1
Algorithm: [logistic regression]
Hyper-parameters: penalty=l2
Algorithm: [logistic regression]
Hyper-parameters: penalty=l1
Algorithm: [logistic regression]
Hyper-parameters: penalty=l2
Optimal penalty settings for logistic regression for each fold:
   Fold Penalty    F1_Avg
0     1      l1  0.942669
3     2      l2  0.926115
5     3      l2  0.920211
6     4      l1  0.947042
8     5      l1  0.944443
Algorithm: [SVM]
Hyper-parameters: C=1e-05
Algorithm: [SVM]
Hyper-parameters: C=0.0001
Algorithm: [SVM]
Hyper-parameters: C=0.001
Algorithm: [SVM]
Hyper-paramet

In [None]:
# performance evaluation on test set

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

K = 5

# Optimal hyper-parameters for each split, transcribed from the per-fold
# cross-validation results of the search cell.
# hyper-parameter penalty for logistic regression. Hint: len(penalty) = 5
penalty = [
    'l1', 'l2', 'l2', 'l1', 'l1'
]

# hyper-parameter C for SVM. Hint: len(C) = 5
C = [
    0.0001, 0.00001, 0.00001, 0.01, 0.00001
]
assert len(penalty) == K and len(C) == K, 'need one optimal hyper-parameter per split'

# obtain cross-validation set
train_datas, train_labels, validation_datas, validation_labels = train_validation_split(K, train_validation_data, train_validation_label)

# Per-split test-set metrics (one entry appended per split).
# Logistic Regression
precision_class_0_lr, recall_class_0_lr, f1_class_0_lr = [], [], []
precision_class_1_lr, recall_class_1_lr, f1_class_1_lr = [], [], []
accuracy_values_lr, auroc_values_lr = [], []

# SVM
precision_class_0_svm, recall_class_0_svm, f1_class_0_svm = [], [], []
precision_class_1_svm, recall_class_1_svm, f1_class_1_svm = [], [], []
accuracy_values_svm, auroc_values_svm = [], []


def _score_on_test(model, heading):
    """Evaluate a fitted classifier on the test set and print one line per class.

    AUROC is computed from the predicted probability of the second class
    (``predict_proba(...)[:, 1]``) rather than from hard labels, so the whole
    ROC curve is used. Assumes the model's second class is label 1, which holds
    when it was trained on labels {0, 1}.

    Returns:
        ``(results_0, results_1, accuracy, auroc)`` where ``results_c`` is the
        dictionary returned by ``evaluation`` for class ``c``.
    """
    print(heading)
    pred_label = model.predict(test_data)
    pred_probs = model.predict_proba(test_data)[:, 1]

    per_class = {cls: evaluation(test_label, pred_label, _class=cls) for cls in (0, 1)}
    # The 'auroc' entry of ``evaluation`` is based on hard labels; use the
    # probability-based value instead.
    auroc = eva_auroc(test_label, pred_probs)

    for cls in (0, 1):
        res = per_class[cls]
        print(f'Result Class {cls} (Test set): Precision={res["precision"]}, Recall={res["recall"]}, F1={res["f1"]}, Accuracy={res["accuracy"]}, AUROC={auroc}')

    return per_class[0], per_class[1], per_class[0]['accuracy'], auroc


for i, (train_data, train_label) in enumerate(zip(train_datas, train_labels)):

    print(f'# ======================= {i + 1}-th time validation ======================= #')

    # Logistic regression with the optimal penalty of this split.
    lr_model = LogisticRegression(solver='liblinear', penalty=penalty[i]).fit(train_data, train_label)
    results_0_lr, results_1_lr, accuracy_lr, auroc_lr = _score_on_test(
        lr_model, 'Logistic Regression with optimal super parameters:')
    precision_class_0_lr.append(results_0_lr["precision"])
    recall_class_0_lr.append(results_0_lr["recall"])
    f1_class_0_lr.append(results_0_lr["f1"])
    precision_class_1_lr.append(results_1_lr["precision"])
    recall_class_1_lr.append(results_1_lr["recall"])
    f1_class_1_lr.append(results_1_lr["f1"])
    accuracy_values_lr.append(accuracy_lr)
    auroc_values_lr.append(auroc_lr)

    # Linear SVM with the optimal C of this split; probability=True enables predict_proba.
    svm_model = SVC(kernel='linear', C=C[i], probability=True).fit(train_data, train_label)
    results_0_svm, results_1_svm, accuracy_svm, auroc_svm = _score_on_test(
        svm_model, 'SVM with optimal super parameters:')
    precision_class_0_svm.append(results_0_svm["precision"])
    recall_class_0_svm.append(results_0_svm["recall"])
    f1_class_0_svm.append(results_0_svm["f1"])
    precision_class_1_svm.append(results_1_svm["precision"])
    recall_class_1_svm.append(results_1_svm["recall"])
    f1_class_1_svm.append(results_1_svm["f1"])
    accuracy_values_svm.append(accuracy_svm)
    auroc_values_svm.append(auroc_svm)


def _mean(values):
    """Arithmetic mean of a non-empty list of numbers."""
    return sum(values) / len(values)


def _split_columns(*rows):
    """Build the table columns '1'..'K'; column k lists ``row[k]`` for every row."""
    return {str(k + 1): [row[k] for row in rows] for k in range(K)}


# Logistic Regression Averages
precision_class_0_lr_avg = _mean(precision_class_0_lr)
recall_class_0_lr_avg = _mean(recall_class_0_lr)
f1_class_0_lr_avg = _mean(f1_class_0_lr)
precision_class_1_lr_avg = _mean(precision_class_1_lr)
recall_class_1_lr_avg = _mean(recall_class_1_lr)
f1_class_1_lr_avg = _mean(f1_class_1_lr)
accuracy_avg_lr = _mean(accuracy_values_lr)
auroc_avg_lr = _mean(auroc_values_lr)

# SVM Averages
precision_class_0_svm_avg = _mean(precision_class_0_svm)
recall_class_0_svm_avg = _mean(recall_class_0_svm)
f1_class_0_svm_avg = _mean(f1_class_0_svm)
precision_class_1_svm_avg = _mean(precision_class_1_svm)
recall_class_1_svm_avg = _mean(recall_class_1_svm)
f1_class_1_svm_avg = _mean(f1_class_1_svm)
accuracy_avg_svm = _mean(accuracy_values_svm)
auroc_avg_svm = _mean(auroc_values_svm)

# Table 2: Optimal Penalty for Logistic Regression
print("\nTable 2: Optimal Penalty for Logistic Regression")
table_2 = pd.DataFrame({
    'Hyper-parameter': ['Penalty'],
    **_split_columns(penalty),
})
display(table_2)

# Table 3: Performance of Logistic Regression for Class 0
print("Table 3: Performance of Logistic Regression for Class 0")
table_3 = pd.DataFrame({
    'Metric': ['Precision', 'Recall', 'F1'],
    **_split_columns(precision_class_0_lr, recall_class_0_lr, f1_class_0_lr),
    'Avg': [precision_class_0_lr_avg, recall_class_0_lr_avg, f1_class_0_lr_avg]
})
display(table_3)

# Table 4: Performance of Logistic Regression for Class 1
print('Table 4: The performance evaluation of logistic regression for Class-1 by Precision, Recall and F1 scores on test set.')
table_4 = pd.DataFrame({
    'Metric': ['Precision', 'Recall', 'F1'],
    **_split_columns(precision_class_1_lr, recall_class_1_lr, f1_class_1_lr),
    'Avg': [precision_class_1_lr_avg, recall_class_1_lr_avg, f1_class_1_lr_avg]
})
display(table_4)

# Table 5: Accuracy and AUROC for Logistic Regression
# AUROC values are the ones computed in the loop, not numbers pasted from an earlier run.
print('Table 5: The performance evaluation of logistic regression by Accuracy and AUROC on test set.')
table_5 = pd.DataFrame({
    'Metric': ['Accuracy', 'AUROC'],
    **_split_columns(accuracy_values_lr, auroc_values_lr),
    'Avg': [accuracy_avg_lr, auroc_avg_lr]
})
display(table_5)

# Table 6: Optimal C for SVM
print('Table 6: The optimal setting of “C” of SVM in each split')
table_6 = pd.DataFrame({
    'Hyper-parameter': ['C'],
    **_split_columns(C),
})
display(table_6)

# Table 7: Performance of SVM for Class 0
print('Table 7: The performance evaluation of SVM for Class-0 by Precision, Recall and F1 scores on test set.')
table_7 = pd.DataFrame({
    'Metric': ['Precision', 'Recall', 'F1'],
    **_split_columns(precision_class_0_svm, recall_class_0_svm, f1_class_0_svm),
    'Avg': [precision_class_0_svm_avg, recall_class_0_svm_avg, f1_class_0_svm_avg]
})
display(table_7)

# Table 8: Performance of SVM for Class 1
print('Table 8: The performance evaluation of SVM for Class-1 by Precision, Recall and F1 scores on test set.')
table_8 = pd.DataFrame({
    'Metric': ['Precision', 'Recall', 'F1'],
    **_split_columns(precision_class_1_svm, recall_class_1_svm, f1_class_1_svm),
    'Avg': [precision_class_1_svm_avg, recall_class_1_svm_avg, f1_class_1_svm_avg]
})
display(table_8)

# Table 9: Accuracy and AUROC for SVM
print('Table 9: The performance evaluation of SVM by Accuracy and AUROC on test set.')
table_9 = pd.DataFrame({
    'Metric': ['Accuracy', 'AUROC'],
    **_split_columns(accuracy_values_svm, auroc_values_svm),
    'Avg': [accuracy_avg_svm, auroc_avg_svm]
})
display(table_9)


    


Logistic Regression with optimal super parameters:
Result Class 0 (Test set): Precision=0.9095477386934674, Recall=0.905, F1=0.9072681704260652, Accuracy=0.9075, AUROC=0.8642750000000001
Result Class 1 (Test set): Precision=0.9054726368159204, Recall=0.91, F1=0.9077306733167083, Accuracy=0.9075, AUROC=0.8642750000000001
SVM with optimal super parameters:
Result Class 0 (Test set): Precision=0.9303482587064676, Recall=0.935, F1=0.9326683291770573, Accuracy=0.9325, AUROC=0.9022750000000002
Result Class 1 (Test set): Precision=0.9346733668341709, Recall=0.93, F1=0.9323308270676693, Accuracy=0.9325, AUROC=0.9022750000000002
Logistic Regression with optimal super parameters:
Result Class 0 (Test set): Precision=0.8185840707964602, Recall=0.925, F1=0.8685446009389671, Accuracy=0.86, AUROC=0.8301875
Result Class 1 (Test set): Precision=0.9137931034482759, Recall=0.795, F1=0.8502673796791445, Accuracy=0.86, AUROC=0.8301875
SVM with optimal super parameters:
Result Class 0 (Test set): Precision

Unnamed: 0,Hyper-parameter,1,2,3,4,5
0,Penalty,l1,l2,l2,l1,l1


Table 3: Performance of Logistic Regression for Class 0


Unnamed: 0,Metric,1,2,3,4,5,Avg
0,Precision,0.909548,0.818584,0.85,0.924623,0.915423,0.883636
1,Recall,0.905,0.925,0.935,0.92,0.92,0.921
2,F1,0.907268,0.868545,0.890476,0.922306,0.917706,0.90126


Table 4: The performance evaluation of logistic regression for Class-1 by Precision, Recall and F1 scores on test set.


Unnamed: 0,Metric,1,2,3,4,5,Avg
0,Precision,0.905473,0.913793,0.927778,0.920398,0.919598,0.917408
1,Recall,0.91,0.795,0.835,0.925,0.915,0.876
2,F1,0.907731,0.850267,0.878947,0.922693,0.917293,0.895386


Table 5: The performance evaluation of logistic regression by Accuracy and AUROC on test set.


Unnamed: 0,Metric,1,2,3,4,5,Avg
0,Accuracy,0.9075,0.86,0.885,0.9225,0.9175,0.8985
1,AUROC,0.864275,0.830187,0.8602,0.8855,0.8832,0.864672


Table 6: The optimal setting of “C” of SVM in each split


Unnamed: 0,Hyper-parameter,1,2,3,4,5
0,C,0.0001,1e-05,1e-05,0.01,1e-05


Table 7: The performance evaluation of SVM for Class-0 by Precision, Recall and F1 scores on test set.


Unnamed: 0,Metric,1,2,3,4,5,Avg
0,Precision,0.930348,0.934673,0.943878,0.931579,0.953608,0.938817
1,Recall,0.935,0.93,0.925,0.885,0.925,0.92
2,F1,0.932668,0.932331,0.934343,0.907692,0.939086,0.929224


Table 8: The performance evaluation of SVM for Class-1 by Precision, Recall and F1 scores on test set.


Unnamed: 0,Metric,1,2,3,4,5,Avg
0,Precision,0.934673,0.930348,0.926471,0.890476,0.927184,0.921831
1,Recall,0.93,0.935,0.945,0.935,0.955,0.94
2,F1,0.932331,0.932668,0.935644,0.912195,0.940887,0.930745


Table 9: The performance evaluation of SVM by Accuracy and AUROC on test set.


Unnamed: 0,Metric,1,2,3,4,5,Avg
0,Accuracy,0.9325,0.9325,0.935,0.91,0.94,0.93
1,AUROC,0.902275,0.899775,0.899562,0.856237,0.904188,0.892408


## Conclusion: 
The Support Vector Machine (SVM) outperforms Logistic Regression on this dataset.

## Reason:
SVM achieves higher F1 scores for both classes, better overall accuracy, and a higher AUROC than Logistic Regression on the test set (Tables 3–9). These metrics indicate that SVM balances precision and recall more effectively, handles the dataset's features more robustly, and has better discriminatory power for separating the two classes. The linear SVM's margin-maximizing objective, which tends to cope well with high-dimensional feature spaces, likely contributes to its superior performance. Because the training and validation folds are explicitly class-balanced, any advantage in handling class imbalance is unlikely to be a factor here.

