## Importing Data and Libraries

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import (
    accuracy_score, 
    recall_score, 
    precision_score, 
    f1_score, 
    cohen_kappa_score
)
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC


In [2]:
# Read the train / test splits; both frames carry the label in an 'output' column.
Train = pd.read_csv('dataTrain.csv')
Test = pd.read_csv('dataTest.csv')

# stats.csv: keep every column except the first one
# (presumably a saved index column -- TODO confirm against the file).
stats = pd.read_csv('stats.csv').iloc[:, 1:]

# Separate the features from the 'output' label for each split.
X_train, y_train = Train.drop(columns='output'), Train['output']
X_test, y_test = Test.drop(columns='output'), Test['output']

In [3]:
# Convert features and labels to plain NumPy arrays so they can be sliced with
# the integer index arrays produced by KFold.split.
# np.asarray is used instead of .to_numpy() / .array because:
#   * it is idempotent -- re-running this cell on already-converted arrays is a
#     no-op rather than an AttributeError;
#   * Series.array yields a pandas ExtensionArray wrapper, not an ndarray, so
#     the labels would otherwise have a different container type than X.
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

## Building the model

### RFC

In [4]:
def trainRandomForest(X, y, n_splits=5, min_samples_split_values=[2, 6, 10], min_samples_leaf_values=[1, 5, 10]):
    """Grid-search a RandomForestClassifier with shuffled K-fold cross-validation.

    Every (min_samples_split, min_samples_leaf) pair is fitted and scored on
    each fold; the fold scores are averaged and the pair with the highest mean
    accuracy is kept. The winning configuration is printed and returned.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Labels aligned with the rows of ``X``.
    n_splits : int, default 5
        Number of folds passed to ``KFold`` (shuffled, ``random_state=1``).
    min_samples_split_values : iterable, default [2, 6, 10]
        Candidate values for ``min_samples_split``. Only iterated, never mutated.
    min_samples_leaf_values : iterable, default [1, 5, 10]
        Candidate values for ``min_samples_leaf``. Only iterated, never mutated.

    Returns
    -------
    dict
        Keys ``'min_samples_split'``, ``'min_samples_leaf'``, ``'accuracy'``,
        ``'recall'``, ``'precision'``, ``'f1_score'`` and ``'kappa'`` for the
        best configuration. The two parameter entries stay ``None`` if no
        configuration reaches a mean accuracy above 0.
    """
    # Coerce to ndarrays so X[idx] / y[idx] select rows by position; a
    # DataFrame passed directly would be indexed by column label instead.
    X = np.asarray(X)
    y = np.asarray(y)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)

    best_result = {
        'min_samples_split': None,
        'min_samples_leaf': None,
        'accuracy': 0,
        'recall': 0,
        'precision': 0,
        'f1_score': 0,
        'kappa': 0
    }

    for min_samples_split in min_samples_split_values:
        for min_samples_leaf in min_samples_leaf_values:
            # Per-fold scores for the current parameter pair.
            fold_scores = {'accuracy': [], 'recall': [], 'precision': [], 'f1_score': [], 'kappa': []}

            for train_index, test_index in kf.split(X):
                # Fold-local names, so the notebook-level X_train / X_test are not shadowed.
                X_fit, X_val = X[train_index], X[test_index]
                y_fit, y_val = y[train_index], y[test_index]

                model = RandomForestClassifier(
                    min_samples_split=min_samples_split,
                    min_samples_leaf=min_samples_leaf,
                    random_state=1
                )
                model.fit(X_fit, y_fit)
                y_pred = model.predict(X_val)

                fold_scores['accuracy'].append(accuracy_score(y_val, y_pred))
                fold_scores['recall'].append(recall_score(y_val, y_pred, average='weighted'))
                fold_scores['precision'].append(precision_score(y_val, y_pred, average='weighted'))
                fold_scores['f1_score'].append(f1_score(y_val, y_pred, average='weighted'))
                fold_scores['kappa'].append(cohen_kappa_score(y_val, y_pred))

            averages = {metric: np.mean(values) for metric, values in fold_scores.items()}

            # Model selection is driven by mean accuracy only; ties keep the earlier pair.
            if averages['accuracy'] > best_result['accuracy']:
                best_result = {
                    'min_samples_split': min_samples_split,
                    'min_samples_leaf': min_samples_leaf,
                    **averages
                }

    print(f"Best min_samples_split: {best_result['min_samples_split']}, min_samples_leaf: {best_result['min_samples_leaf']}")
    print(f"Best Average Accuracy: {best_result['accuracy']:.3f}")
    print(f"Best Average Recall: {best_result['recall']:.3f}")
    print(f"Best Average Precision: {best_result['precision']:.3f}")
    print(f"Best Average F1-Score: {best_result['f1_score']:.3f}")
    print(f"Best Average Kappa: {best_result['kappa']:.3f}")

    # Hand the winning configuration back so it can be reused (e.g. to fit a final model).
    return best_result

### XGB

In [5]:
def trainGradientBoosting(X, y, n_splits=5, min_child_weight_values=[1, 5, 10], max_depth_values=[3, 5, 7]):
    """Grid-search an XGBClassifier with shuffled K-fold cross-validation.

    Every (min_child_weight, max_depth) pair is fitted and scored on each
    fold; the fold scores are averaged and the pair with the highest mean
    accuracy is kept. The winning configuration is printed and returned.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Labels aligned with the rows of ``X``.
    n_splits : int, default 5
        Number of folds passed to ``KFold`` (shuffled, ``random_state=1``).
    min_child_weight_values : iterable, default [1, 5, 10]
        Candidate values for ``min_child_weight``. Only iterated, never mutated.
    max_depth_values : iterable, default [3, 5, 7]
        Candidate values for ``max_depth``. Only iterated, never mutated.

    Returns
    -------
    dict
        Keys ``'min_child_weight'``, ``'max_depth'``, ``'accuracy'``,
        ``'recall'``, ``'precision'``, ``'f1_score'`` and ``'kappa'`` for the
        best configuration. The two parameter entries stay ``None`` if no
        configuration reaches a mean accuracy above 0.
    """
    # Coerce to ndarrays so X[idx] / y[idx] select rows by position; a
    # DataFrame passed directly would be indexed by column label instead.
    X = np.asarray(X)
    y = np.asarray(y)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)

    best_result = {
        'min_child_weight': None,
        'max_depth': None,
        'accuracy': 0,
        'recall': 0,
        'precision': 0,
        'f1_score': 0,
        'kappa': 0
    }

    for min_child_weight in min_child_weight_values:
        for max_depth in max_depth_values:
            # Per-fold scores for the current parameter pair.
            fold_scores = {'accuracy': [], 'recall': [], 'precision': [], 'f1_score': [], 'kappa': []}

            for train_index, test_index in kf.split(X):
                # Fold-local names, so the notebook-level X_train / X_test are not shadowed.
                X_fit, X_val = X[train_index], X[test_index]
                y_fit, y_val = y[train_index], y[test_index]

                model = XGBClassifier(
                    min_child_weight=min_child_weight,
                    max_depth=max_depth,
                    eval_metric="logloss",
                    random_state=1,
                    use_label_encoder=False,
                    verbosity=0
                )
                model.fit(X_fit, y_fit)
                y_pred = model.predict(X_val)

                fold_scores['accuracy'].append(accuracy_score(y_val, y_pred))
                fold_scores['recall'].append(recall_score(y_val, y_pred, average='weighted'))
                fold_scores['precision'].append(precision_score(y_val, y_pred, average='weighted'))
                fold_scores['f1_score'].append(f1_score(y_val, y_pred, average='weighted'))
                fold_scores['kappa'].append(cohen_kappa_score(y_val, y_pred))

            averages = {metric: np.mean(values) for metric, values in fold_scores.items()}

            # Model selection is driven by mean accuracy only; ties keep the earlier pair.
            if averages['accuracy'] > best_result['accuracy']:
                best_result = {
                    'min_child_weight': min_child_weight,
                    'max_depth': max_depth,
                    **averages
                }

    print(f"Best min_child_weight: {best_result['min_child_weight']}, max_depth: {best_result['max_depth']}")
    print(f"Best Average Accuracy: {best_result['accuracy']:.3f}")
    print(f"Best Average Recall: {best_result['recall']:.3f}")
    print(f"Best Average Precision: {best_result['precision']:.3f}")
    print(f"Best Average F1-Score: {best_result['f1_score']:.3f}")
    print(f"Best Average Kappa: {best_result['kappa']:.3f}")

    # Hand the winning configuration back so it can be reused (e.g. to fit a final model).
    return best_result

### SVM

In [6]:
def trainSVM(X, y, n_splits=5, C_values=[0.1, 1, 10], gamma_values=['scale', 'auto']):
    """Grid-search an RBF-kernel SVC with shuffled K-fold cross-validation.

    Every (C, gamma) pair is fitted (with ``class_weight='balanced'``) and
    scored on each fold; the fold scores are averaged and the pair with the
    highest mean accuracy is kept. The winning configuration is printed and
    returned.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Labels aligned with the rows of ``X``.
    n_splits : int, default 5
        Number of folds passed to ``KFold`` (shuffled, ``random_state=1``).
    C_values : iterable, default [0.1, 1, 10]
        Candidate values for ``C``. Only iterated, never mutated.
    gamma_values : iterable, default ['scale', 'auto']
        Candidate values for ``gamma``. Only iterated, never mutated.

    Returns
    -------
    dict
        Keys ``'C'``, ``'gamma'``, ``'accuracy'``, ``'recall'``,
        ``'precision'``, ``'f1_score'`` and ``'kappa'`` for the best
        configuration. ``'C'`` and ``'gamma'`` stay ``None`` if no
        configuration reaches a mean accuracy above 0.
    """
    # Coerce to ndarrays so X[idx] / y[idx] select rows by position; a
    # DataFrame passed directly would be indexed by column label instead.
    X = np.asarray(X)
    y = np.asarray(y)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)

    best_result = {
        'C': None,
        'gamma': None,
        'accuracy': 0,
        'recall': 0,
        'precision': 0,
        'f1_score': 0,
        'kappa': 0
    }

    for C in C_values:
        for gamma in gamma_values:
            # Per-fold scores for the current parameter pair.
            fold_scores = {'accuracy': [], 'recall': [], 'precision': [], 'f1_score': [], 'kappa': []}

            for train_index, test_index in kf.split(X):
                # Fold-local names, so the notebook-level X_train / X_test are not shadowed.
                X_fit, X_val = X[train_index], X[test_index]
                y_fit, y_val = y[train_index], y[test_index]

                model = SVC(kernel='rbf', C=C, gamma=gamma, class_weight='balanced', random_state=1)
                model.fit(X_fit, y_fit)
                y_pred = model.predict(X_val)

                fold_scores['accuracy'].append(accuracy_score(y_val, y_pred))
                fold_scores['recall'].append(recall_score(y_val, y_pred, average='weighted'))
                fold_scores['precision'].append(precision_score(y_val, y_pred, average='weighted'))
                fold_scores['f1_score'].append(f1_score(y_val, y_pred, average='weighted'))
                fold_scores['kappa'].append(cohen_kappa_score(y_val, y_pred))

            averages = {metric: np.mean(values) for metric, values in fold_scores.items()}

            # Model selection is driven by mean accuracy only; ties keep the earlier pair.
            if averages['accuracy'] > best_result['accuracy']:
                best_result = {
                    'C': C,
                    'gamma': gamma,
                    **averages
                }

    print(f"Best C: {best_result['C']}, Best gamma: {best_result['gamma']}")
    print(f"Best Average Accuracy: {best_result['accuracy']:.3f}")
    print(f"Best Average Recall: {best_result['recall']:.3f}")
    print(f"Best Average Precision: {best_result['precision']:.3f}")
    print(f"Best Average F1-Score: {best_result['f1_score']:.3f}")
    print(f"Best Average Kappa: {best_result['kappa']:.3f}")

    # Hand the winning configuration back so it can be reused (e.g. to fit a final model).
    return best_result

In [7]:
# Run the three cross-validated hyper-parameter searches on the training split,
# in the same order as before; each call prints its best configuration and
# the averaged metrics.
for run_search in (trainRandomForest, trainGradientBoosting, trainSVM):
    run_search(X_train, y_train)

Best min_samples_split: 2, min_samples_leaf: 5
Best Average Accuracy: 0.851
Best Average Recall: 0.851
Best Average Precision: 0.854
Best Average F1-Score: 0.849
Best Average Kappa: 0.688
Best min_child_weight: 10, max_depth: 3
Best Average Accuracy: 0.834
Best Average Recall: 0.834
Best Average Precision: 0.836
Best Average F1-Score: 0.834
Best Average Kappa: 0.658
Best C: 1, Best gamma: scale
Best Average Accuracy: 0.805
Best Average Recall: 0.805
Best Average Precision: 0.811
Best Average F1-Score: 0.805
Best Average Kappa: 0.601
