In [1]:
# Importing all necessary libraries
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.feature_selection import RFE, SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load and preprocess data
data = pd.read_csv('dataset.csv')
data['BPM'] = data['BPM'].round(2)

# Apply Label Encoding
label_cols = ['While working', 'Instrumentalist', 'Composer', 'Exploratory', 'Foreign languages', 'Fav genre']
data[label_cols] = data[label_cols].apply(LabelEncoder().fit_transform)

# Define the frequency and effect order
frequency_order = ['Never', 'Rarely', 'Sometimes', 'Very frequently']
effect_order = ['Worsen', 'No effect', 'Improve']

# Apply Ordinal Encoding
encoder1 = OrdinalEncoder(categories = [frequency_order] * 16)
encoder2 = OrdinalEncoder(categories = [effect_order])
freq_cols = [
    'Frequency [Classical]', 'Frequency [Country]', 'Frequency [EDM]', 'Frequency [Folk]', 
    'Frequency [Gospel]', 'Frequency [Hip hop]', 'Frequency [Jazz]', 'Frequency [K pop]', 
    'Frequency [Latin]', 'Frequency [Lofi]', 'Frequency [Metal]', 'Frequency [Pop]', 
    'Frequency [R&B]', 'Frequency [Rap]', 'Frequency [Rock]', 'Frequency [Video game music]'
]
data[freq_cols] = encoder1.fit_transform(data[freq_cols])
data[['Music effects']] = encoder2.fit_transform(data[['Music effects']])

# Define features and target
X = np.array(data.drop('Music effects', axis = 1))
y = np.array(data['Music effects'])

# Use SMOTE to handle class imbalance
sampling_strategy = {0: 500, 1: 500, 2: 600}
smote = SMOTE(sampling_strategy = sampling_strategy, random_state = 42)
X, y = smote.fit_resample(X, y)

# Initialize K-Fold Cross-Validation
kf = KFold(n_splits = 5, shuffle = True, random_state = 42)

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

#Random Forest

In [3]:
# Feature selection with SelectFromModel
selector = SelectFromModel(ExtraTreesClassifier(n_estimators = 100, random_state = 42))
X_selected = selector.fit_transform(X, y)

# Hyperparameter tuning using GridSearchCV
rf_model = RandomForestClassifier(random_state = 42, oob_score = True)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [6, 8, 10, 12],
    'min_samples_split': [10, 15, 20],
    'min_samples_leaf': [5, 10, 15],
    'max_features': ['sqrt', 'log2', None],
    'class_weight': ['balanced', None]
}
grid_search = GridSearchCV(estimator = rf_model, param_grid = param_grid, cv = kf, scoring = 'accuracy', n_jobs = -1, verbose = 1)
grid_search.fit(X_selected, y)

# Get the best hyperparameters for model
best_rf_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

Fitting 5 folds for each of 648 candidates, totalling 3240 fits
Best Parameters: {'class_weight': 'balanced', 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 10, 'n_estimators': 200}


In [4]:
# Initialize lists to store metrics
train_accuracies = []
test_accuracies = []
confusion_matrices = []
precision_scores = []
recall_scores = []
f1_scores = []

for train_index, test_index in kf.split(X_selected):
    X_train, X_test = X_selected[train_index], X_selected[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the model
    best_rf_model.fit(X_train, y_train)
    
    # Evaluate on the train set
    train_pred = best_rf_model.predict(X_train)
    train_accuracy = accuracy_score(y_train, train_pred)
    train_accuracies.append(train_accuracy)
    
    # Evaluate on the test set
    y_pred = best_rf_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_accuracies.append(test_accuracy)
    
    # Calculate Precision, Recall, and F1-Score for each fold
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average = 'weighted')
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels = [0, 1, 2])
    confusion_matrices.append(cm)
    
    # Print fold-specific metrics
    print(f'\nFold Test Accuracy: {test_accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix for this Fold:\n{cm}')

# Calculate the average metrics across all folds
avg_test_accuracy = np.mean(test_accuracies)
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)
avg_f1_score = np.mean(f1_scores)
avg_confusion_matrix = np.mean(confusion_matrices, axis = 0)

# Print the overall average metrics
print(f'Average Test Accuracy: {avg_test_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1_score:.4f}')
print(f'Average Confusion Matrix:\n{avg_confusion_matrix}')


Fold Test Accuracy: 0.8375
Precision: 0.8341
Recall: 0.8375
F1 Score: 0.8339
Confusion Matrix for this Fold:
[[ 92   0   1]
 [  6  64  26]
 [  1  18 112]]

Fold Test Accuracy: 0.8625
Precision: 0.8620
Recall: 0.8625
F1 Score: 0.8603
Confusion Matrix for this Fold:
[[ 84   1   2]
 [  7  77  20]
 [  2  12 115]]

Fold Test Accuracy: 0.8719
Precision: 0.8723
Recall: 0.8719
F1 Score: 0.8705
Confusion Matrix for this Fold:
[[103   1   3]
 [  4  70  20]
 [  2  11 106]]

Fold Test Accuracy: 0.8562
Precision: 0.8540
Recall: 0.8562
F1 Score: 0.8546
Confusion Matrix for this Fold:
[[112   0   2]
 [  2  58  22]
 [  3  17 104]]

Fold Test Accuracy: 0.8063
Precision: 0.8123
Recall: 0.8063
F1 Score: 0.8028
Confusion Matrix for this Fold:
[[99  0  0]
 [ 7 80 37]
 [ 1 17 79]]
Average Test Accuracy: 0.8469
Average Precision: 0.8469
Average Recall: 0.8469
Average F1 Score: 0.8444
Average Confusion Matrix:
[[ 98.    0.4   1.6]
 [  5.2  69.8  25. ]
 [  1.8  15.  103.2]]


#SVM
#One-vs-all

In [5]:
# Feature selection with Recursive Feature Elimination (RFE)
svm_model = SVC(kernel = 'linear', random_state = 42)
selector = RFE(estimator = svm_model, n_features_to_select = 10, step = 1)
X_selected = selector.fit_transform(X, y)

# Initialize the OneVsRestClassifier with SVM as the base estimator
ovr_svm = OneVsRestClassifier(SVC(probability = True, random_state = 42))

# Hyperparameter tuning using GridSearchCV for OvA SVM
param_grid = {
    'estimator__C': [0.01, 0.1, 1, 10],
    'estimator__gamma': ['scale'],
    'estimator__kernel': ['linear', 'rbf'],
    'estimator__class_weight': ['balanced']
}

grid_search = GridSearchCV(estimator = ovr_svm, param_grid = param_grid, cv = kf, scoring = 'accuracy', n_jobs = -1, verbose = 1)
grid_search.fit(X_selected, y)

# Get the best hyperparameters for model
best_ova_svm_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best Parameters: {'estimator__C': 10, 'estimator__class_weight': 'balanced', 'estimator__gamma': 'scale', 'estimator__kernel': 'rbf'}


In [6]:
# Initialize lists to store metrics
train_accuracies = []
test_accuracies = []
confusion_matrices = []
precision_scores = []
recall_scores = []
f1_scores = []

# Perform K-Fold Cross-Validation with the best OvA SVM model
kf = KFold(n_splits = 10, shuffle = True, random_state = 42)

for train_index, test_index in kf.split(X_selected):
    X_train, X_test = X_selected[train_index], X_selected[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the OvA SVM model
    best_ova_svm_model.fit(X_train, y_train)
    
    # Evaluate on the train set
    train_pred = best_ova_svm_model.predict(X_train)
    train_accuracy = accuracy_score(y_train, train_pred)
    train_accuracies.append(train_accuracy)
    
    # Evaluate on the test set
    y_pred = best_ova_svm_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_accuracies.append(test_accuracy)
    
    # Calculate Precision, Recall, and F1-Score for each fold
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average = 'weighted')
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels = [0, 1, 2])
    confusion_matrices.append(cm)
    
    # Print fold-specific metrics
    print(f'\nFold Test Accuracy: {test_accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix for this Fold:\n{cm}')

# Calculate the average metrics across all folds
avg_test_accuracy = np.mean(test_accuracies)
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)
avg_f1_score = np.mean(f1_scores)
avg_confusion_matrix = np.mean(confusion_matrices, axis = 0)

# Print the overall average metrics
print(f'Average Test Accuracy: {avg_test_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1_score:.4f}')
print(f'Average Confusion Matrix:\n{avg_confusion_matrix}')


Fold Test Accuracy: 0.7937
Precision: 0.7997
Recall: 0.7937
F1 Score: 0.7946
Confusion Matrix for this Fold:
[[39  0  0]
 [ 1 37 12]
 [ 0 20 51]]

Fold Test Accuracy: 0.8313
Precision: 0.8311
Recall: 0.8313
F1 Score: 0.8308
Confusion Matrix for this Fold:
[[54  0  0]
 [ 0 34 12]
 [ 1 14 45]]

Fold Test Accuracy: 0.8187
Precision: 0.8183
Recall: 0.8187
F1 Score: 0.8163
Confusion Matrix for this Fold:
[[40  0  0]
 [ 1 45 10]
 [ 3 15 46]]

Fold Test Accuracy: 0.8250
Precision: 0.8217
Recall: 0.8250
F1 Score: 0.8227
Confusion Matrix for this Fold:
[[47  0  0]
 [ 1 35 12]
 [ 3 12 50]]

Fold Test Accuracy: 0.8250
Precision: 0.8260
Recall: 0.8250
F1 Score: 0.8243
Confusion Matrix for this Fold:
[[49  0  0]
 [ 0 35 11]
 [ 2 15 48]]

Fold Test Accuracy: 0.8938
Precision: 0.8949
Recall: 0.8938
F1 Score: 0.8923
Confusion Matrix for this Fold:
[[58  0  0]
 [ 2 42  4]
 [ 2  9 43]]

Fold Test Accuracy: 0.8375
Precision: 0.8438
Recall: 0.8375
F1 Score: 0.8362
Confusion Matrix for this Fold:
[[57  0 

#Logistic Regression
#OvR Approach

In [7]:
# Feature selection with SelectFromModel
selector = SelectFromModel(ExtraTreesClassifier(n_estimators = 100, random_state = 42))
X_selected = selector.fit_transform(X, y)

# Initialize Logistic Regression with One-vs-Rest strategy
logreg = LogisticRegression(max_iter = 1000, solver = 'lbfgs')
ovr_model = OneVsRestClassifier(logreg)

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'estimator__C': [0.01, 0.1, 1, 10],
    'estimator__penalty': ['l2'],
    'estimator__class_weight': ['balanced', None]
}
grid_search = GridSearchCV(estimator = ovr_model, param_grid = param_grid, cv = kf, scoring = 'accuracy', n_jobs = -1, verbose = 1)
grid_search.fit(X_selected, y)

# Get the best hyperparameters for model
best_logreg_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

Fitting 10 folds for each of 8 candidates, totalling 80 fits
Best Parameters: {'estimator__C': 10, 'estimator__class_weight': 'balanced', 'estimator__penalty': 'l2'}


In [8]:
# Initialize lists to store metrics
train_accuracies = []
test_accuracies = []
confusion_matrices = []
precision_scores = []
recall_scores = []
f1_scores = []

# Replacing `best_lr_model` with the best estimator from GridSearchCV
best_lr_model = grid_search.best_estimator_

for train_index, test_index in kf.split(X_selected):
    X_train, X_test = X_selected[train_index], X_selected[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the Logistic Regression model
    best_lr_model.fit(X_train, y_train)
    
    # Evaluate on the train set
    train_pred = best_lr_model.predict(X_train)
    train_accuracy = accuracy_score(y_train, train_pred)
    train_accuracies.append(train_accuracy)
    
    # Evaluate on the test set
    y_pred = best_lr_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_accuracies.append(test_accuracy)
    
    # Calculate Precision, Recall, and F1-Score for each fold
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average = 'weighted')
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels = [0, 1, 2])
    confusion_matrices.append(cm)
    
    # Print fold-specific metrics
    print(f'\nFold Test Accuracy: {test_accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix for this Fold:\n{cm}')

# Calculate the average metrics across all folds
avg_test_accuracy = np.mean(test_accuracies)
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)
avg_f1_score = np.mean(f1_scores)
avg_confusion_matrix = np.mean(confusion_matrices, axis = 0)

# Print the overall average metrics
print(f'Average Test Accuracy: {avg_test_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1_score:.4f}')
print(f'Average Confusion Matrix:\n{avg_confusion_matrix}')


Fold Test Accuracy: 0.6625
Precision: 0.6714
Recall: 0.6625
F1 Score: 0.6546
Confusion Matrix for this Fold:
[[37  0  2]
 [ 8 30 12]
 [11 21 39]]

Fold Test Accuracy: 0.6312
Precision: 0.6102
Recall: 0.6312
F1 Score: 0.6036
Confusion Matrix for this Fold:
[[54  0  0]
 [16 14 16]
 [10 17 33]]

Fold Test Accuracy: 0.6188
Precision: 0.6344
Recall: 0.6188
F1 Score: 0.6078
Confusion Matrix for this Fold:
[[37  0  3]
 [12 26 18]
 [16 12 36]]

Fold Test Accuracy: 0.6188
Precision: 0.5994
Recall: 0.6188
F1 Score: 0.6012
Confusion Matrix for this Fold:
[[44  0  3]
 [ 7 17 24]
 [10 17 38]]

Fold Test Accuracy: 0.7000
Precision: 0.7091
Recall: 0.7000
F1 Score: 0.6893
Confusion Matrix for this Fold:
[[48  0  1]
 [10 25 11]
 [14 12 39]]

Fold Test Accuracy: 0.6937
Precision: 0.6982
Recall: 0.6937
F1 Score: 0.6832
Confusion Matrix for this Fold:
[[52  0  6]
 [12 23 13]
 [10  8 36]]

Fold Test Accuracy: 0.6625
Precision: 0.6583
Recall: 0.6625
F1 Score: 0.6555
Confusion Matrix for this Fold:
[[50  0 

#KNN

In [9]:
# Feature selection with Recursive Feature Elimination (RFE) using ExtraTreesClassifier
selector = RFE(estimator = ExtraTreesClassifier(n_estimators = 100, random_state = 42), n_features_to_select = 10, step = 1)
X_selected = selector.fit_transform(X, y)

# Hyperparameter tuning using GridSearchCV for KNN
knn_model = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}
grid_search = GridSearchCV(estimator = knn_model, param_grid = param_grid, cv = kf, scoring = 'accuracy', n_jobs = -1, verbose = 1)
grid_search.fit(X_selected, y)

# Get the best hyperparameters for model
best_knn_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

Fitting 10 folds for each of 16 candidates, totalling 160 fits
Best Parameters: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}


In [10]:
# Initialize lists to store metrics
train_accuracies = []
test_accuracies = []
confusion_matrices = []
precision_scores = []
recall_scores = []
f1_scores = []

# Replacing `best_knn_model` with the best estimator from GridSearchCV
best_knn_model = grid_search.best_estimator_

for train_index, test_index in kf.split(X_selected):
    X_train, X_test = X_selected[train_index], X_selected[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the KNN model
    best_knn_model.fit(X_train, y_train)
    
    # Evaluate on the train set
    train_pred = best_knn_model.predict(X_train)
    train_accuracy = accuracy_score(y_train, train_pred)
    train_accuracies.append(train_accuracy)
    
    # Evaluate on the test set
    y_pred = best_knn_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_accuracies.append(test_accuracy)
    
    # Calculate Precision, Recall, and F1-Score for each fold
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average = 'weighted')
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels = [0, 1, 2])
    confusion_matrices.append(cm)
    
    # Print fold-specific metrics
    print(f'\nFold Test Accuracy: {test_accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix for this Fold:\n{cm}')

# Calculate the average metrics across all folds
avg_test_accuracy = np.mean(test_accuracies)
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)
avg_f1_score = np.mean(f1_scores)
avg_confusion_matrix = np.mean(confusion_matrices, axis = 0)

# Print the overall average metrics
print(f'Average Test Accuracy: {avg_test_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1_score:.4f}')
print(f'Average Confusion Matrix:\n{avg_confusion_matrix}')


Fold Test Accuracy: 0.8375
Precision: 0.8449
Recall: 0.8375
F1 Score: 0.8362
Confusion Matrix for this Fold:
[[39  0  0]
 [ 2 42  6]
 [ 3 15 53]]

Fold Test Accuracy: 0.8625
Precision: 0.8630
Recall: 0.8625
F1 Score: 0.8618
Confusion Matrix for this Fold:
[[54  0  0]
 [ 1 37  8]
 [ 1 12 47]]

Fold Test Accuracy: 0.8125
Precision: 0.8187
Recall: 0.8125
F1 Score: 0.8076
Confusion Matrix for this Fold:
[[40  0  0]
 [ 1 48  7]
 [ 5 17 42]]

Fold Test Accuracy: 0.8688
Precision: 0.8677
Recall: 0.8688
F1 Score: 0.8672
Confusion Matrix for this Fold:
[[47  0  0]
 [ 0 40  8]
 [ 4  9 52]]

Fold Test Accuracy: 0.8313
Precision: 0.8306
Recall: 0.8313
F1 Score: 0.8292
Confusion Matrix for this Fold:
[[49  0  0]
 [ 0 36 10]
 [ 4 13 48]]

Fold Test Accuracy: 0.8938
Precision: 0.9016
Recall: 0.8938
F1 Score: 0.8913
Confusion Matrix for this Fold:
[[58  0  0]
 [ 1 45  2]
 [ 3 11 40]]

Fold Test Accuracy: 0.8688
Precision: 0.8752
Recall: 0.8688
F1 Score: 0.8665
Confusion Matrix for this Fold:
[[56  0 

#Decision Tree

In [11]:
# Feature selection with Recursive Feature Elimination (RFE)
dt_model = DecisionTreeClassifier(random_state = 42)
selector = RFE(estimator = dt_model, n_features_to_select = 10, step = 1)
X_selected = selector.fit_transform(X, y)

# Hyperparameter tuning using GridSearchCV for Decision Tree
param_grid = {
    'max_depth': [4, 6, 8, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'criterion': ['gini', 'entropy']
}
grid_search = GridSearchCV(estimator = dt_model, param_grid = param_grid, cv = kf, scoring = 'accuracy', n_jobs = -1, verbose = 1)
grid_search.fit(X_selected, y)

# Get the best hyperparameters for model
best_dt_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

Fitting 10 folds for each of 72 candidates, totalling 720 fits
Best Parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2}


In [12]:
# Initialize lists to store metrics
train_accuracies = []
test_accuracies = []
confusion_matrices = []
precision_scores = []
recall_scores = []
f1_scores = []

# Replacing `best_dt_model` with the best estimator from GridSearchCV
best_dt_model = grid_search.best_estimator_

for train_index, test_index in kf.split(X_selected):
    X_train, X_test = X_selected[train_index], X_selected[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the Decision Tree model
    best_dt_model.fit(X_train, y_train)
    
    # Evaluate on the train set
    train_pred = best_dt_model.predict(X_train)
    train_accuracy = accuracy_score(y_train, train_pred)
    train_accuracies.append(train_accuracy)
    
    # Evaluate on the test set
    y_pred = best_dt_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_accuracies.append(test_accuracy)
    
    # Calculate precision, recall, and F1 score for each fold
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average = 'weighted')
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels = [0, 1, 2])
    confusion_matrices.append(cm)
    
    # Print fold-specific metrics
    print(f'\nFold Test Accuracy: {test_accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix for this Fold:\n{cm}')

# Calculate the average metrics across all folds
avg_test_accuracy = np.mean(test_accuracies)
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)
avg_f1_score = np.mean(f1_scores)
avg_confusion_matrix = np.mean(confusion_matrices, axis = 0)

# Print the overall average metrics
print(f'Average Test Accuracy: {avg_test_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1_score:.4f}')
print(f'Average Confusion Matrix:\n{avg_confusion_matrix}')


Fold Test Accuracy: 0.7500
Precision: 0.7475
Recall: 0.7500
F1 Score: 0.7480
Confusion Matrix for this Fold:
[[37  0  2]
 [ 4 32 14]
 [ 2 18 51]]

Fold Test Accuracy: 0.7875
Precision: 0.7859
Recall: 0.7875
F1 Score: 0.7851
Confusion Matrix for this Fold:
[[49  3  2]
 [ 3 28 15]
 [ 2  9 49]]

Fold Test Accuracy: 0.7875
Precision: 0.7872
Recall: 0.7875
F1 Score: 0.7871
Confusion Matrix for this Fold:
[[36  2  2]
 [ 1 43 12]
 [ 4 13 47]]

Fold Test Accuracy: 0.8187
Precision: 0.8155
Recall: 0.8187
F1 Score: 0.8165
Confusion Matrix for this Fold:
[[47  0  0]
 [ 4 33 11]
 [ 0 14 51]]

Fold Test Accuracy: 0.7125
Precision: 0.7216
Recall: 0.7125
F1 Score: 0.7067
Confusion Matrix for this Fold:
[[45  2  2]
 [ 0 35 11]
 [ 8 23 34]]

Fold Test Accuracy: 0.7500
Precision: 0.7552
Recall: 0.7500
F1 Score: 0.7520
Confusion Matrix for this Fold:
[[51  2  5]
 [ 1 34 13]
 [ 3 16 35]]

Fold Test Accuracy: 0.7875
Precision: 0.7930
Recall: 0.7875
F1 Score: 0.7897
Confusion Matrix for this Fold:
[[50  2 