In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import xgboost as xgb

df = pd.read_csv('FinalModelResults_TrueLabels.csv')
df = df.iloc[:, 1:]

X = df.drop(columns=['True_Preds', 'max_count'])  
y = df['True_Preds']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'n_estimators': [ 200, 300, 400],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0]
}

xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=4)

grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)

print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation accuracy: {grid_search.best_score_:.4f}")

best_model = grid_search.best_estimator_
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test set accuracy: {accuracy:.4f}")

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

cr = classification_report(y_test, y_pred)
print("Classification Report:")
print(cr)

cv_scores = cross_val_score(best_model, X, y, cv=5)
print(f"Cross-Validation Scores: {cv_scores}")
print(f"Mean Cross-Validation Score: {cv_scores.mean():.4f}")


Fitting 5 folds for each of 324 candidates, totalling 1620 fits
Best parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'subsample': 0.8}
Best cross-validation accuracy: 0.9256
Test set accuracy: 0.9252
Confusion Matrix:
[[57  0  2  3]
 [ 0 69  0  0]
 [ 1  0 52  6]
 [ 1  0  6 57]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.92      0.94        62
           1       1.00      1.00      1.00        69
           2       0.87      0.88      0.87        59
           3       0.86      0.89      0.88        64

    accuracy                           0.93       254
   macro avg       0.92      0.92      0.92       254
weighted avg       0.93      0.93      0.93       254

Cross-Validation Scores: [0.84023669 0.89349112 0.9408284  0.94674556 0.95266272]
Mean Cross-Validation Score: 0.9148
