# **XGBoost Implementation with Fashion MNIST**

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
import xgboost as xgb
from tensorflow.keras.datasets import fashion_mnist

### For reproducibility

In [4]:

# Seed NumPy's global random number generator so NumPy-based randomness is repeatable.
# Note: this does not seed XGBoost, which takes its own `seed` training parameter.
SEED = 42
np.random.seed(SEED)


## 1. Data Loading and Preprocessing

In [5]:
# Load Fashion MNIST (28x28 grayscale images with integer class labels).
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# XGBoost works on tabular feature vectors rather than image matrices, so
# flatten each 28x28 image into one 784-element row (one column per pixel).
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

# Scale pixel values from [0, 255] to [0, 1]. Casting to float32 first keeps the
# result in float32 instead of NumPy's default float64, halving the memory used
# by the feature matrices (XGBoost works with 32-bit floats internally anyway).
# Tree ensembles split on thresholds, so this scaling is not required for
# XGBoost; it only puts the features on a conventional [0, 1] range.
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0

# Wrap features and labels in XGBoost's DMatrix container for the native API.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)


## 2. Model Training with XGBoost

### Define hyperparameters

In [6]:

# Booster configuration passed to xgb.train.
params = {
    'objective': 'multi:softmax',  # multiclass; predict() returns the class id directly
    'num_class': 10,               # number of target classes
    'max_depth': 6,                # maximum depth of each tree
    'eta': 0.3,                    # learning rate (shrinkage applied per boosting round)
    'subsample': 0.8,              # fraction of training rows sampled per boosting round
    'colsample_bytree': 0.8,       # fraction of feature columns sampled per tree
    'eval_metric': 'merror',       # multiclass error rate; only reported when `evals` is passed to xgb.train
    # The row/column sampling above uses XGBoost's own random number generator,
    # which np.random.seed does not control. Pin it explicitly; 0 is XGBoost's
    # default seed, so stating it does not change the trained model.
    'seed': 0,
}

### Train the model

In [7]:
# Boost for a fixed number of rounds on the training DMatrix
# (no evaluation set and no early stopping are configured here).
num_round = 100
model = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_round)

## 3. Model Evaluation

### Make predictions

In [8]:
# With the 'multi:softmax' objective, predict() returns the predicted class id
# for every row as a floating-point array. Cast to integers so the predictions
# are integer class labels like the ground truth they are compared against.
preds = model.predict(dtest).astype(int)

### Accuracy score

In [9]:
# Share of test samples whose predicted class equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=preds)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 89.82%


### F1 Score, Precision & Recall

In [10]:
# All three scores are averaged across classes, weighted by each class's support.
weighted_avg = {"average": "weighted"}
f1 = f1_score(y_test, preds, **weighted_avg)
precision = precision_score(y_test, preds, **weighted_avg)
recall = recall_score(y_test, preds, **weighted_avg)

# Report each summary metric to four decimal places.
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

F1 Score: 0.8978
Precision: 0.8979
Recall: 0.8982


### Classification Report

In [11]:
# Per-class precision, recall, F1 and support for the test-set predictions.
print("Classification Report:")
report_text = classification_report(y_true=y_test, y_pred=preds)
print(report_text)


Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.87      0.85      1000
           1       0.99      0.97      0.98      1000
           2       0.80      0.84      0.82      1000
           3       0.90      0.91      0.91      1000
           4       0.82      0.84      0.83      1000
           5       0.99      0.97      0.98      1000
           6       0.74      0.68      0.71      1000
           7       0.95      0.97      0.96      1000
           8       0.98      0.97      0.97      1000
           9       0.96      0.97      0.97      1000

    accuracy                           0.90     10000
   macro avg       0.90      0.90      0.90     10000
weighted avg       0.90      0.90      0.90     10000



### Confusion Matrix

In [12]:
# Rows are true classes and columns are predicted classes, so off-diagonal
# entries show which classes get mistaken for one another.
print("Confusion Matrix:")
conf_matrix = confusion_matrix(y_true=y_test, y_pred=preds)
print(conf_matrix)

Confusion Matrix:
[[866   0  17  17   3   1  90   0   6   0]
 [  3 973   1  15   4   0   3   0   1   0]
 [ 16   1 841   7  76   0  58   0   1   0]
 [ 18   3  16 912  24   0  25   0   2   0]
 [  0   0  79  30 836   0  53   0   2   0]
 [  0   0   1   0   0 971   0  17   0  11]
 [125   1  90  24  73   1 675   0  11   0]
 [  0   0   0   0   0   6   0 968   0  26]
 [  1   1   5   3   3   2  10   4 971   0]
 [  0   0   0   0   0   2   0  28   1 969]]
