In [1]:
import pickle

import numpy as np
import tensorflow as tf
from keras.datasets import fashion_mnist
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
from sklearn.model_selection import train_test_split

In [2]:
# Load the Fashion-MNIST train/test arrays through the Keras dataset helper.
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Sanity-check the layout: 60,000 training and 10,000 test images of
# 28x28 pixels, with exactly one label per image.
assert x_train.shape == (60000, 28, 28)
assert y_train.shape == (60000,)
assert x_test.shape == (10000, 28, 28)
assert y_test.shape == (10000,)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
# Cast both image arrays to float32 and divide by 255
# (presumably the raw pixels span 0-255, so this yields values in [0, 1] -- confirm).
# NOTE: this cell rebinds x_train / x_test to their own scaled versions, so
# re-running it without re-running the load cell divides by 255 a second time.
x_train, x_test = (
    pixels.astype('float32') / 255
    for pixels in (x_train, x_test)
)

In [4]:
# Hold out 20% of the training images as a validation set.
# random_state is fixed so the same split is produced on every run.
# (train_test_split is imported in the notebook's top import cell.)
X_train_clf, X_val_clf, y_train_clf, y_val_clf = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Flatten every image into a single 784-element feature row so the
# train / validation / test sets become 2-D (n_samples, 784) matrices.
X_train_rf, X_val_rf, X_test_rf = (
    images.reshape(images.shape[0], 784)
    for images in (X_train_clf, X_val_clf, x_test)
)

In [6]:
# Random forest configuration: 100 trees, Gini split criterion, tree depth
# capped at 35, and a fixed seed so training is repeatable.
randomForest = RandomForestClassifier(
    n_estimators=100,
    max_depth=35,
    criterion='gini',
    random_state=20,
)

In [7]:
# Train the forest on the flattened training split (validation rows are held out).
randomForest.fit(X=X_train_rf, y=y_train_clf)

In [8]:
# Persist the fitted forest to disk so it can be reloaded without retraining.
# (pickle is imported in the notebook's top import cell.)
filename = 'randomForestClassifier.pkl'

# Use a context manager so the file handle is flushed and closed as soon as
# the dump finishes, instead of leaving an open handle for the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(randomForest, model_file)

In [12]:
# Predict labels for the held-out validation split.
y_val_rf = randomForest.predict(X_val_rf)

# TODO: add a confusion-matrix plot for this split
# (e.g. ConfusionMatrixDisplay.from_predictions(y_val_clf, y_val_rf)).

# Per-class precision / recall / F1 on the validation split.
print(classification_report(y_true=y_val_clf, y_pred=y_val_rf))

              precision    recall  f1-score   support

           0       0.82      0.86      0.84      1202
           1       1.00      0.97      0.98      1219
           2       0.78      0.83      0.80      1205
           3       0.86      0.93      0.89      1184
           4       0.78      0.83      0.80      1202
           5       0.97      0.96      0.97      1211
           6       0.75      0.59      0.66      1218
           7       0.94      0.94      0.94      1159
           8       0.96      0.96      0.96      1197
           9       0.95      0.96      0.95      1203

    accuracy                           0.88     12000
   macro avg       0.88      0.88      0.88     12000
weighted avg       0.88      0.88      0.88     12000



In [13]:
# Predict labels for the test images, which played no part in training.
y_test_rf = randomForest.predict(X_test_rf)

# TODO: add a confusion-matrix plot for this split
# (e.g. ConfusionMatrixDisplay.from_predictions(y_test, y_test_rf)).

# Per-class precision / recall / F1 on the test set.
print(classification_report(y_true=y_test, y_pred=y_test_rf))

              precision    recall  f1-score   support

           0       0.83      0.86      0.84      1000
           1       0.99      0.96      0.98      1000
           2       0.76      0.81      0.78      1000
           3       0.88      0.91      0.89      1000
           4       0.77      0.82      0.80      1000
           5       0.97      0.95      0.96      1000
           6       0.72      0.58      0.64      1000
           7       0.92      0.95      0.93      1000
           8       0.96      0.97      0.96      1000
           9       0.95      0.94      0.95      1000

    accuracy                           0.88     10000
   macro avg       0.88      0.88      0.87     10000
weighted avg       0.88      0.88      0.87     10000

