In [1]:
import numpy as np
import tensorflow as tf
from keras.datasets import fashion_mnist
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
assert x_train.shape == (60000, 28, 28)
assert x_test.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)

x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [4]:
x_train0 = x_train[y_train == 0]
x_train6 = x_train[y_train == 6]
x_train_others = np.vstack((x_train0, x_train6))
print(x_train_others.shape)

(12000, 28, 28)


In [5]:
x_test0 = x_test[y_test == 0]
x_test6 = x_test[y_test == 6]
x_test_others = np.vstack((x_test0, x_test6))
print(x_test_others.shape)

(2000, 28, 28)


In [6]:
y_train0 = y_train[y_train == 0]
y_train6 = y_train[y_train == 6]
y_train_others = np.concatenate((y_train0, y_train6))
print(y_train_others.shape)

(12000,)


In [7]:
y_test0 = y_test[y_test == 0]
y_test6 = y_test[y_test == 6]
y_test_others = np.concatenate((y_test0, y_test6))
print(y_test_others.shape)

(2000,)


In [8]:
from sklearn.model_selection import train_test_split
X_train_others_clf, X_val_others_clf, y_train_others_clf, y_val_others_clf = train_test_split(x_train_others, y_train_others, test_size=0.1, random_state=42)

y_train_clf_others_encoded = tf.one_hot(y_train_others_clf, depth = 10)
y_val_clf_others_encoded = tf.one_hot(y_val_others_clf, depth = 10)
y_test_others_clf_encoded = tf.one_hot(y_test_others, depth = 10)

In [9]:
X_train_others_clf = X_train_others_clf.reshape(len(X_train_others_clf), 784)
X_val_others_clf = X_val_others_clf.reshape(len(X_val_others_clf), 784)
x_test_others = x_test_others.reshape(len(x_test_others), 784)

In [10]:
randomForest = RandomForestClassifier(n_estimators=196, criterion='gini', max_depth=15, random_state=42)

In [11]:
randomForest.fit(X_train_others_clf, y_train_clf_others_encoded)

In [12]:
import pickle

filename = 'randomForestClassifier24.pkl'
pickle.dump(randomForest,  open('/content/drive/Shareddrives/ECEN_758_Project/' + filename, 'wb'))

In [13]:
y_val_rf = randomForest.predict(X_val_others_clf)

# fig, ax = plt.subplots(figsize=(10, 5))
# ConfusionMatrixDisplay.from_predictions(testdata['label'], y_predTest, ax=ax)
# ax.xaxis.set_ticklabels(labels_info.values(), rotation = 90)
# ax.yaxis.set_ticklabels(labels_info.values())
# _ = ax.set_title(
#     f"Confusion Matrix for Random Forest Classifier Test Set"
# )

print(classification_report(y_val_clf_others_encoded, y_val_rf))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.85      0.90      0.88       614
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.89      0.84      0.86       586
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

   micro avg       0.87      0.87      0.87      1200
   macro avg       0.17      0.17      0.17      1200
weighted avg       0.87      0.87      0.87      1200
 samples avg       0.87      0.87      0.87      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
y_test_rf = randomForest.predict(x_test_others)

# fig, ax = plt.subplots(figsize=(10, 5))
# ConfusionMatrixDisplay.from_predictions(testdata['label'], y_predTest, ax=ax)
# ax.xaxis.set_ticklabels(labels_info.values(), rotation = 90)
# ax.yaxis.set_ticklabels(labels_info.values())
# _ = ax.set_title(
#     f"Confusion Matrix for Random Forest Classifier Test Set"
# )

print(classification_report(y_test_others_clf_encoded, y_test_rf))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.86      0.90      0.88      1000
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.90      0.85      0.87      1000
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

   micro avg       0.88      0.88      0.88      2000
   macro avg       0.18      0.18      0.18      2000
weighted avg       0.88      0.88      0.88      2000
 samples avg       0.88      0.88      0.88      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
from xgboost import XGBClassifier

In [16]:
xgboost = XGBClassifier(n_estimators=35, max_depth=15, learning_rate=1)

In [17]:
xgboost.fit(X_train_others_clf, y_train_clf_others_encoded)

In [18]:
import pickle

filename = 'xgboost26.pkl'
pickle.dump(xgboost,  open('/content/drive/Shareddrives/ECEN_758_Project/' + filename, 'wb'))

In [19]:
y_val_rf = xgboost.predict(X_val_others_clf)

# fig, ax = plt.subplots(figsize=(10, 5))
# ConfusionMatrixDisplay.from_predictions(testdata['label'], y_predTest, ax=ax)
# ax.xaxis.set_ticklabels(labels_info.values(), rotation = 90)
# ax.yaxis.set_ticklabels(labels_info.values())
# _ = ax.set_title(
#     f"Confusion Matrix for Random Forest Classifier Test Set"
# )

print(classification_report(y_val_clf_others_encoded, y_val_rf))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.86      0.88      0.87       614
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.88      0.87      0.87       586
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

   micro avg       0.87      0.87      0.87      1200
   macro avg       0.17      0.17      0.17      1200
weighted avg       0.87      0.87      0.87      1200
 samples avg       0.85      0.87      0.85      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
y_test_rf = xgboost.predict(x_test_others)

# fig, ax = plt.subplots(figsize=(10, 5))
# ConfusionMatrixDisplay.from_predictions(testdata['label'], y_predTest, ax=ax)
# ax.xaxis.set_ticklabels(labels_info.values(), rotation = 90)
# ax.yaxis.set_ticklabels(labels_info.values())
# _ = ax.set_title(
#     f"Confusion Matrix for Random Forest Classifier Test Set"
# )

print(classification_report(y_test_others_clf_encoded, y_test_rf))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.88      0.87      0.88      1000
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.87      0.87      0.87      1000
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

   micro avg       0.88      0.87      0.87      2000
   macro avg       0.18      0.17      0.17      2000
weighted avg       0.88      0.87      0.87      2000
 samples avg       0.85      0.87      0.86      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
