In [1]:
import wandb
from TensorKit.Layers import *
from TensorKit.Models import *
from TensorKit.Optimizers import *
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets.fashion_mnist import load_data

# Seed NumPy's global RNG so stochastic steps further down are repeatable.
np.random.seed(42)


def convert_to_one_hot(y, num_classes=None):
    """Turn an integer label array into a one-hot matrix.

    Parameters
    ----------
    y : integer ndarray of class indices.
    num_classes : optional int, width of the encoding. When omitted it is
        inferred as ``y.max() + 1``, which is only correct if the largest
        class actually occurs in ``y``.

    Returns
    -------
    ndarray of shape ``y.shape + (num_classes,)`` built by indexing an
    identity matrix with ``y``.
    """
    if num_classes is None:
        # int() keeps the "+ 1" in Python integer arithmetic, so a label array
        # stored in a narrow unsigned dtype cannot wrap around.
        num_classes = int(y.max()) + 1
    return np.eye(num_classes)[y]


def normalize(x):
    """Standardise every row of a 2-D array to zero mean and unit std.

    Rows whose standard deviation is exactly zero (constant rows) are returned
    as all zeros instead of NaN from a 0/0 division.
    """
    centered = x - x.mean(axis=1, keepdims=True)
    row_std = x.std(axis=1, keepdims=True)
    # Constant rows: their centred values are already 0, so dividing by 1
    # leaves them at 0 rather than producing NaN.
    row_std[row_std == 0] = 1
    return centered / row_std

In [2]:
# Display names for the ten classes; passed as `labels` when the confusion
# matrix is plotted. Position i is assumed to name integer class i.
names = [
    "T-shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [3]:
## BEST PARAMETERS ##

# --- network shape ---
hl = 3                 # iteration count of the hidden-layer loop in the model cell
hn = 128               # width used for every hidden Dense layer
act = "leaky_relu"     # activation of the hidden layers
w_init = "xavier"      # weight initialisation scheme handed to each Dense layer

# --- optimisation ---
lr = 0.01              # learning rate given to the optimiser
opt = Adagrad(lr=lr)
l2 = 0.05              # forwarded as `weight_decay` to model.compile
bs = 64                # batch size for fit / evaluate / predict
epochs = 15            # number of training epochs

In [4]:
# Load Fashion-MNIST and hold out 10% of the training images for validation.
(trainX, trainy), (testX, testy) = load_data()
# random_state is pinned (same value as the notebook-level NumPy seed) so that
# re-running this cell on its own reproduces the same train/validation split
# instead of depending on how much of the global RNG has been consumed.
trainX, valX, trainy, valy = train_test_split(
    trainX, trainy, test_size=0.1, shuffle=True, random_state=42
)

# Flatten every image to a single row, then standardise each row independently.
trainX = normalize(trainX.reshape(trainX.shape[0], -1))
testX = normalize(testX.reshape(testX.shape[0], -1))
valX = normalize(valX.reshape(valX.shape[0], -1))

# Keep the integer test labels before they are one-hot encoded below.
testy_cat = testy # for plotting confusion matrix
trainy = convert_to_one_hot(trainy)
testy = convert_to_one_hot(testy)
valy = convert_to_one_hot(valy)

In [5]:
# Widths of the stacked hidden layers: the input width followed by (hl + 1)
# copies of hn, i.e. one input-facing hidden layer plus `hl` further ones.
layer_widths = [trainX.shape[1]] + [hn] * (hl + 1)

model = Sequential()
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    model.add(Dense(fan_in, fan_out, activation=act, initialization=w_init))
# Softmax output layer, one unit per one-hot column of the labels.
model.add(Dense(hn, trainy.shape[1], activation="softmax", initialization=w_init))

model.compile(optimizer=opt, loss="categorical_crossentropy", weight_decay=l2)
model.fit(
    trainX,
    trainy,
    epochs=epochs,
    batch_size=bs,
    validation_data=(valX, valy),
    wandb_log=False,
)

# Report loss and accuracy on the held-out test set.
test_loss, test_acc = model.evaluate(testX, testy, batch_size=bs)
print(f"test_loss:{test_loss:.4f}, test_accuracy: {test_acc:.4f}")

epoch 1/15:	train_loss: 0.3485, train_accuracy: 0.8730, val_loss: 0.3627, val_accuracy: 0.8655
epoch 2/15:	train_loss: 0.3012, train_accuracy: 0.8905, val_loss: 0.3287, val_accuracy: 0.8813
epoch 3/15:	train_loss: 0.2858, train_accuracy: 0.8954, val_loss: 0.3219, val_accuracy: 0.8797
epoch 4/15:	train_loss: 0.2591, train_accuracy: 0.9055, val_loss: 0.3069, val_accuracy: 0.8843
epoch 5/15:	train_loss: 0.2559, train_accuracy: 0.9057, val_loss: 0.3120, val_accuracy: 0.8827
epoch 6/15:	train_loss: 0.2469, train_accuracy: 0.9091, val_loss: 0.3137, val_accuracy: 0.8870
epoch 7/15:	train_loss: 0.2308, train_accuracy: 0.9154, val_loss: 0.3034, val_accuracy: 0.8895
epoch 8/15:	train_loss: 0.2268, train_accuracy: 0.9176, val_loss: 0.3089, val_accuracy: 0.8892
epoch 9/15:	train_loss: 0.2163, train_accuracy: 0.9212, val_loss: 0.3079, val_accuracy: 0.8893
epoch 10/15:	train_loss: 0.2041, train_accuracy: 0.9258, val_loss: 0.3021, val_accuracy: 0.8922
epoch 11/15:	train_loss: 0.2041, train_accuracy: 

In [None]:
# Log a confusion matrix of the test-set predictions to Weights & Biases.
wandb.login()
run = wandb.init(project="IITM-CS6910-Projects", entity="cs21m070_cs21m022")
try:
    # predict_proba=False presumably yields class labels rather than
    # probabilities -- TODO confirm against TensorKit's predict().
    y_pred = model.predict(testX, batch_size=bs, predict_proba=False)
    wandb.sklearn.plot_confusion_matrix(testy_cat, y_pred, labels=names)
finally:
    # Always close the run, even if prediction or plotting raises, so a
    # failed cell does not leave a run open.
    run.finish()
