In [1]:
import numpy as np
from TensorKit.Layers import *
from TensorKit.Models import *
from TensorKit.Optimizers import *
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets.mnist import load_data
# Seed NumPy's global random number generator with a fixed value so runs are repeatable.
np.random.seed(42)

def convert_to_one_hot(y, num_classes=None):
    """One-hot encode an array of non-negative integer class labels.

    Parameters
    ----------
    y : array-like of int
        Class labels; each value selects the column set to 1.
    num_classes : int, optional
        Width of the encoding. When omitted it is inferred as ``y.max() + 1``,
        which matches the previous behaviour. Pass it explicitly when several
        splits must share one width even if a split is missing the top label.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``y.shape + (num_classes,)``.
    """
    y = np.asarray(y)
    if num_classes is None:
        # Convert to a Python int first so "+ 1" cannot wrap around for
        # small unsigned label dtypes.
        num_classes = int(y.max()) + 1
    return np.eye(num_classes)[y]
def normalize(x):
    """Standardise each row of a 2-D array to zero mean and unit variance.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Data whose rows are standardised independently (statistics are taken
        along axis 1).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x``. A row whose values are all equal
        (zero standard deviation) comes back as all zeros instead of NaN.
    """
    x = np.asarray(x)
    mean = x.mean(axis=1, keepdims=True)
    std = x.std(axis=1, keepdims=True)
    # A constant row would otherwise give 0/0 = NaN; dividing by 1 leaves the
    # already-zero centred values untouched. Rows with std > 0 are unaffected.
    std[std == 0] = 1
    return (x - mean) / std

In [2]:
(trainX, trainy), (testX, testy) = load_data()

# Carve a validation set out of the training data (test_size=0.1). random_state
# is pinned to the same value as the global seed so that re-running this cell
# alone reproduces the same split instead of depending on the current state of
# NumPy's global RNG.
trainX, valX, trainy, valy = train_test_split(
    trainX, trainy, test_size=0.1, shuffle=True, random_state=42
)

# Flatten every sample to a single row, then standardise each row.
trainX = normalize(trainX.reshape(trainX.shape[0], -1))
testX = normalize(testX.reshape(testX.shape[0], -1))
valX = normalize(valX.reshape(valX.shape[0], -1))

trainy = convert_to_one_hot(trainy)
testy = convert_to_one_hot(testy)
valy = convert_to_one_hot(valy)

# Each split is encoded separately, so verify the label widths agree before
# they are used to size the network's output layer.
assert trainy.shape[1] == valy.shape[1] == testy.shape[1], "one-hot widths differ between splits"

### Hyperparameter configuration 1 — Adagrad, 128-unit layers, leaky ReLU

In [3]:
# Hyperparameters for this run.
hn, hl = 128, 3                          # layer width; number of extra hn -> hn layers
l2, bs, lr, epochs = 0.05, 64, 0.01, 15  # weight decay, batch size, learning rate, epochs
w_init, activation = "xavier", "leaky_relu"
optimizer = Adagrad(lr=lr)

# Widths from input to output; each consecutive pair is one Dense layer's
# (input width, output width).
layer_widths = [trainX.shape[1]] + [hn] * (hl + 1) + [trainy.shape[1]]

model = Sequential()
# Every layer except the last uses the configured activation.
for n_in, n_out in zip(layer_widths[:-2], layer_widths[1:-1]):
    model.add(Dense(n_in, n_out, activation=activation, initialization=w_init))
# The final layer maps to the label width with a softmax.
model.add(Dense(layer_widths[-2], layer_widths[-1], activation="softmax", initialization=w_init))

model.compile(optimizer=optimizer, loss="categorical_crossentropy", weight_decay=l2)
model.fit(
    trainX,
    trainy,
    batch_size=bs,
    epochs=epochs,
    validation_data=(valX, valy),
)

# Report performance on the held-out test split.
loss, acc = model.evaluate(testX, testy, batch_size=bs)
print(f"test_loss: {loss:.4f}, test_accuracy: {acc:.4f}")

epoch 1/15:	train_loss: 0.1143, train_accuracy: 0.9646, val_loss: 0.1248, val_accuracy: 0.9600
epoch 2/15:	train_loss: 0.0735, train_accuracy: 0.9780, val_loss: 0.0908, val_accuracy: 0.9710
epoch 3/15:	train_loss: 0.0535, train_accuracy: 0.9848, val_loss: 0.0763, val_accuracy: 0.9765
epoch 4/15:	train_loss: 0.0432, train_accuracy: 0.9877, val_loss: 0.0726, val_accuracy: 0.9772
epoch 5/15:	train_loss: 0.0357, train_accuracy: 0.9906, val_loss: 0.0708, val_accuracy: 0.9775
epoch 6/15:	train_loss: 0.0301, train_accuracy: 0.9923, val_loss: 0.0691, val_accuracy: 0.9783
epoch 7/15:	train_loss: 0.0259, train_accuracy: 0.9937, val_loss: 0.0648, val_accuracy: 0.9795
epoch 8/15:	train_loss: 0.0212, train_accuracy: 0.9952, val_loss: 0.0624, val_accuracy: 0.9795
epoch 9/15:	train_loss: 0.0186, train_accuracy: 0.9963, val_loss: 0.0580, val_accuracy: 0.9825
epoch 10/15:	train_loss: 0.0187, train_accuracy: 0.9957, val_loss: 0.0634, val_accuracy: 0.9802
epoch 11/15:	train_loss: 0.0148, train_accuracy: 

### Hyperparameter configuration 2 — Adam, 128-unit layers, leaky ReLU, no weight decay

In [4]:
# Hyperparameters for this run.
hn, hl = 128, 4                            # layer width; number of extra hn -> hn layers
l2, bs, lr, epochs = 0.0, 16, 0.0001, 15   # weight decay, batch size, learning rate, epochs
w_init, activation = "xavier", "leaky_relu"
optimizer = Adam(lr=lr)

# Widths from input to output; each consecutive pair is one Dense layer's
# (input width, output width).
layer_widths = [trainX.shape[1]] + [hn] * (hl + 1) + [trainy.shape[1]]

model = Sequential()
# Every layer except the last uses the configured activation.
for n_in, n_out in zip(layer_widths[:-2], layer_widths[1:-1]):
    model.add(Dense(n_in, n_out, activation=activation, initialization=w_init))
# The final layer maps to the label width with a softmax.
model.add(Dense(layer_widths[-2], layer_widths[-1], activation="softmax", initialization=w_init))

model.compile(optimizer=optimizer, loss="categorical_crossentropy", weight_decay=l2)
model.fit(
    trainX,
    trainy,
    batch_size=bs,
    epochs=epochs,
    validation_data=(valX, valy),
)

# Report performance on the held-out test split.
loss, acc = model.evaluate(testX, testy, batch_size=bs)
print(f"test_loss: {loss:.4f}, test_accuracy: {acc:.4f}")

epoch 1/15:	train_loss: 0.1595, train_accuracy: 0.9521, val_loss: 0.1715, val_accuracy: 0.9480
epoch 2/15:	train_loss: 0.0985, train_accuracy: 0.9707, val_loss: 0.1216, val_accuracy: 0.9628
epoch 3/15:	train_loss: 0.0774, train_accuracy: 0.9771, val_loss: 0.1073, val_accuracy: 0.9675
epoch 4/15:	train_loss: 0.0618, train_accuracy: 0.9801, val_loss: 0.0896, val_accuracy: 0.9717
epoch 5/15:	train_loss: 0.0451, train_accuracy: 0.9868, val_loss: 0.0828, val_accuracy: 0.9740
epoch 6/15:	train_loss: 0.0350, train_accuracy: 0.9897, val_loss: 0.0778, val_accuracy: 0.9750
epoch 7/15:	train_loss: 0.0339, train_accuracy: 0.9893, val_loss: 0.0797, val_accuracy: 0.9747
epoch 8/15:	train_loss: 0.0235, train_accuracy: 0.9930, val_loss: 0.0813, val_accuracy: 0.9767
epoch 9/15:	train_loss: 0.0223, train_accuracy: 0.9934, val_loss: 0.0858, val_accuracy: 0.9748
epoch 10/15:	train_loss: 0.0218, train_accuracy: 0.9931, val_loss: 0.0867, val_accuracy: 0.9760
epoch 11/15:	train_loss: 0.0182, train_accuracy: 

### Hyperparameter configuration 3 — Adagrad, 64-unit layers, ReLU

In [5]:
# Hyperparameters for this run.
hn, hl = 64, 4                           # layer width; number of extra hn -> hn layers
l2, bs, lr, epochs = 0.05, 64, 0.01, 10  # weight decay, batch size, learning rate, epochs
w_init, activation = "xavier", "relu"
optimizer = Adagrad(lr=lr)

# Widths from input to output; each consecutive pair is one Dense layer's
# (input width, output width).
layer_widths = [trainX.shape[1]] + [hn] * (hl + 1) + [trainy.shape[1]]

model = Sequential()
# Every layer except the last uses the configured activation.
for n_in, n_out in zip(layer_widths[:-2], layer_widths[1:-1]):
    model.add(Dense(n_in, n_out, activation=activation, initialization=w_init))
# The final layer maps to the label width with a softmax.
model.add(Dense(layer_widths[-2], layer_widths[-1], activation="softmax", initialization=w_init))

model.compile(optimizer=optimizer, loss="categorical_crossentropy", weight_decay=l2)
model.fit(
    trainX,
    trainy,
    batch_size=bs,
    epochs=epochs,
    validation_data=(valX, valy),
)

# Report performance on the held-out test split.
loss, acc = model.evaluate(testX, testy, batch_size=bs)
print(f"test_loss: {loss:.4f}, test_accuracy: {acc:.4f}")

epoch 1/10:	train_loss: 0.1333, train_accuracy: 0.9601, val_loss: 0.1403, val_accuracy: 0.9565
epoch 2/10:	train_loss: 0.1020, train_accuracy: 0.9701, val_loss: 0.1144, val_accuracy: 0.9650
epoch 3/10:	train_loss: 0.0862, train_accuracy: 0.9745, val_loss: 0.1027, val_accuracy: 0.9673
epoch 4/10:	train_loss: 0.0739, train_accuracy: 0.9782, val_loss: 0.0970, val_accuracy: 0.9707
epoch 5/10:	train_loss: 0.0626, train_accuracy: 0.9816, val_loss: 0.0884, val_accuracy: 0.9727
epoch 6/10:	train_loss: 0.0574, train_accuracy: 0.9839, val_loss: 0.0857, val_accuracy: 0.9743
epoch 7/10:	train_loss: 0.0480, train_accuracy: 0.9865, val_loss: 0.0797, val_accuracy: 0.9755
epoch 8/10:	train_loss: 0.0453, train_accuracy: 0.9877, val_loss: 0.0803, val_accuracy: 0.9757
epoch 9/10:	train_loss: 0.0420, train_accuracy: 0.9883, val_loss: 0.0800, val_accuracy: 0.9760
epoch 10/10:	train_loss: 0.0399, train_accuracy: 0.9890, val_loss: 0.0833, val_accuracy: 0.9758


test_loss: 0.0799, test_accuracy: 0.9749
