In [1]:
import os
import time

import pandas as pd
import torch; torch.manual_seed(1)
from torch import optim

from Code.Trainers import HSICBottleneck
from Code.Models import MLP, ChebyKAN, KAN
from Code.Data import load_data
from Code.Utils import show_result

  from .autonotebook import tqdm as notebook_tqdm


### Load the MNIST Dataset
---------------

In [2]:
# Mini-batch size; reused by the training cells further down.
batchsize = 128

# Ask the project helper for the MNIST loaders (train first, test second).
train_loader, test_loader = load_data(batchsize=batchsize, dataset='mnist')


### Training an MLP with HSIC Bottleneck
-----------------------------------------

In [3]:
# Prefer the GPU, but fall back to the CPU so this cell does not crash on
# machines without a usable CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Layer widths handed to the MLP; 784 matches a flattened 28x28 MNIST image.
layer_sizes = [784, 32, 16]
model = MLP(layer_sizes=layer_sizes, output_size=10).to(device)

# Number of passes over the training set used by the training cell below.
epochs = 50
print("Model trainable parameters: ", sum(p.numel() for p in model.parameters() if p.requires_grad))

# No optimizer is passed here, so HSICBottleneck uses whatever it defaults to.
trainer = HSICBottleneck(model=model)

Model trainable parameters:  25914


In [13]:
# Train the MLP with the HSIC-bottleneck trainer, timing every epoch and
# recording evaluation results on every `eval_every`-th epoch.
logs = list()
eval_every = 2
for epoch in range(epochs):
    trainer.model.train()
    start = time.time()
    for data, target in train_loader:
        # Flatten using the batch's own leading dimension rather than the
        # global `batchsize`: a shorter final batch would otherwise be
        # reshaped to the wrong number of rows (or fail outright).
        inputs = data.view(data.size(0), -1).to(device)
        labels = target.to(device)
        # Reshape and transfer once, then reuse the same tensors for both
        # the HSIC update and the output-layer tuning step.
        trainer.step(inputs, labels)
        trainer.tune_output(inputs, labels)
    end = time.time()
    if epoch % eval_every == 0:
        show_result(trainer, train_loader, test_loader, epoch, logs, device)
        # presumably show_result appends one row to `logs` per call, so the
        # row for this epoch sits at epoch // eval_every -- TODO confirm.
        logs[epoch // eval_every].append(end - start)

df = pd.DataFrame(logs)

EPOCH 0. 	 Training  ACC: 0.8361. 	 Testing ACC: 0.8396
EPOCH 2. 	 Training  ACC: 0.8562. 	 Testing ACC: 0.8643
EPOCH 4. 	 Training  ACC: 0.8802. 	 Testing ACC: 0.8857
EPOCH 6. 	 Training  ACC: 0.8729. 	 Testing ACC: 0.8766
EPOCH 8. 	 Training  ACC: 0.8887. 	 Testing ACC: 0.8943
EPOCH 10. 	 Training  ACC: 0.8933. 	 Testing ACC: 0.8967
EPOCH 12. 	 Training  ACC: 0.8972. 	 Testing ACC: 0.9006
EPOCH 14. 	 Training  ACC: 0.9024. 	 Testing ACC: 0.9029
EPOCH 16. 	 Training  ACC: 0.9042. 	 Testing ACC: 0.9019
EPOCH 18. 	 Training  ACC: 0.9079. 	 Testing ACC: 0.9097
EPOCH 20. 	 Training  ACC: 0.9127. 	 Testing ACC: 0.9147
EPOCH 22. 	 Training  ACC: 0.9090. 	 Testing ACC: 0.9105
EPOCH 24. 	 Training  ACC: 0.9159. 	 Testing ACC: 0.9168
EPOCH 26. 	 Training  ACC: 0.9160. 	 Testing ACC: 0.9118
EPOCH 28. 	 Training  ACC: 0.9195. 	 Testing ACC: 0.9191
EPOCH 30. 	 Training  ACC: 0.9175. 	 Testing ACC: 0.9187
EPOCH 32. 	 Training  ACC: 0.9196. 	 Testing ACC: 0.9202
EPOCH 34. 	 Training  ACC: 0.9221. 	

KeyboardInterrupt: 

### Training a KAN with HSIC Bottleneck
----------

In [7]:
# Prefer the GPU, but fall back to the CPU so this cell does not crash on
# machines without a usable CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Layer widths handed to the model; 784 matches a flattened 28x28 MNIST image.
layer_sizes = [784, 32, 16]
degree = 3
# Alternative architecture; swap it in for the KAN line below to compare.
#model = ChebyKAN(degree = degree, layer_sizes = layer_sizes, output_size = 10).to(device)
model = KAN(degree=degree, layer_sizes=layer_sizes, output_size=10).to(device)

epochs = 50
# Kept in a variable because the result-saving cell embeds it in the file name.
num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Model trainable parameters: ", num_parameters)

lr = 0.001
optimizer = optim.AdamW(model.parameters(), lr=lr)
trainer = HSICBottleneck(model=model, optimizer=optimizer)


Model trainable parameters:  257600


In [8]:
# Train the current model with the HSIC-bottleneck trainer, timing every epoch
# and recording evaluation results on every `eval_every`-th epoch.
logs = list()
eval_every = 2
for epoch in range(epochs):
    trainer.model.train()
    start = time.time()
    for data, target in train_loader:
        # Flatten using the batch's own leading dimension rather than the
        # global `batchsize`: a shorter final batch would otherwise be
        # reshaped to the wrong number of rows (or fail outright).
        inputs = data.view(data.size(0), -1).to(device)
        labels = target.to(device)
        # Reshape and transfer once, then reuse the same tensors for both
        # the HSIC update and the output-layer tuning step.
        trainer.step(inputs, labels)
        trainer.tune_output(inputs, labels)
    end = time.time()
    if epoch % eval_every == 0:
        show_result(trainer, train_loader, test_loader, epoch, logs, device)
        # presumably show_result appends one row to `logs` per call, so the
        # row for this epoch sits at epoch // eval_every -- TODO confirm.
        logs[epoch // eval_every].append(end - start)


EPOCH 0. 	 Training  ACC: 0.7527. 	 Testing ACC: 0.7554
EPOCH 2. 	 Training  ACC: 0.8130. 	 Testing ACC: 0.8208
EPOCH 4. 	 Training  ACC: 0.8202. 	 Testing ACC: 0.8262
EPOCH 6. 	 Training  ACC: 0.8205. 	 Testing ACC: 0.8282
EPOCH 8. 	 Training  ACC: 0.8209. 	 Testing ACC: 0.8297
EPOCH 10. 	 Training  ACC: 0.8176. 	 Testing ACC: 0.8282
EPOCH 12. 	 Training  ACC: 0.8214. 	 Testing ACC: 0.8298
EPOCH 14. 	 Training  ACC: 0.8233. 	 Testing ACC: 0.8288
EPOCH 16. 	 Training  ACC: 0.8327. 	 Testing ACC: 0.8410
EPOCH 18. 	 Training  ACC: 0.8420. 	 Testing ACC: 0.8480
EPOCH 20. 	 Training  ACC: 0.8505. 	 Testing ACC: 0.8562
EPOCH 22. 	 Training  ACC: 0.8571. 	 Testing ACC: 0.8600
EPOCH 24. 	 Training  ACC: 0.8602. 	 Testing ACC: 0.8643
EPOCH 26. 	 Training  ACC: 0.8664. 	 Testing ACC: 0.8703
EPOCH 28. 	 Training  ACC: 0.8718. 	 Testing ACC: 0.8751
EPOCH 30. 	 Training  ACC: 0.8789. 	 Testing ACC: 0.8794
EPOCH 32. 	 Training  ACC: 0.8786. 	 Testing ACC: 0.8837
EPOCH 34. 	 Training  ACC: 0.8803. 	

In [15]:
import pandas as pd

# Turn the per-evaluation log rows into a table with one row per logged epoch.
df = pd.DataFrame(logs)

# NOTE(review): the two "*_loss" columns are filled by show_result, which
# prints accuracies -- confirm what these values actually are before
# interpreting them as losses.
df.columns = ['Epoch', 'Train_loss', 'Test_loss', 'Time']

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Epoch,Train_loss,Test_loss,Time
0,0,0.33380073,0.32952723,14.609651
1,2,0.66524774,0.67147434,11.019556
2,4,0.7240418,0.73257214,11.181293
3,6,0.7516026,0.75971556,11.144553
4,8,0.7700654,0.7805489,11.175057


In [16]:
# Build the output name from the live objects instead of a hand-typed label,
# so it cannot drift from the model/degree that were actually trained.
experiment_name = "MNIST_hsic_" + type(model).__name__ + "_degree" + str(degree)

# type(optimizer).__name__ gives the bare class name; the previous
# str(optimizer).split("(")[0] kept the space that precedes "(" in the
# optimizer's repr, leaving trailing whitespace in the file name.
experiment_name += (
    "_lr_" + str(lr)
    + "_epochs_" + str(epochs)
    + "_parameters_" + str(num_parameters)
    + "_optimizer_" + type(optimizer).__name__
)

# Written without a file extension, matching the existing naming scheme.
df.to_csv(experiment_name)