In [1]:
import os
from Code.Trainers import HSICBottleneck
from Code.Models import MLP, ChebyKAN
from Code.Data import load_data
from Code.Utils import show_result
import time
import torch; torch.manual_seed(1)


  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x1ea9c5ccc30>

### Load the MNIST Dataset
---------------

In [4]:
batchsize = 128
train_loader, test_loader = load_data(dataset = 'mnist', batchsize=batchsize)


### Training an MLP with HSIC Bottleneck
-----------------------------------------

In [11]:
device = "cuda"
layer_sizes = [784, 32, 16]
model = MLP(layer_sizes = layer_sizes, output_size = 10).to(device)
epochs = 50
print("Model trainable parameters: ", sum(p.numel() for p in model.parameters() if p.requires_grad))
trainer = HSICBottleneck(model = model)

Model trainable parameters:  25914


In [13]:
logs = list()
for epoch in range(epochs):
    
    trainer.model.train()
    start = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(batchsize, -1)
        trainer.step(data.view(batchsize, -1).to(device), target.to(device))
        trainer.tune_output(data.view(batchsize, -1).to(device), target.to(device))
    end = time.time()
    if epoch % 2 == 0:
        show_result(trainer, train_loader, test_loader, epoch, logs, device)
        logs[epoch//2].append(end-start)

df = pd.DataFrame(logs)

EPOCH 0. 	 Training  ACC: 0.8361. 	 Testing ACC: 0.8396
EPOCH 2. 	 Training  ACC: 0.8562. 	 Testing ACC: 0.8643
EPOCH 4. 	 Training  ACC: 0.8802. 	 Testing ACC: 0.8857
EPOCH 6. 	 Training  ACC: 0.8729. 	 Testing ACC: 0.8766
EPOCH 8. 	 Training  ACC: 0.8887. 	 Testing ACC: 0.8943
EPOCH 10. 	 Training  ACC: 0.8933. 	 Testing ACC: 0.8967
EPOCH 12. 	 Training  ACC: 0.8972. 	 Testing ACC: 0.9006
EPOCH 14. 	 Training  ACC: 0.9024. 	 Testing ACC: 0.9029
EPOCH 16. 	 Training  ACC: 0.9042. 	 Testing ACC: 0.9019
EPOCH 18. 	 Training  ACC: 0.9079. 	 Testing ACC: 0.9097
EPOCH 20. 	 Training  ACC: 0.9127. 	 Testing ACC: 0.9147
EPOCH 22. 	 Training  ACC: 0.9090. 	 Testing ACC: 0.9105
EPOCH 24. 	 Training  ACC: 0.9159. 	 Testing ACC: 0.9168
EPOCH 26. 	 Training  ACC: 0.9160. 	 Testing ACC: 0.9118
EPOCH 28. 	 Training  ACC: 0.9195. 	 Testing ACC: 0.9191
EPOCH 30. 	 Training  ACC: 0.9175. 	 Testing ACC: 0.9187
EPOCH 32. 	 Training  ACC: 0.9196. 	 Testing ACC: 0.9202
EPOCH 34. 	 Training  ACC: 0.9221. 	

KeyboardInterrupt: 

### Training a KAN with HSIC Bottleneck
----------

In [2]:
device = "cuda"
layer_sizes = [784, 32, 16]
degree = 6
model = ChebyKAN(degree = degree, layer_sizes = layer_sizes, output_size = 10).to(device)
epochs = 50
print("Model trainable parameters: ", sum(p.numel() for p in model.parameters() if p.requires_grad))

optimizer = optim.AdamW(self.model.parameters(), lr=0.0001)
trainer = HSICBottleneck(model = model, optimizer = optimizer)


Model trainable parameters:  179370


In [5]:
logs = list()
for epoch in range(epochs):
    
    trainer.model.train()
    start = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(batchsize, -1)
        trainer.step(data.view(batchsize, -1).to(device), target.to(device))
        trainer.tune_output(data.view(batchsize, -1).to(device), target.to(device))
    end = time.time()
    if epoch % 2 == 0:
        show_result(trainer, train_loader, test_loader, epoch, logs, device)
        logs[epoch//2].append(end-start)


EPOCH 0. 	 Training  ACC: 0.7559. 	 Testing ACC: 0.7612
EPOCH 2. 	 Training  ACC: 0.8093. 	 Testing ACC: 0.8167
EPOCH 4. 	 Training  ACC: 0.8316. 	 Testing ACC: 0.8385
EPOCH 6. 	 Training  ACC: 0.8446. 	 Testing ACC: 0.8538
EPOCH 8. 	 Training  ACC: 0.8555. 	 Testing ACC: 0.8637
EPOCH 10. 	 Training  ACC: 0.8561. 	 Testing ACC: 0.8598
EPOCH 12. 	 Training  ACC: 0.8618. 	 Testing ACC: 0.8663
EPOCH 14. 	 Training  ACC: 0.8642. 	 Testing ACC: 0.8676
EPOCH 16. 	 Training  ACC: 0.8601. 	 Testing ACC: 0.8635
EPOCH 18. 	 Training  ACC: 0.8584. 	 Testing ACC: 0.8587
EPOCH 20. 	 Training  ACC: 0.8140. 	 Testing ACC: 0.8143
EPOCH 22. 	 Training  ACC: 0.8384. 	 Testing ACC: 0.8349
EPOCH 24. 	 Training  ACC: 0.8480. 	 Testing ACC: 0.8516
EPOCH 26. 	 Training  ACC: 0.8376. 	 Testing ACC: 0.8334
EPOCH 28. 	 Training  ACC: 0.7993. 	 Testing ACC: 0.8002
EPOCH 30. 	 Training  ACC: 0.8218. 	 Testing ACC: 0.8240
EPOCH 32. 	 Training  ACC: 0.7935. 	 Testing ACC: 0.8023
EPOCH 34. 	 Training  ACC: 0.7676. 	

NameError: name 'pd' is not defined

In [8]:
import pandas as pd
df = pd.DataFrame(logs); df.columns = ['Epoch', 'Train_loss', 'Test_loss', 'Time']; df.head()

Unnamed: 0,Epoch,Train_loss,Test_loss,Time
0,0,0.75592613,0.76121795,14.32129
1,2,0.8092615,0.8167067,11.161691
2,4,0.83156383,0.8385417,11.706221
3,6,0.84456795,0.853766,11.33398
4,8,0.85545206,0.8636819,11.348775
