In [1]:
import os

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from distillation_utils import Distiller
from models.cnn import SimpleCNN, LeNet
from models.mlp import MLP

In [2]:
# --- Model architecture (passed to every SimpleCNN / MLP built below) ---
in_channels = 1        # single-channel input (matches the 1-tuple Normalize stats)
num_classes = 10
num_conv_layers = 2
temperature = 1

# --- Optimisation settings ---
lr = 0.001
batch_size = 64
num_epochs = 10

# --- CNN checkpoint ---
# Despite the name, this is the checkpoint *file* handed to torch.save / torch.load.
save_path_folder = 'saved_models/model'

In [None]:
# Instantiate the CNN on the first GPU, together with the optimizer and loss
# used to train it.
model = SimpleCNN(
    in_channels=in_channels,
    num_classes=num_classes,
    num_conv_layers=num_conv_layers,
    temperature=temperature,
)
model = model.to('cuda:0')
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()

In [3]:
# MNIST train/test splits for the CNN. Images are converted to tensors and
# normalised with mean 0.5 / std 0.5; only the training loader shuffles.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

mnist_kwargs = dict(root='./data', transform=transform, download=True)
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Train the CNN on `train_loader`, logging the loss every 100 steps, then
# checkpoint its weights to `save_path_folder`.
device = next(model.parameters()).device  # send batches to wherever the model lives
model.train()  # restore training mode in case this cell is re-run after model.eval()
steps_per_epoch = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{steps_per_epoch}], Loss: {loss.item():.4f}')

# Save the trained model. torch.save does not create missing parent
# directories, so make sure the target directory exists first.
os.makedirs(os.path.dirname(save_path_folder) or '.', exist_ok=True)
torch.save(model.state_dict(), save_path_folder)
print(f"Model saved as {save_path_folder}!")

In [None]:
# Re-save the CNN weights to the configured checkpoint path instead of repeating
# the literal, so the path is defined in exactly one place.
torch.save(model.state_dict(), save_path_folder)

In [None]:
# Rebuild the CNN and restore the weights stored at `save_path_folder`.
# map_location places the checkpoint tensors on the target GPU regardless of
# which device they were saved from.
# NOTE: this rebinds `model`; an `optimizer` created earlier still references
# the previous model's parameters and must be rebuilt before further training.
cnn_device = 'cuda:0'
state_dict = torch.load(save_path_folder, map_location=cnn_device)
model = SimpleCNN(
    in_channels=in_channels,
    num_classes=num_classes,
    num_conv_layers=num_conv_layers,
    temperature=temperature,
).to(cnn_device)
model.load_state_dict(state_dict=state_dict)

In [None]:
# Testing the model: top-1 accuracy over every batch of `test_loader`.
model.eval()
device = next(model.parameters()).device  # evaluate on the model's own device
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        labels = labels.to(device)
        # Index of the largest output per sample is the predicted class.
        predicted = model(images.to(device)).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {accuracy:.4f}')

In [None]:
# MNIST dataset for the MLP: same normalisation as before, then each image is
# flattened to a 1-D vector.
# torch.flatten is used instead of an inline lambda so the transform stays
# picklable (required if the loaders are ever given worker processes).
# NOTE: this rebinds `transform`, `train_dataset`, `test_dataset`,
# `train_loader` and `test_loader`; cells that feed the CNN must not be re-run
# after this one without recreating the un-flattened loaders.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
    transforms.Lambda(torch.flatten),
])
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# MLP classifier over 784-dimensional inputs (the flattened images), with
# device='cuda' passed to its constructor.
mlp = MLP(
    input_dim=784,
    output_dim=num_classes,
    hidden_size=2048,
    hidden_layers=4,
    device='cuda',
)

In [None]:
# Train the MLP for 5 epochs with its own optimizer/loss pair.
# NOTE: this rebinds `criterion` and `optimizer`, which earlier cells created
# for the CNN.
optimizer = torch.optim.Adam(mlp.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
mlp.train(
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=5,
)

In [None]:
# Evaluate the trained MLP on the test loader.
mlp.eval(test_loader)

In [None]:
# Rebuild the MLP from the checkpoint at "saved_models/mlp" and evaluate it.
# MLP is already imported in the notebook's import cell, so it is not
# re-imported here.
mlp = MLP(
    input_dim=784,
    output_dim=num_classes,
    hidden_size=2048,
    hidden_layers=4,
    device='cuda',
    from_saved_state_dict="saved_models/mlp",
)
mlp.eval(test_loader=test_loader)

In [4]:
# Checkpoint file holding the CNN weights. A forward slash is used because
# "\m" is an invalid escape sequence and a backslash separator only resolves
# on Windows; "/" works on every platform.
save_path_folder = "saved_models/model"

In [10]:
# Distillation setup: a freshly initialised MLP student and the CNN teacher
# restored from the checkpoint at `save_path_folder`.
mlp_student = MLP(
    input_dim=784,
    output_dim=num_classes,
    hidden_size=2048,
    hidden_layers=4,
    device='cuda',
)

teacher_device = 'cuda:0'
# map_location places the checkpoint tensors on the teacher's device regardless
# of which device they were saved from.
state_dict = torch.load(save_path_folder, map_location=teacher_device)
model_teacher = SimpleCNN(
    in_channels=in_channels,
    num_classes=num_classes,
    num_conv_layers=num_conv_layers,
    temperature=temperature,
).to(teacher_device)
load_result = model_teacher.load_state_dict(state_dict=state_dict)
# The teacher only provides targets, so keep it in inference mode. This is a
# no-op if SimpleCNN has no mode-dependent layers or if Distiller already does
# it (TODO confirm against Distiller).
model_teacher.eval()
load_result

Not using softmax


<All keys matched successfully>

In [6]:
# Distiller whose student is resumed from the checkpoint at
# 'saved_models/distiller'. Distiller is imported in the notebook's import cell.
distiller = Distiller(
    student=mlp_student,
    teacher=model_teacher,
    device='cuda',
    lr=0.001,
    load_student_from_path='saved_models/distiller',
)

In [11]:
# Distiller for training the student from its current weights (no checkpoint
# is loaded). Distiller is imported in the notebook's import cell.
# NOTE: this rebinds `distiller`, replacing any instance created earlier.
distiller = Distiller(
    student=mlp_student,
    teacher=model_teacher,
    device='cuda',
    lr=0.001,
)

In [12]:
# Run distillation for 5 epochs over the training loader. The last argument is
# the location string handed to Distiller.distill (presumably where it writes
# checkpoints; verify against distillation_utils).
distill_epochs = 5
distill_output_dir = "saved_models/"
distiller.distill(train_loader, distill_epochs, distill_output_dir)

Epoch [1/5], Step [100/938], Student Loss : 0.8781, Total Loss: -29.8506
Epoch [1/5], Step [200/938], Student Loss : 0.6724, Total Loss: -29.9157
Epoch [1/5], Step [300/938], Student Loss : 0.5258, Total Loss: -30.0003
Epoch [1/5], Step [400/938], Student Loss : 0.4998, Total Loss: -29.9974
Epoch [1/5], Step [500/938], Student Loss : 0.5316, Total Loss: -30.0190
Epoch [1/5], Step [600/938], Student Loss : 0.5330, Total Loss: -30.0127
Epoch [1/5], Step [700/938], Student Loss : 0.5118, Total Loss: -29.9997
Epoch [1/5], Step [800/938], Student Loss : 0.5368, Total Loss: -29.9960
Epoch [1/5], Step [900/938], Student Loss : 0.5102, Total Loss: -29.9583
Epoch [2/5], Step [100/938], Student Loss : 0.5081, Total Loss: -29.9777
Epoch [2/5], Step [200/938], Student Loss : 0.3899, Total Loss: -30.0113
Epoch [2/5], Step [300/938], Student Loss : 0.4097, Total Loss: -30.0319
Epoch [2/5], Step [400/938], Student Loss : 0.4769, Total Loss: -29.9935
Epoch [2/5], Step [500/938], Student Loss : 0.3679,

In [13]:
# Run the distiller's test step on the MNIST test loader; it prints the test accuracy.
distiller.test_step(test_loader=test_loader)

Test Accuracy: 0.9688


In [8]:
# Sanity check: restore the stand-alone MLP checkpoint by hand and evaluate it
# on the test loader.
mlp_reloaded = MLP(
    input_dim=784,
    output_dim=num_classes,
    hidden_size=2048,
    hidden_layers=4,
    device='cuda',
)
# map_location keeps the load independent of the device the file was saved from.
mlp_state_dict = torch.load('saved_models/mlp', map_location='cuda')
mlp_reloaded.load_state_dict(mlp_state_dict)
mlp_reloaded.eval(test_loader)

Not using softmax
a
Test Accuracy: 0.9704


In [None]:
# Summarise the input batch shapes produced by `test_loader` as
# {shape: number_of_batches} instead of printing one line per batch.
batch_shape_counts = {}
for images, _ in test_loader:
    shape = tuple(images.shape)
    batch_shape_counts[shape] = batch_shape_counts.get(shape, 0) + 1
batch_shape_counts