#### Import required libraries

In [2]:
import torch
import torchvision
import torch.optim as optim
import torchinfo
import torch.nn as nn
from layers import CifarLoRAModel
import torchvision.transforms as transforms
from layers import CifarModel
from torch.utils.tensorboard import SummaryWriter

# Pretraining stage

#### Load the dataset from torchvision

In [3]:
# Number of images per mini-batch, shared by both loaders.
batch_size = 32

# Convert each image to a tensor, then normalize every one of the three
# channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train / test splits, downloaded into ./data when missing.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Human-readable class names; the tuple length is used as the class count.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


#### Use the GPU for training and testing if one is available

In [4]:
# Select the GPU only when CUDA is actually usable. `is_available` has to be
# *called*: the bare function object is always truthy, which would pick
# "cuda" even on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

#### Initialize the model, loss, optimizer, and TensorBoard writer

In [5]:
# Build the network and move it to the selected device.
model = CifarModel(hidden_dim=128, num_classes=len(classes))
model = model.to(device)

# Cross-entropy loss, optimized with SGD plus momentum over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

# TensorBoard writer created with the library defaults (no arguments).
writer = SummaryWriter()

#### Define test function to evaluate test set performance

In [6]:
def test(model):
    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = model(inputs)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
    return 100 * correct // total

#### Train the model for 5 epochs

In [7]:
def train(model,epochs):
    """Train ``model`` for ``epochs`` passes over ``trainloader``.

    Relies on notebook-level state: ``trainloader``, ``device``, ``criterion``,
    ``optimizer``, ``writer`` and ``test``. The global ``optimizer`` must have
    been built from the parameters of the ``model`` passed in, otherwise the
    update step will not touch this model.

    Every 500 mini-batches the mean loss of that window is printed and logged
    to TensorBoard under ``Loss/train``; after each epoch the accuracy
    returned by ``test(model)`` is logged under ``Accuracy/test``.

    Args:
        model: Network to optimize.
        epochs: Number of passes over the training set.
    """
    steps_per_epoch = len(trainloader)
    for epoch in range(epochs):  # loop over the dataset multiple times
        # Make sure training-mode behavior is active at the start of each epoch.
        model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 500 == 499:  # print every 500 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 500:.3f}')
                # Use a step that keeps increasing across epochs; logging the
                # per-epoch batch index would make every epoch write to the
                # same x-positions of the loss curve.
                global_step = epoch * steps_per_epoch + i + 1
                writer.add_scalar('Loss/train',running_loss / 500,global_step)
                running_loss = 0.0
        test_acc = test(model)

        writer.add_scalar('Accuracy/test',test_acc,epoch)
    print('Finished Training')

In [7]:
# Pretraining run: five passes over the training set with the baseline model.
epochs = 5
train(model, epochs)

[1,   500] loss: 2.137
[1,  1000] loss: 1.880
[1,  1500] loss: 1.759
Accuracy of the network on the 10000 test images: 39 %
[2,   500] loss: 1.689
[2,  1000] loss: 1.625
[2,  1500] loss: 1.597
Accuracy of the network on the 10000 test images: 45 %
[3,   500] loss: 1.548
[3,  1000] loss: 1.523
[3,  1500] loss: 1.517
Accuracy of the network on the 10000 test images: 47 %
[4,   500] loss: 1.464
[4,  1000] loss: 1.450
[4,  1500] loss: 1.451
Accuracy of the network on the 10000 test images: 48 %
[5,   500] loss: 1.403
[5,  1000] loss: 1.396
[5,  1500] loss: 1.387
Accuracy of the network on the 10000 test images: 49 %
Finished Training


In [8]:
# Persist only the model's state dict (not the whole module object) to disk.
PATH = './cifar_model.pth'
torch.save(obj=model.state_dict(), f=PATH)

# LoRA fine-tuning

In [9]:
PATH = './cifar_model.pth'
lora_model = CifarLoRAModel(hidden_dim=128,num_classes=len(classes),rank=32,alpha=1).to(device)

# map_location keeps the checkpoint loadable even when it was saved from a
# device that is not present on the current machine.
pretrained_state = torch.load(PATH, map_location=device)

# strict=False tolerates keys that exist on only one side (presumably the LoRA
# adapter weights, which the pretrained checkpoint cannot contain -- confirm
# against layers.py). Checkpoint entries that the LoRA model does not consume
# would mean pretrained weights were silently dropped, so surface them.
load_result = lora_model.load_state_dict(pretrained_state, strict=False)
if load_result.unexpected_keys:
    print(f'Warning: checkpoint keys not used by the LoRA model: {load_result.unexpected_keys}')

# Hand only the parameters that require gradients to the optimizer, so frozen
# weights are explicitly excluded from the update.
trainable_params = [p for p in lora_model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.001, momentum=0.9)

In [None]:
# Per-layer summary of the baseline model for one batch of flattened
# 32*32*3 inputs.
torchinfo.summary(
    model,
    input_size=(batch_size, 32 * 32 * 3),
    col_names=("input_size", "output_size", "num_params"),
    verbose=0,
)

In [10]:
# Per-layer summary of the LoRA model for one batch of flattened
# 32*32*3 inputs.
torchinfo.summary(
    lora_model,
    input_size=(batch_size, 32 * 32 * 3),
    col_names=("input_size", "output_size", "num_params"),
    verbose=0,
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
CifarLoRAModel                           [32, 3072]                [32, 10]                  --
├─Linear: 1-1                            [32, 3072]                [32, 128]                 (393,344)
├─LoRALinear: 1-2                        [32, 3072]                [32, 128]                 102,400
├─Linear: 1-3                            [32, 128]                 [32, 128]                 (16,512)
├─LoRALinear: 1-4                        [32, 128]                 [32, 128]                 8,192
├─Linear: 1-5                            [32, 128]                 [32, 10]                  (1,290)
├─LoRALinear: 1-6                        [32, 128]                 [32, 10]                  4,416
Total params: 526,154
Trainable params: 115,008
Non-trainable params: 411,146
Total mult-adds (Units.MEGABYTES): 13.16
Input size (MB): 0.39
Forward/backward pass size (MB): 0.14
Params size (MB): 2

In [11]:
# Fine-tuning run: five more passes over the training set with the LoRA model.
epochs = 5
train(model=lora_model, epochs=epochs)

[1,   500] loss: 1.333
[1,  1000] loss: 1.316
[1,  1500] loss: 1.320
Accuracy of the network on the 10000 test images: 50 %
[2,   500] loss: 1.309
[2,  1000] loss: 1.306
[2,  1500] loss: 1.308
Accuracy of the network on the 10000 test images: 50 %
[3,   500] loss: 1.293
[3,  1000] loss: 1.301
[3,  1500] loss: 1.303
Accuracy of the network on the 10000 test images: 50 %
[4,   500] loss: 1.288
[4,  1000] loss: 1.292
[4,  1500] loss: 1.288
Accuracy of the network on the 10000 test images: 51 %
[5,   500] loss: 1.272
[5,  1000] loss: 1.286
[5,  1500] loss: 1.278
Accuracy of the network on the 10000 test images: 51 %
Finished Training
