In [1]:
import torch
import torch.nn as nn

class LeNet5(nn.Module):
  def __init__(self, num_classes=10):
    super(LeNet5, self).__init__()
    self.conv1 = nn.Conv2d(1, 6, kernel_size=5) # 1 input channel, 6 output channels
    self.relu1 = nn.ReLU()
    self.avgpool1 = nn.AvgPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
    self.relu2 = nn.ReLU()
    self.avgpool2 = nn.AvgPool2d(kernel_size=2, stride=2)
    self.fc1 = nn.Linear(16*4*4, 120)
    self.relu3 = nn.ReLU()
    self.fc2 = nn.Linear(120, 84)
    self.relu4 = nn.ReLU()
    self.fc3 = nn.Linear(84, num_classes)

  def forward(self, x):
    x = self.avgpool1(self.relu1(self.conv1(x)))
    x = self.avgpool2(self.relu2(self.conv2(x)))
    x = x.view(-1, 16*4*4)
    x = self.relu3(self.fc1(x))
    x = self.relu4(self.fc2(x))
    x = self.fc3(x)
    return x

In [2]:
# prompt: train LeNet5 with MNIST

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Define the device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Instantiate the model and move it to the device
model = LeNet5().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 1
for epoch in range(num_epochs):
  for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(device), target.to(device)

    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()

    if batch_idx % 500 == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch + 1, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))

# Testing loop
correct = 0
total = 0
with torch.no_grad():
  for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    _, predicted = torch.max(output.data, 1)
    total += target.size(0)
    correct += (predicted == target).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 31970372.64it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1198591.93it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 10679857.74it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<?, ?it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw

Accuracy of the network on the 10000 test images: 96.89 %


In [3]:
# prompt: load FMNIST data, and evaluate LeNET5's accuracy as it is

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Define the device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the FashionMNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.FashionMNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST('./data', train=False, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# ... (rest of your LeNet5 class definition from the previous response)

# Instantiate the model and move it to the device
model = LeNet5(num_classes=10).to(device) # Assuming 10 classes for FashionMNIST

# ... (rest of your code for defining loss, optimizer, training loop, and testing loop,
#       but make sure to use the FashionMNIST dataloaders)
# Testing loop
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 12034030.29it/s]


Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 256410.28it/s]


Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 4870802.30it/s]


Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<?, ?it/s]


Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Accuracy of the network on the 10000 test images: 8.14 %


In [4]:
# prompt: let's freeze everything but the last fc3 layer.  finetuen fc3 layer for one epoch with FMNIST

# Freeze all layers except fc3
for name, param in model.named_parameters():
    if name not in ['fc3.weight', 'fc3.bias']:
        param.requires_grad = False

# Define the optimizer for only the fc3 layer
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

# Training loop (only for one epoch)
num_epochs = 3
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))



In [5]:
# prompt: show eval on fMNIST

# Testing loop
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 47.2 %


In [6]:
# prompt: let's freeze everything but the last fc3 layer.  finetuen fc3 layer for one epoch with FMNIST

# Freeze all layers except fc3
for name, param in model.named_parameters():
    param.requires_grad = True

# Define the optimizer for only the fc3 layer
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

# Training loop (only for one epoch)
num_epochs = 3
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))



In [7]:
# prompt: show eval on fMNIST

# Testing loop
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 87.24 %


In [8]:
from typing_extensions import Required
# prompt: let's try Lora. first define a new model class where in the last FC layers, we have lora parameters. then, copy the pretrained model parameters from "model". then train lora parameters while freezing everything else

import torch
import torch.nn as nn
import math

class LoRALayer(nn.Module):
    def __init__(self, r, in_features, out_features):
        super().__init__()
        self.lora_A = nn.Linear(in_features, r, bias=False)
        self.lora_B = nn.Linear(r, out_features, bias=False)
        self.scaling = 1/r
        # Initialize weights
        nn.init.kaiming_uniform_(self.lora_A.weight, a=math.sqrt(5))
        nn.init.zeros_(self.lora_B.weight)


    def forward(self, x):
        return self.lora_B(self.lora_A(x)) * self.scaling


class LeNet5_Lora(nn.Module):
    def __init__(self, num_classes=10, r=4):
        super(LeNet5_Lora, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5) # 1 input channel, 6 output channels
        self.relu1 = nn.ReLU()
        self.avgpool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.relu2 = nn.ReLU()
        self.avgpool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(16*4*4, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, num_classes)

        # Lora
        self.lora_fc1 = LoRALayer(r, 16*4*4, 120)
        self.lora_fc2 = LoRALayer(r, 120, 84)
        self.lora_fc3 = LoRALayer(r, 84, num_classes)


    def forward(self, x):
        x = self.avgpool1(self.relu1(self.conv1(x)))
        x = self.avgpool2(self.relu2(self.conv2(x)))
        x = x.view(-1, 16*4*4)
        x = self.relu3(self.fc1(x) + self.lora_fc1(x))
        x = self.relu4(self.fc2(x) + self.lora_fc2(x))
        x = self.fc3(x) + self.lora_fc3(x)
        return x

# Assuming 'model' is your pre-trained LeNet5 model
lora_model = LeNet5_Lora().to(device)

#copy weights
lora_model.conv1.weight.data = model.conv1.weight.data
lora_model.conv1.bias.data = model.conv1.bias.data
lora_model.conv2.weight.data = model.conv2.weight.data
lora_model.conv2.bias.data = model.conv2.bias.data
lora_model.fc1.weight.data = model.fc1.weight.data
lora_model.fc1.bias.data = model.fc1.bias.data
lora_model.fc2.weight.data = model.fc2.weight.data
lora_model.fc2.bias.data = model.fc2.bias.data
lora_model.fc3.weight.data = model.fc3.weight.data
lora_model.fc3.bias.data = model.fc3.bias.data

# Freeze all layers except lora parameters
for name, param in lora_model.named_parameters():
    if "lora" not in name:
        param.requires_grad = False

# Define optimizer for only lora parameters
optimizer = optim.Adam(filter(lambda p: p.requires_grad, lora_model.parameters()), lr=0.001)

# Training loop (example)
# ... (your training loop using lora_model and optimizer)

In [9]:
num_epochs = 3
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = lora_model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

