# **Huấn luyện vài mô hình mạng đơn giản với LoRA**

In [1]:
%pip install peft

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.11.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import torch
import peft
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import transforms, ToTensor, Lambda
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import CIFAR10


**1. Dataset: sử dụng CIFAR-10**

In [3]:
# Seed PyTorch's global RNG so that the shuffled training order (and any model
# initialised after this cell) is the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

batch_size = 6       # samples per mini-batch
num_batches = 100    # number of training mini-batches kept in the subset
num_classes = 10     # number of CIFAR-10 target classes

# Download (if missing) and load both CIFAR-10 splits; ToTensor() converts each
# image to a tensor.
trainset = CIFAR10(
    root='data',
    train=True,
    transform=ToTensor(),
    download=True
)

testset = CIFAR10(
    root='data',
    train=False,
    transform=ToTensor(),
    download=True
)

# Train only on the first batch_size * num_batches images to keep the demo fast.
subset_indices = list(range(batch_size * num_batches))
train_subset = Subset(trainset, subset_indices)

train_dataloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps it deterministic.
test_dataloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:10<00:00, 16080487.38it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


**2. Model: ta sẽ thử với 2 mạng đơn giản: MLP và CNN**

In [4]:
# MLP đơn giản

class MLP(nn.Module):
    """Fully connected classifier: two hidden Linear+ReLU stages and a Linear output.

    Args:
        input_size: number of features of each (already flattened) sample.
            The default 3072 matches the flattened images fed in by the
            training code further down.
        num_hidden: width of both hidden layers.
        num_classes: number of output logits.
    """

    def __init__(self, input_size = 3072, num_hidden = 2000, num_classes = 10):
        super().__init__()
        # Layers are created in forward order so the sub-modules keep the
        # names seq.0 ... seq.4.
        hidden_stack = [
            nn.Linear(input_size, num_hidden),
            nn.ReLU(),
            nn.Linear(num_hidden, num_hidden),
            nn.ReLU(),
        ]
        output_layer = nn.Linear(num_hidden, num_classes)
        self.seq = nn.Sequential(*hidden_stack, output_layer)

    def forward(self, x):
        """Return the logits produced by the layer stack for ``x``."""
        logits = self.seq(x)
        return logits

In [5]:
# CNN đơn giản

import torch
import torch.nn as nn

class CNN(nn.Module):
    """Small convolutional classifier: two conv/pool stages and three Linear layers.

    Args:
        num_classes: number of output logits (default 10, as before).
        in_channels: number of channels of the input images (default 3, as before).

    The first Linear layer expects 16 * 5 * 5 features, which is what the two
    5x5-conv + 2x2-pool stages produce for 32x32 inputs
    (32 -> 28 -> 14 -> 10 -> 5). Other spatial sizes will not fit that layer.
    """

    def __init__(self, num_classes: int = 10, in_channels: int = 3):
        super().__init__()
        # Layer order (and therefore the sub-module names model.0 ... model.11)
        # is unchanged so code that addresses layers by name keeps working.
        self.model = nn.Sequential(
            nn.Conv2d(in_channels, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Flatten(),
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes)
        )

    def forward(self, x):
        """Return the logits for a batch of images ``x``."""
        return self.model(x)

**3. Training với MLP**

In [6]:
def train_model(model, optimizer, criterion, train_dataloader, test_dataloader, epochs, flatten=True):
    """Train ``model`` and print the mean train/eval loss after every epoch.

    Args:
        model: module invoked as ``model(x_batch)``.
        optimizer: optimizer stepped once per training batch.
        criterion: callable ``criterion(y_pred, y_batch)`` returning a scalar loss tensor.
        train_dataloader: sized iterable of ``(x_batch, y_batch)`` pairs used for training.
        test_dataloader: sized iterable of ``(x_batch, y_batch)`` pairs used for evaluation.
        epochs: number of passes over ``train_dataloader``.
        flatten: when True (the default, matching the previous behaviour) each
            ``x_batch`` is reshaped to ``(batch, -1)`` before the forward pass;
            pass False for models that consume the batch unchanged.

    Returns:
        None. One line per epoch is printed. Each reported loss is the average
        of the per-batch loss values; ``nan`` is reported for an empty loader
        instead of raising ``ZeroDivisionError``.
    """
    for epoch in range(epochs):

        # Train
        model.train()
        train_loss = 0
        for x_batch, y_batch in train_dataloader:
            if flatten:
                # reshape (unlike view) also accepts non-contiguous batches
                x_batch = x_batch.reshape(x_batch.size(0), -1)
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Evaluate: no gradients are needed for the forward pass or the loss.
        model.eval()
        eval_loss = 0
        with torch.no_grad():
            for x_batch, y_batch in test_dataloader:
                if flatten:
                    x_batch = x_batch.reshape(x_batch.size(0), -1)
                y_pred = model(x_batch)
                eval_loss += criterion(y_pred, y_batch).item()

        num_train_batches = len(train_dataloader)
        num_eval_batches = len(test_dataloader)
        # The local names below appear verbatim in the printed line (f-string "=" specifier).
        avg_train_loss = train_loss / num_train_batches if num_train_batches else float("nan")
        avg_eval_loss = eval_loss / num_eval_batches if num_eval_batches else float("nan")
        print(f"{epoch=}  {avg_train_loss=:.4f}  {avg_eval_loss=:.4f}")

In [7]:
# Without PEFT

# Baseline run: every weight of the MLP is optimised directly (no adapter).
criterion = nn.CrossEntropyLoss()
module_without_PEFT = MLP(input_size=3072)
optimizer = torch.optim.Adam(params=module_without_PEFT.parameters(), lr=1e-3)
train_model(
    model=module_without_PEFT,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    epochs=15,
)

epoch=0  avg_train_loss=2.4057  avg_eval_loss=2.1755
epoch=1  avg_train_loss=2.1739  avg_eval_loss=2.1467
epoch=2  avg_train_loss=2.0967  avg_eval_loss=2.1656
epoch=3  avg_train_loss=2.0346  avg_eval_loss=2.0840
epoch=4  avg_train_loss=2.0122  avg_eval_loss=2.0664
epoch=5  avg_train_loss=1.9680  avg_eval_loss=2.1182
epoch=6  avg_train_loss=1.9172  avg_eval_loss=2.0031
epoch=7  avg_train_loss=1.8437  avg_eval_loss=2.1101
epoch=8  avg_train_loss=1.8568  avg_eval_loss=2.2153
epoch=9  avg_train_loss=1.8132  avg_eval_loss=2.0266
epoch=10  avg_train_loss=1.7669  avg_eval_loss=2.0512
epoch=11  avg_train_loss=1.7185  avg_eval_loss=2.0678
epoch=12  avg_train_loss=1.6945  avg_eval_loss=2.0719
epoch=13  avg_train_loss=1.6922  avg_eval_loss=2.0858
epoch=14  avg_train_loss=1.6785  avg_eval_loss=2.1886


In [8]:
# With PEFT

module_with_PEFT = MLP()
# Show each sub-module's dotted name and type; the LoRA config selects layers by these names.
[
    (module_name, type(submodule))
    for module_name, submodule in module_with_PEFT.named_modules()
]

[('', __main__.MLP),
 ('seq', torch.nn.modules.container.Sequential),
 ('seq.0', torch.nn.modules.linear.Linear),
 ('seq.1', torch.nn.modules.activation.ReLU),
 ('seq.2', torch.nn.modules.linear.Linear),
 ('seq.3', torch.nn.modules.activation.ReLU),
 ('seq.4', torch.nn.modules.linear.Linear)]

In [9]:
# LoRA setup for the MLP:
#   - rank-8 adapters on the two hidden Linear layers (seq.0, seq.2);
#   - the output layer seq.4 is kept fully trainable via modules_to_save;
#   - bias="lora_only" additionally trains the biases of the adapted layers.
# task_type is intentionally omitted: "classification" is not one of PEFT's
# TaskType values (e.g. SEQ_CLS, CAUSAL_LM), and this is a plain nn.Module.
# NOTE(review): newer PEFT releases appear to validate task_type — confirm.
config = peft.LoraConfig(
    r=8,
    lora_dropout=0.05,
    target_modules = ["seq.0", "seq.2"],
    modules_to_save = ["seq.4"],
    bias="lora_only",
)

peft_model = peft.get_peft_model(module_with_PEFT, config)
optimizer = torch.optim.Adam(peft_model.parameters(), lr = 0.001)
criterion = nn.CrossEntropyLoss()

# print_trainable_parameters() prints the summary itself and returns None, so
# wrapping it in print() only added a stray "None" line to the output.
peft_model.print_trainable_parameters()
train_model(peft_model, optimizer, criterion, train_dataloader, test_dataloader, epochs = 30)

trainable params: 96,586 || all params: 10,260,596 || trainable%: 0.9413
None
epoch=0  avg_train_loss=2.2682  avg_eval_loss=2.1804
epoch=1  avg_train_loss=2.0656  avg_eval_loss=2.2606
epoch=2  avg_train_loss=1.9573  avg_eval_loss=2.1115
epoch=3  avg_train_loss=1.8992  avg_eval_loss=2.1004
epoch=4  avg_train_loss=1.8017  avg_eval_loss=1.9507
epoch=5  avg_train_loss=1.7633  avg_eval_loss=2.0915
epoch=6  avg_train_loss=1.6858  avg_eval_loss=2.0574
epoch=7  avg_train_loss=1.6265  avg_eval_loss=2.0051
epoch=8  avg_train_loss=1.5286  avg_eval_loss=2.0748
epoch=9  avg_train_loss=1.5655  avg_eval_loss=1.9459
epoch=10  avg_train_loss=1.3941  avg_eval_loss=2.0033
epoch=11  avg_train_loss=1.3810  avg_eval_loss=2.4486
epoch=12  avg_train_loss=1.4702  avg_eval_loss=2.0705
epoch=13  avg_train_loss=1.4001  avg_eval_loss=2.1218
epoch=14  avg_train_loss=1.2450  avg_eval_loss=2.1730
epoch=15  avg_train_loss=1.2339  avg_eval_loss=2.2570
epoch=16  avg_train_loss=1.1956  avg_eval_loss=2.6887
epoch=17  avg_

**4. Training với CNN**

In [10]:
def train_model(model, optimizer, criterion, train_dataloader, test_dataloader, epochs, flatten=False):
    """Train ``model`` on unflattened batches and print the mean losses per epoch.

    Args:
        model: module invoked as ``model(x_batch)``.
        optimizer: optimizer stepped once per training batch.
        criterion: callable ``criterion(y_pred, y_batch)`` returning a scalar loss tensor.
        train_dataloader: sized iterable of ``(x_batch, y_batch)`` pairs used for training.
        test_dataloader: sized iterable of ``(x_batch, y_batch)`` pairs used for evaluation.
        epochs: number of passes over ``train_dataloader``.
        flatten: when False (the default, matching the previous behaviour) each
            ``x_batch`` is fed to the model unchanged; pass True to reshape it
            to ``(batch, -1)`` first for models that expect flat feature vectors.

    Returns:
        None. One line per epoch is printed. Each reported loss is the average
        of the per-batch loss values; ``nan`` is reported for an empty loader
        instead of raising ``ZeroDivisionError``.
    """
    for epoch in range(epochs):

        # Train
        model.train()
        train_loss = 0
        for x_batch, y_batch in train_dataloader:
            if flatten:
                x_batch = x_batch.reshape(x_batch.size(0), -1)
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Evaluate: no gradients are needed for the forward pass or the loss.
        model.eval()
        eval_loss = 0
        with torch.no_grad():
            for x_batch, y_batch in test_dataloader:
                if flatten:
                    x_batch = x_batch.reshape(x_batch.size(0), -1)
                y_pred = model(x_batch)
                eval_loss += criterion(y_pred, y_batch).item()

        num_train_batches = len(train_dataloader)
        num_eval_batches = len(test_dataloader)
        # The local names below appear verbatim in the printed line (f-string "=" specifier).
        avg_train_loss = train_loss / num_train_batches if num_train_batches else float("nan")
        avg_eval_loss = eval_loss / num_eval_batches if num_eval_batches else float("nan")
        print(f"{epoch=}  {avg_train_loss=:.4f}  {avg_eval_loss=:.4f}")

In [11]:
# Without PEFT

# Baseline run: every weight of the CNN is optimised directly (no adapter).
criterion = nn.CrossEntropyLoss()
module_without_PEFT = CNN()
optimizer = torch.optim.Adam(params=module_without_PEFT.parameters(), lr=1e-3)
train_model(
    model=module_without_PEFT,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    epochs=15,
)

epoch=0  avg_train_loss=2.2891  avg_eval_loss=2.2584
epoch=1  avg_train_loss=2.1644  avg_eval_loss=2.0797
epoch=2  avg_train_loss=1.9897  avg_eval_loss=1.9770
epoch=3  avg_train_loss=1.8998  avg_eval_loss=2.0900
epoch=4  avg_train_loss=1.8307  avg_eval_loss=1.9443
epoch=5  avg_train_loss=1.7407  avg_eval_loss=1.8440
epoch=6  avg_train_loss=1.7127  avg_eval_loss=1.8671
epoch=7  avg_train_loss=1.5935  avg_eval_loss=2.0219
epoch=8  avg_train_loss=1.5646  avg_eval_loss=1.8672
epoch=9  avg_train_loss=1.4709  avg_eval_loss=1.8704
epoch=10  avg_train_loss=1.3943  avg_eval_loss=1.8419
epoch=11  avg_train_loss=1.2986  avg_eval_loss=2.0282
epoch=12  avg_train_loss=1.2572  avg_eval_loss=1.9486
epoch=13  avg_train_loss=1.1471  avg_eval_loss=2.1445
epoch=14  avg_train_loss=1.0185  avg_eval_loss=2.1470


In [12]:
# With PEFT

module_with_PEFT = CNN()
# Show each sub-module's dotted name and type; the LoRA config selects layers by these names.
[
    (module_name, type(submodule))
    for module_name, submodule in module_with_PEFT.named_modules()
]

[('', __main__.CNN),
 ('model', torch.nn.modules.container.Sequential),
 ('model.0', torch.nn.modules.conv.Conv2d),
 ('model.1', torch.nn.modules.activation.ReLU),
 ('model.2', torch.nn.modules.pooling.MaxPool2d),
 ('model.3', torch.nn.modules.conv.Conv2d),
 ('model.4', torch.nn.modules.activation.ReLU),
 ('model.5', torch.nn.modules.pooling.MaxPool2d),
 ('model.6', torch.nn.modules.flatten.Flatten),
 ('model.7', torch.nn.modules.linear.Linear),
 ('model.8', torch.nn.modules.activation.ReLU),
 ('model.9', torch.nn.modules.linear.Linear),
 ('model.10', torch.nn.modules.activation.ReLU),
 ('model.11', torch.nn.modules.linear.Linear)]

In [13]:
# LoRA setup for the CNN:
#   - rank-8 adapters on both Conv2d layers and all three Linear layers;
#   - bias="lora_only" additionally trains the biases of the adapted layers.
# task_type is intentionally omitted: "classification" is not one of PEFT's
# TaskType values (e.g. SEQ_CLS, CAUSAL_LM), and this is a plain nn.Module.
# NOTE(review): newer PEFT releases appear to validate task_type — confirm.
config = peft.LoraConfig(
    r=8,
    lora_dropout=0.05,
    target_modules = ["model.0", "model.3","model.7", "model.9", "model.11"],
    bias="lora_only",
)

peft_model = peft.get_peft_model(module_with_PEFT, config)
optimizer = torch.optim.Adam(peft_model.parameters(), lr = 0.001)
criterion = nn.CrossEntropyLoss()

# print_trainable_parameters() prints the summary itself and returns None, so
# wrapping it in print() only added a stray "None" line to the output.
peft_model.print_trainable_parameters()
train_model(peft_model, optimizer, criterion, train_dataloader, test_dataloader, epochs = 30)

trainable params: 8,756 || all params: 70,526 || trainable%: 12.4153
None
epoch=0  avg_train_loss=2.3068  avg_eval_loss=2.3017
epoch=1  avg_train_loss=2.2919  avg_eval_loss=2.2923
epoch=2  avg_train_loss=2.2074  avg_eval_loss=2.1942
epoch=3  avg_train_loss=2.0769  avg_eval_loss=2.1651
epoch=4  avg_train_loss=2.0262  avg_eval_loss=2.0370
epoch=5  avg_train_loss=1.9815  avg_eval_loss=2.0818
epoch=6  avg_train_loss=1.9559  avg_eval_loss=2.0375
epoch=7  avg_train_loss=1.9160  avg_eval_loss=2.0552
epoch=8  avg_train_loss=1.8979  avg_eval_loss=1.9638
epoch=9  avg_train_loss=1.8733  avg_eval_loss=1.9326
epoch=10  avg_train_loss=1.8236  avg_eval_loss=1.9129
epoch=11  avg_train_loss=1.8068  avg_eval_loss=1.9308
epoch=12  avg_train_loss=1.7815  avg_eval_loss=1.9599
epoch=13  avg_train_loss=1.7188  avg_eval_loss=1.9078
epoch=14  avg_train_loss=1.7303  avg_eval_loss=1.9657
epoch=15  avg_train_loss=1.6900  avg_eval_loss=1.8878
epoch=16  avg_train_loss=1.6580  avg_eval_loss=1.9014
epoch=17  avg_trai