# **Huấn luyện vài mô hình mạng đơn giản với LoRA**

In [1]:
%pip install peft

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.11.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import torch
import peft
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import transforms, ToTensor, Lambda
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import CIFAR10


**1. Dataset: sử dụng CIFAR-10**

In [3]:
# Seed PyTorch's global RNG once, before any shuffling or weight
# initialisation happens, so a fresh kernel reproduces the same run.
SEED = 42
torch.manual_seed(SEED)

batch_size = 6

# CIFAR-10 train and test splits, downloaded into ./data on first use.
# ToTensor() turns each image into a float tensor.
trainset = CIFAR10(
    root='data',
    train=True,
    transform=ToTensor(),
    download=True
)

testset = CIFAR10(
    root='data',
    train=False,
    transform=ToTensor(),
    download=True
)

# Train on only the first `batch_size * num_batches` images to keep the
# demo fast; the whole test split is still used for evaluation.
num_batches = 100
subset_indices = list(range(batch_size * num_batches))
train_subset = Subset(trainset, subset_indices)

train_dataloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
# Shuffling brings nothing to evaluation; a fixed order keeps the reported
# eval loss deterministic from run to run.
test_dataloader = DataLoader(testset, batch_size=batch_size, shuffle=False)


num_classes = 10

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:11<00:00, 14875067.13it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


**2. Model: ta sẽ thử với 2 mạng đơn giản: MLP và CNN**

In [4]:
# MLP đơn giản

class MLP(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Args:
        input_size: number of features in each (already flattened) sample.
        num_hidden: width shared by both hidden layers.
        num_classes: number of output logits.
    """

    def __init__(self, input_size = 3072, num_hidden = 2000, num_classes = 10):
        super().__init__()
        # The default input_size of 3072 matches the flattened samples that
        # the training code further down feeds into this model.
        layers = [
            nn.Linear(input_size, num_hidden),
            nn.ReLU(),
            nn.Linear(num_hidden, num_hidden),
            nn.ReLU(),
            nn.Linear(num_hidden, num_classes),
        ]
        # A positional Sequential names the children seq.0 ... seq.4.
        self.seq = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the layer stack for batch `x`."""
        logits = self.seq(x)
        return logits

In [5]:
# CNN đơn giản

import torch
import torch.nn as nn

class CNN(nn.Module):
    """Small convolutional classifier.

    Two conv -> ReLU -> max-pool stages are followed by a flatten and three
    fully connected layers that end in 10 logits. The first linear layer
    takes 16 * 5 * 5 features, which is what the conv stack produces for
    3x32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        classifier_layers = [
            nn.Flatten(),
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        # One flat Sequential so the children are named model.0 ... model.11.
        self.model = nn.Sequential(*feature_layers, *classifier_layers)

    def forward(self, x):
        """Return the logits for image batch `x`."""
        logits = self.model(x)
        return logits

**3. Training với MLP**

In [6]:
def train_model(model, optimizer, criterion, train_dataloader, test_dataloader, epochs, flatten=True):
    """Train `model` and print the mean train/eval loss after every epoch.

    Args:
        model: module that maps an input batch to predictions.
        optimizer: optimizer already bound to the parameters to update.
        criterion: callable `(predictions, targets) -> scalar loss tensor`.
        train_dataloader: iterable of `(inputs, targets)` batches used for
            the optimisation steps.
        test_dataloader: iterable of `(inputs, targets)` batches used only
            for evaluation.
        epochs: number of passes over `train_dataloader`.
        flatten: when True (the default) every input batch is reshaped to
            `(batch, -1)` before the forward pass; pass False for models
            that consume the batches in their original layout.

    Returns:
        None. One summary line per epoch is written to stdout; each average
        is the mean of the per-batch losses, or `nan` when the corresponding
        loader yielded no batch.
    """
    for epoch in range(epochs):

        # Train
        model.train()
        train_loss, train_batches = 0.0, 0
        for x_batch, y_batch in train_dataloader:
            if flatten:
                x_batch = x_batch.view(x_batch.size(0), -1)
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_batches += 1

        # Evaluate: no gradients are needed for the forward pass or the loss.
        model.eval()
        eval_loss, eval_batches = 0.0, 0
        with torch.no_grad():
            for x_batch, y_batch in test_dataloader:
                if flatten:
                    x_batch = x_batch.view(x_batch.size(0), -1)
                y_pred = model(x_batch)
                eval_loss += criterion(y_pred, y_batch).item()
                eval_batches += 1

        # Batches are counted during iteration so loaders without __len__
        # work and an empty loader reports nan instead of dividing by zero.
        avg_train_loss = train_loss / train_batches if train_batches else float("nan")
        avg_eval_loss = eval_loss / eval_batches if eval_batches else float("nan")
        print(f"{epoch=}  {avg_train_loss=:.4f}  {avg_eval_loss=:.4f}")

In [7]:
# Baseline without PEFT: every weight of the MLP is trained directly.
module_without_PEFT = MLP(input_size=3072)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=module_without_PEFT.parameters(), lr=1e-3)
train_model(
    model=module_without_PEFT,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    epochs=15,
)

epoch=0  avg_train_loss=2.3436  avg_eval_loss=2.1869
epoch=1  avg_train_loss=2.1614  avg_eval_loss=2.1961
epoch=2  avg_train_loss=2.0913  avg_eval_loss=2.2137
epoch=3  avg_train_loss=2.0284  avg_eval_loss=2.1401
epoch=4  avg_train_loss=1.9372  avg_eval_loss=2.0760
epoch=5  avg_train_loss=1.9410  avg_eval_loss=2.1008
epoch=6  avg_train_loss=1.9099  avg_eval_loss=2.0968
epoch=7  avg_train_loss=1.8717  avg_eval_loss=2.0335
epoch=8  avg_train_loss=1.8615  avg_eval_loss=2.0935
epoch=9  avg_train_loss=1.8207  avg_eval_loss=2.0647
epoch=10  avg_train_loss=1.7888  avg_eval_loss=2.2481
epoch=11  avg_train_loss=1.7722  avg_eval_loss=2.1267
epoch=12  avg_train_loss=1.7572  avg_eval_loss=2.1086
epoch=13  avg_train_loss=1.7672  avg_eval_loss=2.2186
epoch=14  avg_train_loss=1.7091  avg_eval_loss=2.1539


In [8]:
# With PEFT: start from a fresh MLP and list its sub-modules by name, since
# the LoRA configuration addresses layers through these names.
module_with_PEFT = MLP()
[(module_name, type(module)) for module_name, module in module_with_PEFT.named_modules()]

[('', __main__.MLP),
 ('seq', torch.nn.modules.container.Sequential),
 ('seq.0', torch.nn.modules.linear.Linear),
 ('seq.1', torch.nn.modules.activation.ReLU),
 ('seq.2', torch.nn.modules.linear.Linear),
 ('seq.3', torch.nn.modules.activation.ReLU),
 ('seq.4', torch.nn.modules.linear.Linear)]

In [9]:
# LoRA setup for the MLP: rank-8 adapters on the two hidden linear layers,
# while the output layer (seq.4) is kept fully trainable via modules_to_save.
# No task_type is given: "classification" is not a member of peft.TaskType
# (later PEFT releases may reject unknown values), and without a task type
# get_peft_model wraps the model in the generic PeftModel, which is what a
# plain nn.Module needs.
config = peft.LoraConfig(
    r=8,
    lora_dropout=0.05,
    target_modules=["seq.0", "seq.2"],
    modules_to_save=["seq.4"],
    bias="lora_only",
)

peft_model = peft.get_peft_model(module_with_PEFT, config)
optimizer = torch.optim.Adam(peft_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# print_trainable_parameters() prints the summary itself and returns None,
# so wrapping it in print() only adds a stray "None" line to the output.
peft_model.print_trainable_parameters()
train_model(peft_model, optimizer, criterion, train_dataloader, test_dataloader, epochs=30)

trainable params: 96,586 || all params: 10,260,596 || trainable%: 0.9413
None
epoch=0  avg_train_loss=2.2578  avg_eval_loss=2.1244
epoch=1  avg_train_loss=2.0448  avg_eval_loss=2.0586
epoch=2  avg_train_loss=1.9678  avg_eval_loss=2.0004
epoch=3  avg_train_loss=1.9087  avg_eval_loss=2.0129
epoch=4  avg_train_loss=1.7831  avg_eval_loss=2.0208
epoch=5  avg_train_loss=1.7177  avg_eval_loss=1.9319
epoch=6  avg_train_loss=1.6317  avg_eval_loss=2.0151
epoch=7  avg_train_loss=1.5797  avg_eval_loss=1.9606
epoch=8  avg_train_loss=1.5526  avg_eval_loss=2.0967
epoch=9  avg_train_loss=1.5577  avg_eval_loss=2.0868
epoch=10  avg_train_loss=1.4350  avg_eval_loss=2.0236
epoch=11  avg_train_loss=1.3175  avg_eval_loss=2.2508
epoch=12  avg_train_loss=1.3394  avg_eval_loss=2.1585
epoch=13  avg_train_loss=1.3406  avg_eval_loss=2.0433
epoch=14  avg_train_loss=1.1755  avg_eval_loss=2.1223
epoch=15  avg_train_loss=1.1371  avg_eval_loss=2.3727
epoch=16  avg_train_loss=1.0981  avg_eval_loss=2.2653
epoch=17  avg_

**4. Training với CNN**

In [10]:
def train_model(model, optimizer, criterion, train_dataloader, test_dataloader, epochs, flatten=False):
    """Train `model` and print the mean train/eval loss after every epoch.

    This definition replaces any earlier `train_model` in the kernel. By
    default the input batches reach the model in their original layout.

    Args:
        model: module that maps an input batch to predictions.
        optimizer: optimizer already bound to the parameters to update.
        criterion: callable `(predictions, targets) -> scalar loss tensor`.
        train_dataloader: iterable of `(inputs, targets)` batches used for
            the optimisation steps.
        test_dataloader: iterable of `(inputs, targets)` batches used only
            for evaluation.
        epochs: number of passes over `train_dataloader`.
        flatten: when True every input batch is reshaped to `(batch, -1)`
            before the forward pass; the default False leaves it untouched.

    Returns:
        None. One summary line per epoch is written to stdout; each average
        is the mean of the per-batch losses, or `nan` when the corresponding
        loader yielded no batch.
    """
    for epoch in range(epochs):

        # Train
        model.train()
        train_loss, train_batches = 0.0, 0
        for x_batch, y_batch in train_dataloader:
            if flatten:
                x_batch = x_batch.view(x_batch.size(0), -1)
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_batches += 1

        # Evaluate: no gradients are needed for the forward pass or the loss.
        model.eval()
        eval_loss, eval_batches = 0.0, 0
        with torch.no_grad():
            for x_batch, y_batch in test_dataloader:
                if flatten:
                    x_batch = x_batch.view(x_batch.size(0), -1)
                y_pred = model(x_batch)
                eval_loss += criterion(y_pred, y_batch).item()
                eval_batches += 1

        # Batches are counted during iteration so loaders without __len__
        # work and an empty loader reports nan instead of dividing by zero.
        avg_train_loss = train_loss / train_batches if train_batches else float("nan")
        avg_eval_loss = eval_loss / eval_batches if eval_batches else float("nan")
        print(f"{epoch=}  {avg_train_loss=:.4f}  {avg_eval_loss=:.4f}")

In [11]:
# Baseline without PEFT: every weight of the CNN is trained directly.
module_without_PEFT = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=module_without_PEFT.parameters(), lr=1e-3)
train_model(
    model=module_without_PEFT,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    epochs=15,
)

epoch=0  avg_train_loss=2.2840  avg_eval_loss=2.2351
epoch=1  avg_train_loss=2.0918  avg_eval_loss=2.0648
epoch=2  avg_train_loss=1.9696  avg_eval_loss=1.9779
epoch=3  avg_train_loss=1.8788  avg_eval_loss=1.9297
epoch=4  avg_train_loss=1.7936  avg_eval_loss=1.9112
epoch=5  avg_train_loss=1.7338  avg_eval_loss=2.0407
epoch=6  avg_train_loss=1.6817  avg_eval_loss=1.8874
epoch=7  avg_train_loss=1.6107  avg_eval_loss=1.8981
epoch=8  avg_train_loss=1.5735  avg_eval_loss=1.8796
epoch=9  avg_train_loss=1.4992  avg_eval_loss=1.9817
epoch=10  avg_train_loss=1.4164  avg_eval_loss=1.9934
epoch=11  avg_train_loss=1.3760  avg_eval_loss=1.9055
epoch=12  avg_train_loss=1.2907  avg_eval_loss=1.8377
epoch=13  avg_train_loss=1.2068  avg_eval_loss=1.9527
epoch=14  avg_train_loss=1.1615  avg_eval_loss=1.9801


In [12]:
# With PEFT: start from a fresh CNN and list its sub-modules by name, since
# the LoRA configuration addresses layers through these names.
module_with_PEFT = CNN()
[(module_name, type(module)) for module_name, module in module_with_PEFT.named_modules()]
[('', __main__.CNN),
 ('model', torch.nn.modules.container.Sequential),
 ('model.0', torch.nn.modules.conv.Conv2d),
 ('model.1', torch.nn.modules.activation.ReLU),
 ('model.2', torch.nn.modules.pooling.MaxPool2d),
 ('model.3', torch.nn.modules.conv.Conv2d),
 ('model.4', torch.nn.modules.activation.ReLU),
 ('model.5', torch.nn.modules.pooling.MaxPool2d),
 ('model.6', torch.nn.modules.flatten.Flatten),
 ('model.7', torch.nn.modules.linear.Linear),
 ('model.8', torch.nn.modules.activation.ReLU),
 ('model.9', torch.nn.modules.linear.Linear),
 ('model.10', torch.nn.modules.activation.ReLU),
 ('model.11', torch.nn.modules.linear.Linear)]

In [13]:
# LoRA setup for the CNN: rank-8 adapters on both convolutions and all three
# linear layers.
# No task_type is given: "classification" is not a member of peft.TaskType
# (later PEFT releases may reject unknown values), and without a task type
# get_peft_model wraps the model in the generic PeftModel, which is what a
# plain nn.Module needs.
config = peft.LoraConfig(
    r=8,
    lora_dropout=0.05,
    target_modules=["model.0", "model.3", "model.7", "model.9", "model.11"],
    bias="lora_only",
)

peft_model = peft.get_peft_model(module_with_PEFT, config)
optimizer = torch.optim.Adam(peft_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# print_trainable_parameters() prints the summary itself and returns None,
# so wrapping it in print() only adds a stray "None" line to the output.
peft_model.print_trainable_parameters()
train_model(peft_model, optimizer, criterion, train_dataloader, test_dataloader, epochs=30)

trainable params: 8,756 || all params: 70,526 || trainable%: 12.4153
None
epoch=0  avg_train_loss=2.3021  avg_eval_loss=2.3042
epoch=1  avg_train_loss=2.2921  avg_eval_loss=2.2824
epoch=2  avg_train_loss=2.2295  avg_eval_loss=2.2205
epoch=3  avg_train_loss=2.1883  avg_eval_loss=2.1848
epoch=4  avg_train_loss=2.1484  avg_eval_loss=2.1628
epoch=5  avg_train_loss=2.0965  avg_eval_loss=2.1209
epoch=6  avg_train_loss=2.0460  avg_eval_loss=2.0887
epoch=7  avg_train_loss=1.9609  avg_eval_loss=2.0163
epoch=8  avg_train_loss=1.8680  avg_eval_loss=1.9493
epoch=9  avg_train_loss=1.8540  avg_eval_loss=1.9223
epoch=10  avg_train_loss=1.7895  avg_eval_loss=2.0188
epoch=11  avg_train_loss=1.8064  avg_eval_loss=1.9993
epoch=12  avg_train_loss=1.7412  avg_eval_loss=1.9608
epoch=13  avg_train_loss=1.7064  avg_eval_loss=1.9043
epoch=14  avg_train_loss=1.6926  avg_eval_loss=1.8847
epoch=15  avg_train_loss=1.6545  avg_eval_loss=1.9785
epoch=16  avg_train_loss=1.6219  avg_eval_loss=1.9340
epoch=17  avg_trai