# Diffusion Model Transfer Learning

## 1. Technical Requirements

In [None]:
!pip install torch torchvision torchaudio
!pip install bitsandbytes
!pip install transformers
!pip install accelerate
!pip install diffusers

## 2. Training a Neural Network model with PyTorch

### 2.1. Prepare the training data

In [10]:
# Build a toy regression dataset: every target is the dot product of a random
# integer feature vector with a fixed ground-truth weight vector.
import random

import numpy as np

# Ground-truth weights the model is expected to recover.
w_list = np.array([2, 3, 4, 7])

# Ten samples, each holding one random integer in [1, 100] per weight.
x_list = [
    np.array([random.randint(1, 100) for _ in range(len(w_list))])
    for _ in range(10)
]

# Target of each sample: y = x . w
y_list = [features @ w_list for features in x_list]

print("x_list:", x_list)
print("y_list:", y_list)

x_list: [array([12, 31, 31, 61]), array([60, 16, 13, 50]), array([77, 32, 41, 92]), array([63, 23, 65, 24]), array([66, 17, 92, 81]), array([70, 41, 31, 27]), array([58, 38, 16, 12]), array([60, 47, 25,  2]), array([87, 51,  7, 26]), array([69, 49, 85, 74])]
y_list: [668, 570, 1058, 623, 1118, 576, 378, 375, 537, 1143]


### 2.2. Preparing for training

In [11]:
import torch 
import torch.nn as nn

class MyLinear(nn.Module):
    """Bias-free linear model that predicts ``w @ x`` with a learnable weight vector.

    Args:
        in_features: Number of weights to learn. When omitted (``None``), the
            size falls back to ``len(w_list)`` read from the notebook's global
            scope, which is what the no-argument constructor has always done.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the weight vector after the
            # global ground-truth weights defined in the data-preparation cell.
            in_features = len(w_list)
        # Weights start as standard-normal noise and are registered as a
        # trainable parameter so the optimizer can update them.
        self.w = nn.Parameter(torch.randn(in_features))

    def forward(self, x:torch.Tensor):
        """Return ``self.w @ x``; for a 1-D ``x`` this is the scalar dot product."""
        return self.w @ x
    
# Instantiate the model together with the loss function and optimizer.
model = MyLinear()

loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.00001)

# x_list is a Python list of NumPy arrays. Collapse it into a single ndarray
# before calling torch.tensor: building a tensor directly from a list of
# ndarrays is slow and makes PyTorch emit a UserWarning. The resulting
# tensors have the same values, shapes and dtype as before.
x_input = torch.tensor(np.array(x_list), dtype=torch.float32)
y_output = torch.tensor(np.array(y_list), dtype=torch.float32)

### 2.3. Train the model

In [3]:
# Plain PyTorch training loop: one optimizer update per sample, repeated for
# `num_epochs` passes over the data.
num_epochs = 100
for epoch in range(num_epochs):
    for sample, target in zip(x_input, y_output):
        # Clear the gradients left over from the previous update.
        optimizer.zero_grad()

        # Forward pass and loss for this single sample.
        y_pred = model(sample)
        loss = loss_fn(y_pred, target)

        # Backpropagate, then update the parameters.
        loss.backward()
        optimizer.step()

    # Report every 10th epoch; the value shown is the loss of the last sample
    # processed in that epoch.
    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))


print("train done")


Epoch [10/100], Loss: 201.5572
Epoch [20/100], Loss: 10.8380
Epoch [30/100], Loss: 3.5255
Epoch [40/100], Loss: 1.7397
Epoch [50/100], Loss: 0.9160
Epoch [60/100], Loss: 0.4882
Epoch [70/100], Loss: 0.2607
Epoch [80/100], Loss: 0.1393
Epoch [90/100], Loss: 0.0745
Epoch [100/100], Loss: 0.0398
train done


In [4]:
# Display the learned weight vector. After training it should be close to the
# ground-truth weights w_list = [2, 3, 4, 7] that generated the targets.
model.w

Parameter containing:
tensor([1.9761, 3.0063, 4.0219, 6.9869], requires_grad=True)

## 3. Training a model with Hugging Face Accelerate

### 3.1 Training a model with Accelerate

In [14]:
# Train the same model again, this time letting Hugging Face Accelerate choose
# the device and drive the backward pass.
from accelerate import utils
# Writes a default Accelerate config file; an existing one is left untouched.
utils.write_basic_config()

from accelerate import Accelerator
accelerator = Accelerator()
device = accelerator.device

# Tensor.to() is NOT in-place: it returns a new tensor on the target device.
# The results must be rebound, otherwise the data stays where it was while the
# model is moved, and the forward pass mixes devices.
x_input = x_input.to(device)
y_output = y_output.to(device)
# nn.Module.to() moves the module's parameters in place, so no rebinding is
# needed here.
model.to(device)

model, optimizer = accelerator.prepare(
    model, optimizer
)

num_epochs = 100
for epoch in range(num_epochs):
    for i, x in enumerate(x_input):
        # forward
        y_pred = model(x)

        # calculate loss
        loss = loss_fn(y_pred,y_output[i])

        # zero out the gradients cached from the previous step.
        optimizer.zero_grad()

        # backward: accelerator.backward(loss) takes the place of loss.backward()
        accelerator.backward(loss)

        # update parameters
        optimizer.step()

    # Report every 10th epoch; the value shown is the loss of the last sample
    # processed in that epoch.
    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

print("train done")

Configuration already exists at /home/andrewzhu/.cache/huggingface/accelerate/default_config.yaml, will not override. Run `accelerate config` manually or pass a different `save_location`.
Epoch [10/100], Loss: 0.4956
Epoch [20/100], Loss: 0.2719
Epoch [30/100], Loss: 0.1492
Epoch [40/100], Loss: 0.0818
Epoch [50/100], Loss: 0.0449
Epoch [60/100], Loss: 0.0246
Epoch [70/100], Loss: 0.0135
Epoch [80/100], Loss: 0.0074
Epoch [90/100], Loss: 0.0041
Epoch [100/100], Loss: 0.0022
train done


In [13]:
# Get the model back from Accelerate via unwrap_model (the counterpart of the
# accelerator.prepare call made for training) and rebind `model` to it.
model = accelerator.unwrap_model(model)
# Display the learned weights; they should be close to w_list = [2, 3, 4, 7].
model.w

Parameter containing:
tensor([2.0359, 2.9466, 4.0035, 6.9901], device='cuda:0', requires_grad=True)

### 3.2 Prepare the training data for multi-GPU training

In [1]:
# Regenerate the toy dataset and persist it to `train_data.pkl`, presumably so
# the stand-alone multi-GPU training script can load it (confirm against that
# script).
import pickle
import random

import numpy as np

# Ground-truth weights used to generate the targets.
w_list = np.array([2, 3, 4, 7])

# Ten samples, each holding one random integer in [1, 100] per weight.
x_list = [
    np.array([random.randint(1, 100) for _ in range(len(w_list))])
    for _ in range(10)
]

# Target of each sample: y = x . w
y_list = [features @ w_list for features in x_list]

# Bundle everything in one dictionary; the weights are stored as a plain list,
# the samples and targets keep their NumPy types.
train_obj = {
    'w_list': w_list.tolist(),
    'input': x_list,
    'output': y_list,
}

with open('train_data.pkl', 'wb') as pkl_file:
    pickle.dump(train_obj, pkl_file)

### 3.3 Train the model with multiple GPUs using Accelerate

The code is in the file `train_model_in_2gpus.py`