https://zhuanlan.zhihu.com/p/663557294

https://arxiv.org/abs/2106.09685

In [57]:
import numpy as np
import torch
from torch import nn, Tensor
import torch.nn.functional as F
import math

In [58]:
# Sizes of the two axes of the weight matrix W (W is created as dim_in x dim_out).
dim_in = 128
dim_out = 128

# Dimension (rank) of the LoRA factors.
rank = 8

# Scaling parameter, usually a constant. Setting alpha helps reduce the need to
# re-tune hyperparameters when the configuration changes. In this notebook the
# low-rank update is multiplied by alpha directly.
alpha = 0.5

In [59]:
# Pre-trained weight matrix.
W = nn.Parameter(torch.randn((dim_in, dim_out)))

# LoRA factor A, drawn from a normal distribution.
W_A = nn.Parameter(torch.randn((dim_in, rank)))

# LoRA factor B, initialised to all zeros: the first time data passes through
# the network the result matches the pre-trained one, so the model starts out
# with reasonable behaviour.
W_B = nn.Parameter(torch.zeros((rank, dim_out)))

W.shape, W_A.shape, W_B.shape

(torch.Size([128, 128]), torch.Size([128, 8]), torch.Size([8, 128]))

In [60]:
def regular_forward_matmul(x: Tensor, W: Tensor) -> Tensor:
    """Plain forward pass: return the matrix product of ``x`` and ``W``.

    Args:
        x: Input tensor; its last dimension must match the first dimension
            of ``W`` (in this notebook: ``(batch, dim_in)``).
        W: Weight matrix (in this notebook: ``(dim_in, dim_out)``).

    Returns:
        The product ``x @ W``.
    """
    projected = torch.matmul(x, W)
    return projected

In [61]:
def lora_forward_matmul(x: Tensor, W: Tensor, W_A: Tensor, W_B: Tensor, alpha: float) -> Tensor:
    """Forward pass through a LoRA-adapted weight without fusing the factors.

    Computes ``x @ W + alpha * (x @ W_A) @ W_B``, which is mathematically the
    same as ``x @ fuse_lora_weights(W, W_A, W_B, alpha)``.

    Args:
        x: Input tensor whose last dimension matches the first dimension of
            ``W`` and ``W_A``.
        W: Base (pre-trained) weight matrix.
        W_A: First low-rank factor; same first dimension as ``W``.
        W_B: Second low-rank factor; same last dimension as ``W``.
        alpha: Scalar multiplier applied to the low-rank update.

    Returns:
        A new tensor with the base projection plus the scaled low-rank update.
        ``x`` itself is not modified.
    """
    base = x @ W
    # The low-rank branch must read the ORIGINAL input, not the base output:
    # reusing ``x @ W`` here would compute (xW)(AB) instead of x(AB), and would
    # fail outright whenever W is not square.
    # Multiplying by W_A first keeps the intermediate at rank width instead of
    # materialising the full W_A @ W_B matrix.
    low_rank_update = (x @ W_A) @ W_B
    return base + low_rank_update * alpha

In [62]:
def fuse_lora_weights(W: Tensor, W_A: Tensor, W_B: Tensor, alpha: float) -> Tensor:
    """Merge the LoRA factors into the base weight.

    Args:
        W: Base weight matrix.
        W_A: First low-rank factor.
        W_B: Second low-rank factor; ``W_A @ W_B`` must have the shape of ``W``.
        alpha: Scalar multiplier applied to the low-rank product.

    Returns:
        ``W + (W_A @ W_B) * alpha`` as a new tensor; the inputs are not modified.
    """
    scaled_delta = torch.matmul(W_A, W_B) * alpha
    return W + scaled_delta

In [63]:
# Push a random batch of 10 rows (dim_in features each) through the plain
# projection. Only the projected result is kept in `x`.
x = regular_forward_matmul(torch.randn(10, dim_in), W)
x.shape

torch.Size([10, 128])

In [64]:
# Run the unfused LoRA forward pass. Note that `x` here is whatever the
# previous cell left in `x`, and it is overwritten with the new result.
x = lora_forward_matmul(x=x, W=W, W_A=W_A, W_B=W_B, alpha=alpha)
x.shape

torch.Size([10, 128])

In [65]:
# Fold the low-rank update into the weight. `W` is rebound to the fused
# matrix, so the unfused weight is no longer reachable by name and re-running
# this cell applies the update again.
W = fuse_lora_weights(W=W, W_A=W_A, W_B=W_B, alpha=alpha)
W.shape

torch.Size([128, 128])

In [66]:
# With the LoRA update fused into `W`, the plain matmul forward is all that is
# needed. As above, `x` is the previous cell's result and is overwritten.
x = regular_forward_matmul(x=x, W=W)
x.shape

torch.Size([10, 128])