# LoRA From Scratch – Implement Low-Rank Adaptation for LLMs in PyTorch

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [10]:
def count_parameters(model: nn.Module) -> int:
    """Return the total number of scalar elements across all parameters of *model*.

    Every tensor yielded by ``model.parameters()`` is counted, regardless of
    whether it requires gradients.
    """
    return sum(p.numel() for p in model.parameters())
def count_trainable_parameters(model: nn.Module) -> int:
    """Return the number of scalar elements in the trainable parameters of *model*.

    Only tensors from ``model.parameters()`` whose ``requires_grad`` flag is
    true are counted; frozen parameters contribute nothing.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [6]:
class LoRALayer(nn.Module):
    """Low-rank branch that maps ``x`` to ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(fan_in, rank)`` and is drawn from a standard normal
    scaled by ``rank ** -0.5``. ``B`` has shape ``(rank, fan_out)`` and starts
    at zero, so a freshly constructed layer outputs all zeros.
    """

    def __init__(self, fan_in, fan_out, rank, alpha):
        """Create the two trainable factors and store the scaling factor.

        Args:
            fan_in: Size of the last dimension of the input.
            fan_out: Size of the last dimension of the output.
            rank: Inner dimension shared by ``A`` and ``B``.
            alpha: Multiplier applied to the low-rank product in ``forward``.
        """
        super().__init__()
        self.alpha = alpha

        # Scale of the random init for A: rank ** -0.5, computed in float32.
        init_scale = torch.tensor(rank, dtype=torch.float) ** -0.5
        down_init = torch.randn(fan_in, rank) * init_scale
        up_init = torch.zeros(rank, fan_out)

        # A is registered before B so the parameter ordering stays A, B.
        self.A = nn.Parameter(down_init)
        self.B = nn.Parameter(up_init)

    def forward(self, x):
        """Project ``x`` through ``A`` then ``B`` and scale by ``alpha``."""
        projected = x @ self.A
        return self.alpha * (projected @ self.B)

In [8]:
# Build a dense 10_000 -> 10_000 linear layer and a LoRA layer with the same
# input/output sizes (rank 8, alpha 4), then compare their parameter counts.
linear_layer = nn.Linear(10_000, 10_000)
lora_layer = LoRALayer(10_000, 10_000, 8, 4)
# Expected counts: dense = weight + bias, LoRA = A + B.
count_parameters(linear_layer), count_parameters(lora_layer) # (10_000 * 10_000 + 10_000), (10_000 * 8 * 2)

(100010000, 160000)

In [11]:
# Push the same random batch (50 rows of 10_000 features) through both layers
# and show the shapes of the two outputs side by side.
x = torch.randn(50, 10_000)
out_linear = linear_layer(x)
out_lora = lora_layer(x)
out_linear.shape, out_lora.shape

(torch.Size([50, 10000]), torch.Size([50, 10000]))