https://github.com/ayyucedemirbas/DoRA/blob/main/dora.py

In [10]:
#https://github.com/ayyucedemirbas/DoRA/blob/main/dora.py
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.feature_extraction.text import CountVectorizer

torch.manual_seed(0)



<torch._C.Generator at 0x7f7ef0918d70>

In [4]:
# This layer is dropped into your pre-trained PyTorch model where nn.Linear is used
class DoRALayer(nn.Module):
    def __init__(self, d_in, d_out, rank=4, weight=None, bias=None):
        super().__init__()

        if weight is not None:
            self.weight = nn.Parameter(weight, requires_grad=False)
        else:
            self.weight = nn.Parameter(torch.Tensor(d_out, d_in), requires_grad=False)

        if bias is not None:
            self.bias = nn.Parameter(bias, requires_grad=False)
        else:
            self.bias = nn.Parameter(torch.Tensor(d_out), requires_grad=False)

        # m = Magnitude column-wise across output dimension
        self.m = nn.Parameter(self.weight.norm(p=2, dim=0, keepdim=True))
        
        std_dev = 1 / torch.sqrt(torch.tensor(rank).float())
        self.lora_A = nn.Parameter(torch.randn(d_out, rank)*std_dev)
        self.lora_B = nn.Parameter(torch.zeros(rank, d_in))

    def forward(self, x):
        lora = torch.matmul(self.lora_A, self.lora_B)
        adapted = self.weight + lora
        column_norm = adapted.norm(p=2, dim=0, keepdim=True)
        norm_adapted = adapted / column_norm
        calc_weights = self.m * norm_adapted
        return F.linear(x, calc_weights, self.bias)



In [5]:
class SimpleModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.layer1(x)
        return x

In [6]:
# Training function
def train(model, criterion, optimizer, data_loader, epochs=5):
    model.train()
    for epoch in range(epochs):
        for inputs, targets in data_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        #print(f"Epoch {epoch+1}, Loss: {loss.item()}")

def replace_linear_with_dora(model):
    for name, module in model.named_children():
        if isinstance(module, nn.Linear):
            # Get the input and output dimensions of the current nn.Linear layer
            d_in = module.in_features
            d_out = module.out_features

            # Create a new DoRALayer with the same dimensions
            setattr(model, name, DoRALayer(d_out=d_out, d_in=d_in, weight=module.weight.data.clone(), bias=module.bias.data.clone()))
        else:
            # Recursively apply this function to submodules
            replace_linear_with_dora(module)


In [7]:
def generate_data(num_samples=100, input_dim=10):
    X = np.random.randn(num_samples, input_dim).astype(np.float32)
    y = np.sum(X, axis=1, keepdims=True)
    return X, y


In [8]:
def print_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    
    print(f"Total Parameters: {total_params}")
    print(f"Trainable Parameters: {trainable_params}")


In [11]:
def get_data_loader():
    # Dados textuais de exemplo
    texts = ["I love machine learning", "Deep learning is amazing", "Natural language processing"]
    labels = [0, 1, 0]

    # Usar CountVectorizer para converter texto em vetores de contagem
    vectorizer = CountVectorizer()
    data = vectorizer.fit_transform(texts).toarray()

    # Converter dados e rótulos para tensores
    data_tensor = torch.tensor(data, dtype=torch.float32)
    labels_tensor = torch.tensor(labels, dtype=torch.float32)

    # Criar TensorDataset
    dataset = TensorDataset(data_tensor, labels_tensor)

    # Usar DataLoader para iterar pelo dataset
    dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

    # Iterar pelo DataLoader
    for batch_data, batch_labels in dataloader:
        print(batch_data, batch_labels)

    return dataloader, data_tensor.shape[1], len(set(labels))


In [12]:
data_loader, input_dim, output_dim = get_data_loader()

model = SimpleModel(input_dim, output_dim)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

#X, y = generate_data(num_samples=1000, input_dim=input_dim)
#dataset = TensorDataset(X, y)
#data_loader = DataLoader(dataset, batch_size=64, shuffle=True)


print_model_parameters(model)

train(model, criterion, optimizer, data_loader, epochs=100)


tensor([[0., 0., 0., 0., 1., 1., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 1., 1.]]) tensor([0., 0.])
tensor([[1., 1., 1., 0., 1., 0., 0., 0., 0.]]) tensor([1.])


  from .autonotebook import tqdm as notebook_tqdm


Total Parameters: 20
Trainable Parameters: 20


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [13]:
# Evaluate the model
model.eval()
with torch.no_grad():
    inputs, targets = next(iter(data_loader))
    predictions = model(inputs)
    loss = criterion(predictions, targets)
    print(f"Final Evaluation Loss: {loss.item()}")

replace_linear_with_dora(model)

print_model_parameters(model)

Final Evaluation Loss: 0.15919075906276703
Total Parameters: 73
Trainable Parameters: 53


In [14]:
# Continue training with the Dora model
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
print("Continuing training with DoRA layers...")
train(model, criterion, optimizer, data_loader, epochs=5)  # Continue training

# Evaluate the model
model.eval()
with torch.no_grad():
    inputs, targets = next(iter(data_loader))
    predictions = model(inputs)
    print("INPUTS", inputs)
    print("predictions",  predictions)
    loss = criterion(predictions, targets)
    print(f"Final (DoRA) Evaluation Loss: {loss.item()}")

Continuing training with DoRA layers...
INPUTS tensor([[0., 0., 0., 0., 1., 1., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 1., 1.]])
predictions tensor([[0.1503, 0.1500],
        [0.0704, 0.4674]])
Final (DoRA) Evaluation Loss: 0.06712991744279861
