## Imports

In [1]:
# Load IPython's autoreload extension and set it to mode 2, which re-imports
# modules before each cell execution so that edits to imported .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [13]:
from pathlib import Path

from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import torch
import datasets
datasets.disable_progress_bar() # Don't show the `datasets` library's progress bars
from peft import LoraConfig

# Put the parent of the current working directory at the front of sys.path
# before importing the project-local `model` module.
# NOTE(review): presumably `model.py` lives in that parent directory — confirm.
import sys
sys.path.insert(0, str(Path.cwd().parent.resolve()))
from model import get_model_with_lora

## Config

In [6]:
# Pick the compute device: the currently selected CUDA device when CUDA is
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda", torch.cuda.current_device())
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [12]:
# Model: base checkpoint identifier passed to the loader.
model_name = "lmsys/vicuna-7b-v1.3"

# LoRA adapter hyper-parameters.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Extra keyword arguments forwarded to the loader unchanged.
load_kwargs = {
    "low_cpu_mem_usage": True,
    "torch_dtype": torch.float16,
}
model = get_model_with_lora(model_name, device, lora_config, **load_kwargs)
# Note: model.eval() is deliberately not called here, so the model keeps
# whatever train/eval mode the loader returns it in.

# Report the wrapped model's memory footprint. The divisor is 1024**3, so the
# number is in GiB even though the printed label says "GB".
bytes_per_gib = 1024 ** 3
footprint_bytes = model.pretrained_model.get_memory_footprint()
memory_usage = footprint_bytes / bytes_per_gib
print(f"{memory_usage=:.2f} GB")

Loading policy model...

kwargs={'low_cpu_mem_usage': True, 'torch_dtype': torch.float16}


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Loaded subject model with 6,746,808,321 parameters.
Model dtype: torch.float32

memory_usage=7.10 GB


In [15]:
# Display the model's `training` flag to see which mode it is currently in
# (True means training mode; model.eval() was not called above).
model.training

True

In [18]:
# Count the scalar elements across every parameter that still requires gradients
# (i.e. the ones an optimizer would actually update).
num_trainable_params = sum(
    map(
        lambda tensor: tensor.numel(),
        filter(lambda tensor: tensor.requires_grad, model.parameters()),
    )
)
print(f"{num_trainable_params:,d}")

8,392,705


In [20]:
# List the fully qualified name of every parameter that requires gradients,
# one per line; frozen parameters are skipped.
for name, p in model.named_parameters():
    if not p.requires_grad:
        continue
    print(name)

pretrained_model.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight
pretrained_model.base_model.mode