In [1]:
from torchtune.models.llama2 import llama2_7b, lora_llama2_7b
from torchtune.models.llama2 import llama2_tokenizer
from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params
from torchtune.datasets import text_completion_dataset

### Model and dataset preparation

#### Download the model

In [2]:
# Download the meta-llama/Llama-2-7b-hf repository into /tmp/Llama-2-7b-hf.
# The Hugging Face access token is taken from $HF_TOKEN, so that variable
# has to be defined before this cell runs.
!tune download meta-llama/Llama-2-7b-hf \
  --output-dir /tmp/Llama-2-7b-hf \
  --hf-token $HF_TOKEN 

Ignoring files matching the following patterns: *.safetensors
Fetching 15 files: 100%|█████████████████████| 15/15 [00:00<00:00, 30870.74it/s]
Successfully downloaded model repo and wrote to the following locations:
/tmp/Llama-2-7b-hf/.cache
/tmp/Llama-2-7b-hf/LICENSE.txt
/tmp/Llama-2-7b-hf/config.json
/tmp/Llama-2-7b-hf/README.md
/tmp/Llama-2-7b-hf/generation_config.json
/tmp/Llama-2-7b-hf/USE_POLICY.md
/tmp/Llama-2-7b-hf/.gitattributes
/tmp/Llama-2-7b-hf/model.safetensors.index.json
/tmp/Llama-2-7b-hf/special_tokens_map.json
/tmp/Llama-2-7b-hf/tokenizer_config.json
/tmp/Llama-2-7b-hf/tokenizer.json
/tmp/Llama-2-7b-hf/pytorch_model.bin.index.json
/tmp/Llama-2-7b-hf/tokenizer.model
/tmp/Llama-2-7b-hf/Responsible-Use-Guide.pdf
/tmp/Llama-2-7b-hf/pytorch_model-00002-of-00002.bin
/tmp/Llama-2-7b-hf/pytorch_model-00001-of-00002.bin
/tmp/Llama-2-7b-hf/torchtune_config.yaml


#### Create the base and LoRA models

In [3]:
# Plain Llama-2 7B architecture, used as the reference for comparison.
base_model = llama2_7b()

# Same architecture with LoRA applied to every attention projection
# (query, key, value and output), using rank 32 and alpha 64.
lora_model = lora_llama2_7b(
    lora_attn_modules=["q_proj", "k_proj", "v_proj", "output_proj"],
    lora_rank=32,
    lora_alpha=64,
)

### Comparing the base model and the LoRA model

In [4]:
# Show the first layer's attention module of each model, one under the other,
# so the modules that LoRA replaced are easy to spot.
print("=== Base model ===", base_model.layers[0].attn, sep="\n")
print("=== LoRA model ===", lora_model.layers[0].attn, sep="\n")

=== Base model ===
CausalSelfAttention(
  (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (output_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (pos_embeddings): RotaryPositionalEmbeddings()
)
=== LoRA model ===
CausalSelfAttention(
  (q_proj): LoRALinear(
    (dropout): Dropout(p=0.05, inplace=False)
    (lora_a): Linear(in_features=4096, out_features=32, bias=False)
    (lora_b): Linear(in_features=32, out_features=4096, bias=False)
  )
  (k_proj): LoRALinear(
    (dropout): Dropout(p=0.05, inplace=False)
    (lora_a): Linear(in_features=4096, out_features=32, bias=False)
    (lora_b): Linear(in_features=32, out_features=4096, bias=False)
  )
  (v_proj): LoRALinear(
    (dropout): Dropout(p=0.05, inplace=False)
    (lora_a): Linear(in_features=4096, out_features=32, bias=False)
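    (lora_b): Linear(in_features=32, out_features=4096, bias=False)
  )
  (output_proj): LoRALinear(
    (dropout): Dropout(p=0.05, inplace=False)
    (lora_a): Linear(in_features=4096, out_features=32, bias=False)
    (lora_b): Linear(in_features=32, out_features=4096, bias=False)
  )
  (pos_embeddings): RotaryPositionalEmbeddings()
)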

#### Print the total and trainable parameter counts of the LoRA model

In [5]:
# Make the LoRA adapter weights the only trainable parameters of the model.
# get_adapter_params collects the parameters that belong to LoRA;
# set_trainable_params then sets requires_grad=True on those and
# requires_grad=False on every other parameter.
lora_params = get_adapter_params(lora_model)
set_trainable_params(lora_model, lora_params)

# Count the elements of all parameters, and of those still requiring gradients.
total_params = sum(p.numel() for p in lora_model.parameters())
trainable_params = sum(
    p.numel() for p in lora_model.parameters() if p.requires_grad
)

# Report both counts and the trainable share as a percentage.
print(
  f"""
  {total_params} total params,
  {trainable_params} trainable params,
  {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.
  """
)


  6771970048 total params,
  33554432 trainable params,
  0.50% of all params are trainable.
  


### Fine-tuning the model with LoRA

#### Copy the recipe and config files into the current directory

In [6]:
# Copy the built-in distributed LoRA recipe and the llama2/7B_lora config into
# ./recipes and ./configs so they can be edited locally.
# -n keeps an existing destination file untouched ("not overwriting").
# NOTE(review): --make-parents presumably creates the missing target
# directories; confirm against `tune cp --help`.
!tune cp -n lora_finetune_distributed ./recipes/lora_finetune_distributed.py --make-parents
!tune cp -n llama2/7B_lora ./configs/custom_lora.yaml --make-parents

File already exists at /u/ouedraoi/documents/projects/torchtune-finetuning/recipes/lora_finetune_distributed.py, not overwriting.
File already exists at /u/ouedraoi/documents/projects/torchtune-finetuning/configs/custom_lora.yaml, not overwriting.


#### Start the training

In [7]:
# Launch the local copy of the distributed LoRA recipe with the local config,
# on a single node with 4 worker processes.
# NOTE(review): this typically means one process per GPU; confirm the machine
# has 4 GPUs or adjust --nproc_per_node.
!tune run --nnodes 1 --nproc_per_node 4 ./recipes/lora_finetune_distributed.py --config ./configs/custom_lora.yaml

Running with torchrun...
W0826 12:40:32.823000 140686333332352 torch/distributed/run.py:779] 
W0826 12:40:32.823000 140686333332352 torch/distributed/run.py:779] *****************************************
W0826 12:40:32.823000 140686333332352 torch/distributed/run.py:779] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W0826 12:40:32.823000 140686333332352 torch/distributed/run.py:779] *****************************************
INFO:torchtune.utils.logging:Running LoRAFinetuneRecipeDistributed with resolved config:

batch_size: 2
checkpointer:
  _component_: torchtune.utils.FullModelHFCheckpointer
  adapter_checkpoint: null
  checkpoint_dir: /tmp/Llama-2-7b-hf
  checkpoint_files:
  - pytorch_model-00001-of-00002.bin
  - pytorch_model-00002-of-00002.bin
  model_type: LLAMA2
  output_dir: /tmp/Llama-2-7b-hf
  recipe_checkpoint: nu

### Evaluation

#### Copy the evaluation config file to the current directory

In [1]:
# Copy the built-in eleuther_evaluation config to ./configs/custom_eval_config.yaml
# for local editing; -n keeps an existing copy untouched.
!tune cp -n eleuther_evaluation ./configs/custom_eval_config.yaml --make-parents

File already exists at /u/ouedraoi/documents/projects/torchtune-finetuning/configs/custom_eval_config.yaml, not overwriting.


#### Start the evaluation

In [None]:
# Run the eleuther_eval recipe using the local evaluation config.
# NOTE(review): the config presumably has to point at the fine-tuned
# checkpoint; verify its checkpointer section before running.
!tune run eleuther_eval --config ./configs/custom_eval_config.yaml