To set a custom kernel for this notebook, see https://scicomp.aalto.fi/triton/apps/jupyter/#installing-kernels-from-virtualenvs-or-anaconda-environments

Prerequisites: 
- Efficient resource utilization and reducing computational requirements
- LoRA
- Mixed-precision Training

https://huggingface.co/blog/4bit-transformers-bitsandbytes

https://arxiv.org/abs/2106.09685

https://arxiv.org/abs/2305.14314

In [1]:
# # load env variables, NOTE: this cell must be run first
# from dotenv import load_dotenv
# load_dotenv(override=True)
import warnings
warnings.filterwarnings('ignore')

import os
import sys
import time

In [2]:
# os.environ["HF_HUB_OFFLINE"]="1"

In [3]:
# Put `transformers` in offline mode. This is set before `transformers` is
# imported further down so the library sees it at import time.
os.environ['TRANSFORMERS_OFFLINE']="1"

In [4]:
# Point the Hugging Face cache at the shared, pre-populated cache directory.
# NOTE(review): absolute, site-specific path — adjust when running elsewhere.
os.environ['HF_HOME']='/scratch/shareddata/dldata/huggingface-hub-cache'

In [5]:
# Put `datasets` in offline mode so the cached copy of the dataset is used
# instead of contacting the Hub.
os.environ['HF_DATASETS_OFFLINE']="1"

In [6]:
# Hub identifiers of the base model to fine-tune and the instruction dataset
# used for training; both are resolved from the local cache (offline mode).
model_name = 'meta-llama/Llama-2-7b-hf'
dataset_name = 'tatsu-lab/alpaca'

# dataset_name = "knowrohit07/know_medical_dialogue_v2"
# dataset_name = "LDJnr/Pure-Dove"

# Preparing data


In [7]:
# from transformers.utils import send_example_telemetry

# send_example_telemetry("question_answering_notebook", framework="pytorch")

In [8]:
# from datasets import list_datasets
# list_datasets()

In [9]:
from datasets import load_dataset

# Load only the first 1000 rows of the train split to keep the demo small,
# reusing an already-downloaded copy of the dataset when one exists.
dataset = load_dataset(dataset_name,split='train[:1000]',download_mode="reuse_dataset_if_exists")

# Display the dataset summary (features and row count).
dataset

Using the latest cached version of the dataset since tatsu-lab/alpaca couldn't be found on the Hugging Face Hub (offline mode is enabled).
Found the latest cached dataset configuration 'default-5b26dc6e2f4ba670' at /scratch/shareddata/dldata/huggingface-hub-cache/datasets/tatsu-lab___alpaca/default-5b26dc6e2f4ba670/0.0.0/ca31c69184d9832faed373922c2acccec0b13a0bb5bbbe19371385c3ff26f1d1 (last modified on Tue Jan  9 10:03:55 2024).


Dataset({
    features: ['instruction', 'input', 'output', 'text'],
    num_rows: 1000
})

In [10]:
print(dataset[0]['text'])

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Give three tips for staying healthy.

### Response:
1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. 
2. Exercise regularly to keep your body active and strong. 
3. Get enough sleep and maintain a consistent sleep schedule.


In [11]:
from transformers import AutoTokenizer
# Load the tokenizer that matches the base model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding.
# NOTE(review): presumably needed because this tokenizer ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token

In [12]:
def tokenize_function(examples):
    """Tokenize the ``'text'`` entry of a batch of dataset examples.

    Uses the notebook-level ``tokenizer``. On every call the tokenizer is
    configured to pad with its EOS token and to truncate from the left, and
    the text is then encoded to at most 256 tokens as PyTorch tensors.

    Args:
        examples: Mapping with a ``'text'`` entry holding the text to encode
            (a list of strings when called through a batched ``Dataset.map``).

    Returns:
        The tokenizer output for ``examples['text']``.
    """
    batch_text = examples['text']

    # Tokenizer-wide settings: EOS doubles as padding, and over-long inputs
    # lose their beginning rather than their end.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.truncation_side = "left"

    encode_options = dict(
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=256,
    )
    return tokenizer(batch_text, **encode_options)

In [13]:
# Tokenize every row and drop the original columns so that only the tokenizer
# outputs remain in the resulting dataset.
# With batch_size=1 each example is tokenized on its own, so padding to the
# longest sequence of the batch is a no-op here and rows presumably keep their
# individual lengths; batches are padded later by the data collator.
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    batch_size=1,
    drop_last_batch=True,
    remove_columns=dataset.column_names,
)

print(tokenized_dataset)

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 1000
})


In [14]:
# Preview only: the returned DatasetDict is displayed but not stored, so this
# call does not define the split used for training.
tokenized_dataset.train_test_split(test_size=0.3)

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 700
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 300
    })
})

In [15]:
# Hold out 30% of the rows for evaluation. A fixed seed makes the split
# reproducible, so train/eval membership is the same after a kernel restart.
split = tokenized_dataset.train_test_split(test_size=0.3, seed=42)
train_dataset, eval_dataset = split['train'], split['test']

In [16]:
from torch.utils.data import DataLoader
from transformers import DataCollatorForLanguageModeling


In [None]:
# Causal-LM collator (mlm=False): pads each batch with the tokenizer and
# derives the labels from the input ids.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(
    train_dataset, shuffle=True, collate_fn=data_collator, batch_size=2, pin_memory=True
)
# Evaluation does not benefit from shuffling; a fixed order keeps eval runs
# comparable between epochs.
eval_dataloader = DataLoader(
    eval_dataset, shuffle=False, collate_fn=data_collator, batch_size=2, pin_memory=True
)

# Try out generation

In [None]:
# # Example usage
# prompt = "Once upon a time"  # Replace with your own prompt
# # Encode the prompt
# input_ids = tokenizer.encode(prompt, return_tensors='pt')

# # Generate text
# output = base_model.generate(input_ids, max_length=20, num_return_sequences=3, no_repeat_ngram_size=2)

# # Decode and return the generated text
# for text in [tokenizer.decode(generated_id, skip_special_tokens=True) for generated_id in output]:
#     print(text)

In [None]:
# import os
# import csv
# import torch


#  DataCollatorForSeq2Seq

# from datasets import load_dataset
# from peft import LoraConfig, TaskType, get_peft_model
# from peft.utils.other import fsdp_auto_wrap_policy


In [None]:
from transformers import AutoModelForCausalLM
from peft import PeftModel, LoraConfig, get_peft_model
import torch

In [None]:
# Load the pretrained base model with the library defaults. The commented-out
# keyword arguments are alternative loading options (8-bit weights, fp16
# weights, automatic or per-rank device placement) kept for experimentation.
base_model = AutoModelForCausalLM.from_pretrained(model_name,
#                                                  load_in_8bit=True,
#                                                   torch_dtype=torch.float16,
#                                                  device_map="auto",
                                                  # device_map = {"": "cuda:" + str(int(os.environ.get("LOCAL_RANK") or 0))}
                                                 )

In [None]:
# for i, param in enumerate(base_model.named_parameters()):
#     print(f'{i},{param[0]}\t {param[1].device} \t{param[1].dtype}')

In [None]:
# Freeze all parameters
for param in base_model.parameters():
    param.requires_grad = False

In [None]:
def print_trainable_parameters(model):
    """
    Print how many parameter elements of a model are trainable and how many
    it has in total.

    Two lines are written to stdout: ``trainable_params: <n>`` (elements of
    parameters with ``requires_grad`` set) and ``all_params: <n>`` (elements
    of all parameters).

    Args:
    model: An object exposing ``named_parameters()``, e.g. a Hugging Face model.
    """
    # One (element count, is trainable) pair per parameter tensor.
    counts = [
        (param.numel(), param.requires_grad)
        for _, param in model.named_parameters()
    ]
    all_params = sum(size for size, _ in counts)
    trainable_params = sum(size for size, trainable in counts if trainable)
    print(f"trainable_params: {trainable_params}")
    print(f"all_params: {all_params}")
    
# Baseline before adding LoRA adapters: all base-model parameters were frozen
# in the previous cell, so the trainable count reported here should be 0.
print_trainable_parameters(base_model)

In [None]:
# LoRA adapter configuration (see the LoRA paper linked below).
config = LoraConfig(
    r=16,  # rank of the low-rank update matrices
    lora_alpha=32,  # scaling factor applied to the LoRA update
    target_modules=["q_proj", "k_proj"],  # names of the submodules that receive adapters
    lora_dropout=0.05,  # dropout used inside the LoRA layers
    bias="none",  # no bias terms are made trainable
    task_type="CAUSAL_LM",
)

LoRA: Low-Rank Adaptation of Large Language Models
https://arxiv.org/abs/2106.09685

In [None]:
base_model

In [None]:
# Wrap the base model with LoRA adapters according to `config`.
peft_model = get_peft_model(base_model, config)

In [None]:
peft_model

**Different training tools from Huggingface**:

- Huggingface accelerate library

- Trainer: this API supports distributed training on multiple GPUs/TPUs and mixed precision through [NVIDIA Apex](https://github.com/NVIDIA/apex) for NVIDIA GPUs, ROCm Apex for AMD GPUs, and native AMP for PyTorch.


## Huggingface accelerate library

You may want to use different settings/resources/environments for model training in different phases of your research. Different APIs or libraries provide their own interfaces for launching training:
```bash
# A single GPU/CPU
python your_script.py
```
or 

```bash
# Multiple GPUs
torchrun --nnodes=1 --nproc_per_node=4 your_script.py
```
or 

```bash
# Multiple GPUs
deepspeed --num_gpus=4 your_script.py
```
or

......

This often means changing many lines of code.

Is there a better way of doing this? 

Yes, the accelerate library solves this and ensures the same code can be run on different computing resources.

In [None]:
from accelerate import Accelerator

In [None]:
accelerator = Accelerator()

# Training hyperparameters for the manual (accelerate) training loop.
# NOTE(review): `batch_size` and `max_length` are not read by the later cells
# of this notebook (the dataloaders use batch_size=2 and tokenization uses
# max_length=256) — confirm whether they should be wired in or removed.
batch_size = 1
gradient_accumulation_steps = 8
max_length = 512
lr = 1e-4
num_epochs = 3

# print_trainable_parameters() prints its report itself and returns None, so
# passing its result to accelerator.print() would additionally print "None".
# Guarding on the main process keeps a single report in multi-process runs.
if accelerator.is_main_process:
    peft_model.print_trainable_parameters()

In [None]:
# for batch in train_dataloader:
#     print(batch)
#     input_ids, attention_mask = batch
#     outputs = model(input_ids, attention_mask=attention_mask)
#     # Now, you can use outputs for your task


In [None]:
from torch import optim
from transformers import get_linear_schedule_with_warmup

In [None]:
import math

# Frozen parameters never receive gradients, so AdamW simply skips them.
optimizer = optim.AdamW(peft_model.parameters(), lr=lr)

# The scheduler is advanced once per optimizer update, and an update happens
# only once per `gradient_accumulation_steps` micro-batches. Counting raw
# batches here would make the linear decay cover only a fraction of its range.
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / gradient_accumulation_steps)

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_update_steps_per_epoch * num_epochs,
)

In [None]:
from peft.utils.other import fsdp_auto_wrap_policy

# When Accelerate is configured for FSDP, install PEFT's auto-wrap policy
# BEFORE prepare(): the policy is consulted while prepare() wraps the model,
# so setting it afterwards has no effect on this model.
if getattr(accelerator.state, "fsdp_plugin", None) is not None:
    accelerator.state.fsdp_plugin.auto_wrap_policy = fsdp_auto_wrap_policy(peft_model)

# Let Accelerate place the model, dataloaders, optimizer and scheduler on the
# right device(s) and wrap them for the current launch configuration.
peft_model, train_dataloader, eval_dataloader, optimizer, lr_scheduler = accelerator.prepare(
    peft_model, train_dataloader, eval_dataloader, optimizer, lr_scheduler
)

In [None]:
# Imported here because no earlier (uncommented) cell brings this name into scope.
from peft.utils.other import fsdp_auto_wrap_policy

# Only relevant when Accelerate was launched with an FSDP configuration.
# The policy is built from the PEFT-wrapped model (`peft_model`); no variable
# called `model` exists at this point of the notebook.
# NOTE(review): the policy is read while accelerator.prepare() wraps the model,
# so it only takes effect for prepare() calls made after this assignment.
if getattr(accelerator.state, "fsdp_plugin", None) is not None:
    accelerator.state.fsdp_plugin.auto_wrap_policy = fsdp_auto_wrap_policy(peft_model)

In [None]:
from tqdm import tqdm

for epoch in range(num_epochs):
    peft_model.train()
    total_loss = 0
    num_batches = len(train_dataloader)
    for step, batch in enumerate(tqdm(train_dataloader)):
        outputs = peft_model(**batch)
        loss = outputs.loss
        # Track the unscaled loss for reporting.
        total_loss += loss.detach().float()

        # Scale the loss so the gradients summed over one accumulation window
        # correspond to the mean over its micro-batches, not their sum.
        accelerator.backward(loss / gradient_accumulation_steps)

        # Update after every full accumulation window (not at step 0, where
        # only a single micro-batch has been seen), and also on the last batch
        # so leftover gradients are applied instead of leaking into the next epoch.
        window_complete = (step + 1) % gradient_accumulation_steps == 0
        last_batch = (step + 1) == num_batches
        if window_complete or last_batch:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

    # Report the mean training loss of the epoch (guarding against an empty loader).
    mean_loss = float(total_loss) / max(num_batches, 1)
    accelerator.print(f"epoch {epoch}: mean train loss = {mean_loss:.4f}")

#         capture_batch_analytics(epoch, 'train', step, loss.detach().float(), total_loss, batch["input_ids"], batch["labels"])

#     peft_model.eval()
#     eval_loss = 0
#     for step, batch in enumerate(tqdm(eval_dataloader)):
#         with torch.no_grad():
#             outputs = model(**batch)
#         loss = outputs.loss
#         eval_loss += loss.detach().float()
#         capture_batch_analytics(epoch, 'eval', step, loss.detach().float(), eval_loss, batch["input_ids"], batch["labels"])

# #     model.save_pretrained(f"trained_model-{epoch}")

In [None]:
# Inspect the trained PEFT model. (`model` is not defined until the merge
# section further down, so it cannot be displayed here on a fresh kernel.)
peft_model

In [None]:
# import torch
# device_count = torch.cuda.device_count()
# if device_count > 0:
# #     logger.debug("Select GPU device")
#     device = torch.device("cuda")
# else:
# #     logger.debug("Select CPU device")
#     device = torch.device("cpu")

# transformers Trainer API

In [None]:
from transformers import Trainer, TrainingArguments
# DataCollatorForLanguageModeling

In [None]:
training_args = TrainingArguments(
    # --- optimisation schedule ---
    learning_rate=1.0e-5,
    # Epoch count; superseded by `max_steps` whenever that is not -1.
    num_train_epochs=1,
    # Hard cap on training steps (one step = one batch of data).
    max_steps=30,
    # Where checkpoints and logs are written.
    output_dir='out',
    # --- batching ---
    per_device_train_batch_size=1,
    # Warmup steps of the learning-rate scheduler.
    warmup_steps=1,
    per_device_eval_batch_size=1,
    # Log at every step.
    logging_steps=1,
    # Accumulate gradients over 4 batches before each optimizer update.
    gradient_accumulation_steps=4,
    ddp_find_unused_parameters=False,
)

In [None]:
# Train on the training split only. Passing the full `tokenized_dataset` would
# also train on the rows held out in `eval_dataset`, which would make any
# later evaluation on that split meaningless.
trainer = Trainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=training_args,
    # Causal-LM collation: pad each batch and derive labels from the input ids.
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

In [None]:
# Run training with the Trainer API and keep the returned training summary.
result = trainer.train()

In [None]:

# Save the PEFT model to ./llama_7b_peft; this directory is loaded again
# below with PeftModel.from_pretrained.
# NOTE(review): `save_adapter` / `save_config` do not look like documented
# parameters of PeftModel.save_pretrained — confirm against the installed peft version.
peft_model.save_pretrained("./llama_7b_peft", save_adapter=True, save_config=True)

# model_to_merge = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained(base_model).to("cuda"), "./llama_7b_peft")

# merged_model = model_to_merge.merge_and_unload()
# merged_model.save_pretrained(merged_model)

In [None]:
# Reload a fresh copy of the base model in half precision, letting the library
# choose device placement and offloading to ./offload when needed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=False,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload", 
)


In [None]:
# Attach the adapter saved in ./llama_7b_peft to the freshly loaded base model.
model = PeftModel.from_pretrained(
    model, 
    "./llama_7b_peft", 
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload", 
)

In [None]:
# Merge the adapter weights into the base weights and drop the PEFT wrapper.
# NOTE: rebinding `model` makes this cell non-idempotent — re-run the two
# loading cells above before running it a second time.
model = model.merge_and_unload()

In [None]:
# Persist the merged model to ./finetuned_llama2-7b.
model.save_pretrained("./finetuned_llama2-7b")

In [None]:
# Quick generation smoke test with the merged model.
# transformers' Trainer has no generate() method, so generation is done by
# calling generate() on the model itself.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nGive three tips for staying healthy.\n\n"
    "### Response:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

In [None]:
def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=100):
    """Generate a continuation of ``text`` and return it without the prompt.

    Args:
        text: Prompt string.
        model: Model exposing ``device`` and ``generate(input_ids=..., ...)``.
        tokenizer: Tokenizer exposing ``encode`` and ``batch_decode``.
        max_input_tokens: The prompt is truncated to at most this many tokens.
        max_output_tokens: Maximum number of NEW tokens to generate.

    Returns:
        The generated text that follows the (possibly truncated) prompt.
    """
    # Tokenize
    input_ids = tokenizer.encode(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    )

    # Generate. `max_new_tokens` bounds only the generated part; `max_length`
    # would also count the prompt tokens, so any prompt longer than
    # `max_output_tokens` would leave no room for output.
    device = model.device
    generated_tokens_with_prompt = model.generate(
        input_ids=input_ids.to(device),
        max_new_tokens=max_output_tokens,
    )

    # Decode both the prompt actually fed to the model and the full output.
    prompt_text = tokenizer.batch_decode(input_ids, skip_special_tokens=True)[0]
    generated_text_with_prompt = tokenizer.batch_decode(
        generated_tokens_with_prompt, skip_special_tokens=True
    )[0]

    # Strip the prompt using its decoded length: when the prompt was truncated,
    # len(text) would be too long and would cut into the generated answer.
    generated_text_answer = generated_text_with_prompt[len(prompt_text):]

    return generated_text_answer

# Candidate topics for the next session (TBD):