##Setting up WANDB for metric tracking and working of hugging face trainers and libraries

In [1]:
import os

with open("wandb_key.txt", "r") as f:
    wandb_api_key = f.read().strip()

os.environ["WANDB_API_KEY"] = wandb_api_key

FileNotFoundError: [Errno 2] No such file or directory: 'wandb_key.txt'

## Step 1: Installing and importing the libraries

In [None]:
!pip uninstall accelerate peft bitsandbytes transformers trl -y
!pip install accelerate peft==0.13.2 bitsandbytes transformers trl==0.12.0
!pip install huggingface_hub

In [None]:
import torch
from trl import SFTTrainer
from peft import LoraConfig
from datasets import load_dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, pipeline)

In [None]:
## Step 2: Loading the model

In [None]:
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path = "aboonaji/llama2finetune-v3",
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16, #setting float limit to 16 reducing accuracy but greatly helping speed and reducing space
    bnb_4bit_quant_type="nf4", #better than previous qunant type
    bnb_4bit_use_double_quant=True, #Double quantisiation increasing compression
    device_map="auto" #allows usage of all gpus
)

model.config.use_cache = False #doesnt store output of previously computed layers
model.config.pretraining_tp = 1 #deactivatves more accurate computations

## Step 3: Loading the tokenizer

In [None]:
tknizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path = "aboonaji/llama2finetune-v3",
    trust_remote_code = True
)

In [None]:
tknizer.pad_token = tknizer.eos_token #identifies pads as eos to avoid errors in training
tknizer.padding_side = "right" #helps with right padding

## Step 4: Setting the training arguments

In [None]:
training_arguements = TrainingArguments(
    output_dir = "./results", 
    per_device_train_batch_size = 1, #cudnt train more than that
    max_steps=100
    ) #use lower device train batch size for time optimisation(also doesnt run on collab if high)

## Step 5: Creating the Supervised Fine-Tuning trainer

In [None]:
sft_trainer = SFTTrainer(model = model, 
              args = training_arguements, 
              train_dataset = load_dataset(path = "aboonaji/wiki_medical_terms_llam2_format", split = "train"),
              tokenizer = tknizer,
              peft_config = LoraConfig(r = 128, lora_alpha = 16, lora_dropout = 0.05, task_type = "CAUSAL_LM"),
              dataset_text_field = "text")

In [None]:
torch.cuda.empty_cache()    #empyts cache so that the vram doesnt get overloaded(PLEASE TURN OFF DEVICE MAP AUTO IF ON VM)


## Step 6: Training the model and Prompting

In [None]:
sft_trainer.train()

In [None]:
user_prompt = "please tell me about Piroplasmosis"
text_gen_pipe = pipeline(task = "text-generation", model = model, tokenizer = tknizer, max_length = 300)
output = text_gen_pipe(f"<s> [INST] {user_prompt} [/INST]")
print(output[0]['generated_text'])