In [None]:
# --- Notebook configuration ---------------------------------------------
# Hugging Face Hub identifiers of the dataset and of the base model.
DATASET_NAME = "TRnlp/MixSub"
MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"

# Name of the fine-tuned model and the Hub repository it is pushed to.
TRAINED_MODEL_NAME = "Llama-3.2-1B-Instruct-bnb-4bit-MixSub"
TRAINED_MODEL_REPO = "/".join(("AdityaMayukhSom", TRAINED_MODEL_NAME))

# Options forwarded to FastLanguageModel.from_pretrained when loading the model.
MAX_SEQ_LEN = 2048
LOAD_IN_4BIT = True
DTYPE = None

In [None]:
from huggingface_hub import login, create_repo
from kaggle_secrets import UserSecretsClient

# Read the Hugging Face access token from the Kaggle secret named "HF_TOKEN"
# (so it never appears in the notebook) and authenticate this session with it.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")
login(token=hf_token)
# Left disabled on purpose; un-comment to create the target repository explicitly.
# create_repo(TRAINED_MODEL_REPO)

In [None]:
import os
import pandas as pd
from pathlib import Path
from datasets import load_dataset, load_from_disk, Dataset

In [None]:
# Detect the installed torch / CUDA combination and print the matching
# `pip install "unsloth[...]"` command. Names defined here and used later:
#   xformers - pip requirement string for xformers
#   x        - name of the Unsloth extra matching this environment
#   device   - "cuda" when a GPU is usable, otherwise "cpu"
from packaging.version import Version as V

try:
    import torch
    from torch.version import cuda
except Exception as e:
    # Chain the original exception so the real import failure stays visible.
    raise ImportError("Install torch via `pip install torch`") from e

v = V(torch.__version__)
device = "cuda" if torch.cuda.is_available() else "cpu"
# Query the compute capability only when a GPU is actually present: calling
# get_device_capability() on a GPU-less runtime raises before the clearer
# "CUDA = ... not supported!" error below can be reported.
is_ampere = device == "cuda" and torch.cuda.get_device_capability()[0] >= 8
xformers = "xformers==0.0.27" if v < V("2.4.0") else "xformers"

if cuda not in ("12.1", "11.8", "12.4"):
    raise RuntimeError(f"CUDA = {cuda} not supported!")

# Map the torch version onto the extra-name template; the two `{}` slots are
# filled with the CUDA version digits and the optional "-ampere" suffix.
if v <= V('2.1.0'):
    raise RuntimeError(f"Torch = {v} too old!")
elif v <= V('2.1.1'):
    x = 'cu{}{}-torch211'
elif v <= V('2.1.2'):
    x = 'cu{}{}-torch212'
elif v < V('2.3.0'):
    x = 'cu{}{}-torch220'
elif v < V('2.4.0'):
    x = 'cu{}{}-torch230'
elif v < V('2.5.0'):
    x = 'cu{}{}-torch240'
elif v < V('2.6.0'):
    x = 'cu{}{}-torch250'
else:
    raise RuntimeError(f"Torch = {v} too new!")

x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
print(f'pip install --upgrade pip && pip install "unsloth[{x}] @ git+https://github.com/unslothai/unsloth.git"')

In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install --upgrade pip
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton
!pip install "unsloth[cu124-torch250] @ git+https://github.com/unslothai/unsloth.git"

### **Reference Links For Fine-Tuning Llama 3.2 With Unsloth**

1. [Fine-tuning Llama 3.2 Using Unsloth](https://www.kdnuggets.com/fine-tuning-llama-using-unsloth)
2. [Fine-tuning Llama 3 with Unsloth: A Beginner’s Guide](https://medium.com/@seekmeai/fine-tuning-llama-3-with-unsloth-a-beginners-guide-d239d48eaf71)

In [None]:
from unsloth import FastLanguageModel

# Load the base model and its tokenizer using the options from the
# configuration cell (sequence length, dtype and 4-bit loading flag).
_load_options = dict(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LEN,
    dtype=DTYPE,
    load_in_4bit=LOAD_IN_4BIT,
)
fast_language_model, tokenizer = FastLanguageModel.from_pretrained(**_load_options)

In [None]:
# Modules of the base model that receive adapter weights.
_lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "up_proj", "down_proj", "gate_proj",
]

# Wrap the loaded base model in a PEFT model with the LoRA settings below.
model = FastLanguageModel.get_peft_model(
    fast_language_model,
    r=16,
    target_modules=_lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=69,
    use_rslora=False,
    loftq_config=None,
)

In [None]:
# Load the dataset from the Hugging Face Hub
# (https://huggingface.co/docs/datasets/en/loading#hugging-face-hub) and rename
# the plural "Highlights" column so that every column name is singular.
dataset = load_dataset(DATASET_NAME).rename_column("Highlights", "Highlight")

In [None]:
# Number of examples used for training / evaluation while the pipeline is
# still being set up; raise these once everything runs end to end.
TRAIN_DATASET_LENGTH = 10
EVAL_DATASET_LENGTH = 5

# Take the first N rows of the "train" split for training and the first M rows
# of the "test" split for evaluation.
train_dataset, eval_dataset = (
    dataset[split_name].select(range(num_rows))
    for split_name, num_rows in (
        ("train", TRAIN_DATASET_LENGTH),
        ("test", EVAL_DATASET_LENGTH),
    )
)

# Inspect the subsets before the 'Prompt' column is appended:
# train_dataset.to_pandas().head()
# eval_dataset.to_pandas().head()

In [None]:
# System prompt placed in front of every abstract.
INSTRUCTIONS = """
You are instructed to generate a scientifically accurate highlight of the provided passage without additional 
sentences such as headings or introductions before or after the generated text as it will be used as summary 
in a custom dataset. The highlight should sound plausible and should not contain incorrect information. Generate 
3-5 concise highlight points from the provided research paper abstract, covering key contributions, methods and 
outcomes. Each point should contain 10 to 15 words only. Return the points in plain text format without bullets.

No Additional Commentary: Exclude lines like "Here are 3-5 concise highlight points".
"""

EOS_TOKEN = tokenizer.eos_token


def format_abstract_highlight_as_prompt(examples: dict) -> dict:
    """Render a batch of (abstract, highlight) pairs as chat-formatted training prompts.

    Intended for ``Dataset.map(..., batched=True)``.

    Args:
        examples: Batch mapping column names to lists of values; the columns
            "Abstract" and "Highlight" are read.

    Returns:
        A dict with a single "Prompt" column holding one rendered
        system / user / assistant conversation string per input row.
    """
    prompts: list[str] = []

    for abstract, highlight in zip(examples["Abstract"], examples["Highlight"]):
        conversation = [
            {"role": "system", "content": INSTRUCTIONS},
            {"role": "user", "content": abstract},
            {"role": "assistant", "content": highlight},
        ]

        # tokenize=False returns a plain string, so no tensor type is requested.
        prompt = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )

        # The prompt must end with EOS, otherwise generation goes on forever.
        # Chat templates that close every turn themselves already emit it, so
        # it is appended only when missing to avoid a duplicated end token.
        if not prompt.rstrip().endswith(EOS_TOKEN):
            prompt += EOS_TOKEN

        prompts.append(prompt)

    return {"Prompt": prompts}

In [None]:
# Append Prompt column on which the model will be trained
train_dataset = train_dataset.map(format_abstract_highlight_as_prompt, batched=True)
eval_dataset = eval_dataset.map(format_abstract_highlight_as_prompt, batched=True) 

# Check train dataset after adding 'Prompt' column
# train_dataset.to_pandas().head()
# eval_dataset.to_pandas().head()

In [None]:
# Show the rendered prompt of the first training example as a sanity check.
train_dataset[0]['Prompt']

### Setup Model Training And Evaluation Pipeline

In [None]:
from trl import SFTTrainer
from unsloth import is_bfloat16_supported
from unsloth.chat_templates import train_on_responses_only
from transformers import TrainingArguments, DataCollatorForSeq2Seq

#### Setup The Evaluation Metrics Calculator Function

[Reference To Calculate ROUGE Metrics](https://medium.com/@MUmarAmanat/llm-evaluation-with-rouge-0ebf6cf2aed4)

In [None]:
# import nltk
# from nltk.translate.bleu_score import sentence_bleu
import evaluate

In [None]:
# https://huggingface.co/learn/nlp-course/chapter3/3
evaluate.list_evaluation_modules(
    # change this to None to list all metrics, but here we are interested only in 'metric', not in 'comparison' or 'measurement'
    # in case 'rouge' or 'bleu' doesn't show up in the results, we may need to change it to something else to check what other 
    # options are available as module in the evaluate library.
    module_type = 'metric', 
    include_community = True,
    with_details = False
)

In [None]:
# https://huggingface.co/docs/evaluate/package_reference/loading_methods#evaluate.load.path
metric = evaluate.load('rouge', 'bleu')

def compute_metrics(pred):
    print(pred)
    
    references = pred.label_ids
    generated_texts = pred.predictions
    
    # bleu_scores = []
    # for reference, generated_text in zip(references, generated_texts):
    #     reference_text = train_dataset[reference]['text']
    #     bleu_score = sentence_bleu([reference_text], generated_text)
    #     bleu_scores.append(bleu_score)

    # All we need to do is refine pred into ground truth and the summary
    # generated by the model, and pass them as predictions and reference
    scores = metric.compute(
        predictions = original_model_summaries, 
        references = human_baseline_summaries[0: len(original_model_summaries)],
        use_aggregator = True
    )

    print(scores)

    return scores

#### Setup Hyperparameters

We need to find the train and eval batch sizes that make the best use of the GPU without exhausting its memory, so that the Kaggle runtime neither crashes nor expires before training finishes.

In [None]:
# Batch sizes per device and number of batches accumulated per optimizer step.
PER_DEVICE_TRAIN_BATCH_SIZE = PER_DEVICE_EVAL_BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 4

# Length of the run.
WARMUP_STEPS = 5
MAX_STEPS = 60
NUM_TRAIN_EPOCHS = 3

# Checkpointing.
SAVE_STEPS = 500
SAVE_TOTAL_LIMIT = 2

# Optimizer settings.
LEARNING_RATE = 2e-4
WEIGHT_DECAY = 0.01

In [None]:
# Evaluate and checkpoint on the same schedule, expressed as a fraction of the
# total number of training steps. `load_best_model_at_end` needs a checkpoint
# at every evaluation; mixing a ratio for `eval_steps` with an absolute
# `save_steps` is rejected by TrainingArguments with a ValueError.
EVAL_AND_SAVE_STEPS = 0.2

training_args = TrainingArguments(
    per_device_train_batch_size = PER_DEVICE_TRAIN_BATCH_SIZE,
    per_device_eval_batch_size = PER_DEVICE_EVAL_BATCH_SIZE,
    gradient_accumulation_steps = GRADIENT_ACCUMULATION_STEPS,
    warmup_steps = WARMUP_STEPS,
    eval_strategy = "steps",
    eval_steps = EVAL_AND_SAVE_STEPS,
    save_strategy = "steps",
    save_steps = EVAL_AND_SAVE_STEPS,
    save_total_limit = SAVE_TOTAL_LIMIT,
    # NOTE: a positive `max_steps` overrides `num_train_epochs`; set
    # max_steps = -1 to train for NUM_TRAIN_EPOCHS full epochs instead.
    num_train_epochs = NUM_TRAIN_EPOCHS,
    max_steps = MAX_STEPS,
    learning_rate = LEARNING_RATE,
    # Use bf16 where the GPU supports it, otherwise fall back to fp16.
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    optim = "adamw_8bit",
    weight_decay = WEIGHT_DECAY,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = TRAINED_MODEL_NAME,
    report_to = "none",
    load_best_model_at_end = True,
    push_to_hub = True,
    # Do not upload each intermediate checkpoint; upload only when the model
    # is saved/pushed explicitly.
    hub_strategy = "end",
)

In [None]:
def _logits_to_token_ids(logits, labels):
    """Reduce evaluation logits to predicted token ids before they are accumulated.

    Without this hook the full (batch, seq, vocab) logits of every evaluation
    batch are kept until `compute_metrics` runs, which can exhaust memory.

    Args:
        logits: Model output for one evaluation batch; either the logits
            tensor or a tuple whose first element is the logits tensor.
        labels: Labels of the same batch (unused).

    Returns:
        Tensor with the arg-max token id for every position.
    """
    if isinstance(logits, tuple):
        logits = logits[0]
    return logits.argmax(dim=-1)


trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    # The field on which to train the model; the generated prompt was added
    # under the 'Prompt' header.
    dataset_text_field = "Prompt",
    max_seq_length = MAX_SEQ_LEN,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = training_args,
    compute_metrics = compute_metrics,
    preprocess_logits_for_metrics = _logits_to_token_ids,
)

In [None]:
# Header format that introduces each role's turn in the rendered prompts.
_header_template = "<|start_header_id|>{role}<|end_header_id|>"

# Wrap the trainer with Unsloth's train_on_responses_only helper, telling it
# which header starts the instruction part and which starts the response part.
# NOTE(review): Unsloth's examples pass the *user* header as instruction_part;
# confirm that the system header is intended here.
trainer = train_on_responses_only(
    trainer,
    instruction_part = _header_template.format(role = "system"),
    response_part = _header_template.format(role = "assistant"),
)

### Model Training

In [None]:
# @title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Run the fine-tuning loop; the returned object exposes a `metrics` dict
# (e.g. 'train_runtime') that the stats cell below reads.
trainer_stats = trainer.train()

In [None]:
# @title Show final memory and time stats
train_runtime = trainer_stats.metrics['train_runtime']

# Peak reserved memory in GB, in total and relative to the pre-training baseline.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)

# The same two figures as a percentage of the GPU's total memory.
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

_report_lines = [
    f"{train_runtime} seconds used for training.",
    f"{round(train_runtime/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
]
print("\n".join(_report_lines))

### IMPORTANT: Do not run the next cell during trial runs, as every push shows up in the Hugging Face repository history

In [None]:
# Upload the trained model to the Hugging Face Hub with the commit message below.
# Optional model-card metadata that is currently not sent:
#   language="en", finetuned_from=MODEL_NAME, dataset=DATASET_NAME
_push_options = {
    "commit_message": "first epoch fine tuning on mixsub",
    "model_name": TRAINED_MODEL_NAME,
}
trainer.push_to_hub(**_push_options)

### Model Evaluation

In [None]:
# Evaluate the fine-tuned model on the eval dataset passed to the trainer.
# The `compute_metrics` function given to the trainer is applied to the
# predictions, and the resulting metrics are stored in `results`.
results = trainer.evaluate()

In [None]:
# Show the evaluation metrics returned by trainer.evaluate().
print(results)