# Math Question Answer Verification Competition

## Starter Code

**_Authors_**
- Chin - CPK286
- Divyansh Agarwal - DA3245
- Rohaan - RNA3535

Borrowed from [official Unsloth implementation](https://colab.research.google.com/drive/1Ys44kVvmeZtnICzWz0xgpRnrIOjZAuxp?usp=sharing#scrollTo=MKX_XKs_BNZR)

### Package Installation

### Continued

In [None]:
from unsloth import FastLanguageModel
import torch
# Loading configuration reused by the model-loading and trainer cells.

# Maximum sequence length handed to the loader and to the trainer; any value
# may be chosen.
max_seq_length = 2048

# None lets the loader auto-detect the dtype. Force float16 for Tesla T4 /
# V100, or bfloat16 for Ampere and newer.
dtype = None

# Load with 4-bit quantization to reduce memory usage; may be set to False.
load_in_4bit = True


In [None]:
# Load the "unsloth/Meta-Llama-3.1-8B" checkpoint together with its tokenizer,
# using the sequence length, dtype and quantization settings chosen above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

## Load model and wrap with LoRA adapters

In [None]:
# Wrap the loaded base model with LoRA adapters.
# Adapters are attached to the attention projections and the MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # LoRA rank: any number > 0 (suggested 8, 16, 32, 64, 128)
    target_modules=attention_projections + mlp_projections,
    lora_alpha=64,
    # NOTE(review): the upstream starter comments state that dropout = 0 and
    # bias = "none" are the optimized settings; both values below deviate
    # from that -- confirm this is intentional.
    lora_dropout=0.001,
    bias="all",
    # "unsloth" checkpointing: the upstream comment cites 30% less VRAM and
    # room for 2x larger batch sizes; True is the other accepted value.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=True,  # rank-stabilized LoRA
    loftq_config=None,  # LoftQ is not used
)

## Competition dataset

In [None]:
# download and load competition dataset

from datasets import load_dataset
# Load the competition data by its "LinNY-DLM/train_dataset" identifier.
dataset = load_dataset("LinNY-DLM/train_dataset")
# Bare expression: in a notebook this displays the dataset summary as the
# cell output.
dataset

In [None]:
# Prompt template with four positional `{}` slots. formatting_prompts_func
# fills them, in order, with the question, the answer, the solution
# (shown under "Explanation") and the is_correct label.
prompt = """You are a highly skilled mathematician. Determine if the provided Answer and Explanation is the correct answer to the Question. Return True if it’s correct and False if it’s wrong. 

### Question:
{}

### Answer:
{}

### Explanation:
{}

### True/False:
{}"""



# End-of-sequence marker taken from the tokenizer; it is appended to every
# training text so that generation does not run on indefinitely.
EOS_TOKEN = tokenizer.eos_token  # Ensure to add EOS_TOKEN to prevent infinite generation

def formatting_prompts_func(examples):
    """Render a batch of dataset rows into training texts.

    ``examples`` is a batch in column form: its "question", "answer",
    "solution" and "is_correct" entries are read as parallel sequences.
    Each row is substituted into the module-level ``prompt`` template and
    terminated with ``EOS_TOKEN``; without that terminator generation
    would go on forever.

    Returns a dict with a single "text" key holding the rendered strings,
    one per input row.
    """
    rows = zip(
        examples["question"],
        examples["answer"],
        examples["solution"],
        examples["is_correct"],
    )
    return {"text": [prompt.format(*row) + EOS_TOKEN for row in rows]}

In [None]:
# Sanity check: render the first two training rows; as the last expression in
# the cell, the result is displayed as the cell output.
formatting_prompts_func(dataset['train'][:2])

In [None]:
# Apply the prompt formatter to the whole training split in batched mode; the
# formatter returns a "text" entry for every row.
complete_train_dataset = dataset['train'].map(formatting_prompts_func, batched = True,)

In [None]:
# Hold out 0.25% of the formatted rows for validation. Rows are not shuffled
# before splitting.
# NOTE(review): if the source rows are ordered (e.g. by label or topic), an
# unshuffled hold-out may not be representative -- confirm.
split_dataset = complete_train_dataset.train_test_split(
    test_size=0.0025,
    shuffle=False,
)
train_dataset, val_dataset = split_dataset['train'], split_dataset['test']

print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")

## SFT
How do we know how many training samples we are actually looking at?

It can be calculated (per device) as:
`per_device_train_batch_size * gradient_accumulation_steps * max_steps`

Also, when `max_steps` is set it takes precedence over `num_train_epochs`.

In [None]:
# Batch geometry and run length for the SFT run; the training-arguments cell
# reads these three names.
per_device_train_batch_size = 32
gradient_accumulation_steps = 8
max_steps = 100

# Samples consumed per optimizer step, and over the whole run.
samples_per_step = per_device_train_batch_size * gradient_accumulation_steps
print(f'Total training samples: {samples_per_step * max_steps}')
print(f'Samples per step: {samples_per_step}')

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
# Decide the mixed-precision mode once: bf16 where supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    # Batch geometry and run length come from the cell above. The run is
    # bounded by max_steps; use num_train_epochs=1 instead for one full pass.
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    max_steps=max_steps,
    # Optimizer and learning-rate schedule.
    # NOTE(review): warmup_steps is 50 while max_steps is set to 100 in the
    # cell above, so warmup spans half of the run -- confirm this is intended.
    warmup_steps=50,
    learning_rate=3e-4,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    # Mixed precision: exactly one of the two flags is enabled.
    fp16=not use_bf16,
    bf16=use_bf16,
    # Logging, evaluation and checkpointing.
    logging_steps=1,
    report_to="none",  # change this to report to WandB etc.
    eval_strategy="steps",
    eval_steps=50,
    save_steps=50,  # save a checkpoint every 50 steps
    save_total_limit=1,
    output_dir="outputs-A100",
    seed=3407,
)


In [None]:
from transformers import TrainerCallback, TrainerControl, TrainerState
import numpy as np

# Build the supervised fine-tuning trainer from the LoRA-wrapped model, the
# train/validation splits and the training arguments defined above.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # "text" is the key returned by formatting_prompts_func.
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=4,
    # NOTE(review): presumably keeps one example per sequence instead of
    # concatenating several -- confirm against the TRL documentation.
    packing=False, 
    args=training_args,
)


In [None]:
# Start training; the value returned by train() is kept in `trainer_stats`.
trainer_stats = trainer.train()

In [None]:
# Save the trained model into "outputs-A100", the same directory that was
# passed as `output_dir` in the training arguments.
trainer.save_model("outputs-A100")

In [None]:
# Persist the optimizer, LR-scheduler and trainer state alongside the saved
# model in "outputs-A100".
# NOTE(review): presumably kept so that training can be resumed later --
# confirm this does not duplicate what the step checkpoints already contain.
torch.save(trainer.optimizer.state_dict(), "outputs-A100/optimizer.pt")  # Save optimizer state
torch.save(trainer.lr_scheduler.state_dict(), "outputs-A100/scheduler.pt")  # Save scheduler state
trainer.state.save_to_json("outputs-A100/trainer_state.json")  # Save trainer state as JSON

## Saving the model

In [None]:
# Save the model and the tokenizer side by side in the local "lora_model"
# directory.
# NOTE(review): for a LoRA-wrapped model this presumably writes only the
# adapter weights rather than the full base model -- confirm.
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")