In [None]:
from unsloth import FastLanguageModel
import torch
# Run-wide settings referenced by later cells.
max_seq_length = 2048   # sequence length in tokens; any value may be chosen
dtype = torch.bfloat16  # None auto-detects; float16 for Tesla T4 / V100, bfloat16 for Ampere+
load_in_4bit = False    # 4-bit quantised loading flag (off)

In [None]:
# Log in to the Hugging Face Hub from inside the notebook. Credentials are
# supplied at run time, so no access token is stored in this file.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Checkpoint this run starts from; used when loading the model and again in
# the training arguments.
checkpoint_path = "LinNY-DLM/"

In [None]:
# Load the model and tokenizer from the checkpoint.
# The settings from the first cell are passed explicitly: without them,
# from_pretrained falls back to its own defaults, so `load_in_4bit = False`,
# `dtype` and `max_seq_length` defined above would be silently ignored.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = checkpoint_path,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

In [None]:
# Fetch the competition dataset from the Hugging Face Hub.
from datasets import load_dataset
dataset = load_dataset(path="ad6398/nyu-dl-teach-maths-comp")
# Bare expression at the end of the cell so the notebook renders a summary
# of the loaded dataset.
dataset

In [None]:
# Prompt template for the answer-verification task. The four `{}` slots are
# filled positionally by formatting_prompts_func with, in order: the question,
# the answer, the solution (shown under "Explanation") and the is_correct
# label (shown under "True/False").
prompt = """You are a highly skilled mathematician. Determine if the provided Answer and Explanation to a math question is correct or incorrect. Return True if it’s correct and False if it’s wrong. 

### Question:
{}

### Answer:
{}

### Explanation:
{}

### True/False:
{}"""


EOS_TOKEN = tokenizer.eos_token # End-of-sequence marker appended to every rendered training example
def formatting_prompts_func(examples):
    """Render a batch of dataset rows into full training prompts.

    Args:
        examples: Batched mapping from column name to a list of values.
            The "question", "answer", "solution" and "is_correct" columns
            are read.

    Returns:
        A dict with a single "text" key holding one rendered prompt per row.
        Each prompt is the module-level `prompt` template filled with the
        row's values and terminated with `EOS_TOKEN`, so that generation
        learns where to stop.
    """
    rows = zip(
        examples["question"],
        examples["answer"],
        examples["solution"],
        examples["is_correct"],
    )
    rendered = [prompt.format(*row) + EOS_TOKEN for row in rows]
    return {"text": rendered}

In [None]:
# Render every training row into a single prompt string (adds a "text" column).
complete_train_dataset = dataset["train"].map(formatting_prompts_func, batched=True)

# Hold out 0.1% of the rows for validation; the fixed seed makes the split repeatable.
split_dataset = complete_train_dataset.train_test_split(test_size=0.001, seed=100)
train_dataset, val_dataset = split_dataset["train"], split_dataset["test"]

print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")

In [None]:
per_device_train_batch_size = 32   # samples per forward/backward pass on one device
gradient_accumulation_steps = 8    # passes accumulated before each optimizer step
max_steps = 270                    # optimizer steps in this run
print(
    f'Total training samples: {per_device_train_batch_size * gradient_accumulation_steps * max_steps}'
)
print(
    f'Samples per step: {per_device_train_batch_size * gradient_accumulation_steps}'
)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
# Hyperparameters for the fine-tuning run, grouped by concern.
training_args = TrainingArguments(
    # --- output / reproducibility ---
    output_dir="outputs/",
    seed=3407,
    report_to="none",  # no experiment tracker; set e.g. "wandb" to enable one
    # NOTE(review): per the transformers docs this field is not consumed by
    # Trainer itself; an actual resume needs
    # trainer.train(resume_from_checkpoint=...). Confirm the intent here.
    resume_from_checkpoint=checkpoint_path,
    # --- batch geometry and run length ---
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    max_steps=max_steps,  # step-bounded run; use num_train_epochs=1 instead for one full pass
    # --- optimisation ---
    optim="adamw_8bit",
    learning_rate=1e-5,
    lr_scheduler_type="linear",
    warmup_steps=int(0.1 * max_steps),  # warm up over the first 10% of steps
    weight_decay=0.01,
    max_grad_norm=1,
    # --- precision: bf16 when the device supports it, otherwise fp16 ---
    fp16=not is_bfloat16_supported(),
    bf16=is_bfloat16_supported(),
    # --- logging / evaluation / checkpointing ---
    logging_steps=1,
    eval_strategy="steps",
    eval_steps=30,
    save_steps=30,  # checkpoint every 30 steps, the same cadence as evaluation
    save_total_limit=2,
    load_best_model_at_end=True,
)

In [None]:
from transformers import TrainerCallback, TrainerControl, TrainerState,  EarlyStoppingCallback
import numpy as np

# Supervised fine-tuning trainer over the pre-rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    # --- data ---
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    dataset_text_field="text",
    dataset_num_proc=4,
    max_seq_length=max_seq_length,
    packing=False,
    # --- early stopping with a patience of 3 evaluations ---
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

In [None]:
# Start fine-tuning with the trainer built in the previous cell.
trainer.train()

Move on to inference.
