In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from datasets import load_dataset
from utils import *
from trl import SFTTrainer

# Step 1: Load the model and tokenizer
model_name = "gpt2-large"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure the tokenizer has a padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

# Step 2: Load and preprocess the SQuAD dataset
dataset = load_dataset("squad")

def preprocess_function(examples):
    contexts = examples['context']
    questions = examples['question']
    answers = examples['answers']
    
    prompts = [
        f"Context: {context}\nQuestion: {question}\nAnswer:"
        for context, question in zip(contexts, questions)
    ]
    
    targets = [answer['text'][0] for answer in answers]
    
    return {
        "prompt": prompts,
        "target": targets
    }

tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names
)

model.to("cuda")

ltype = "curlora"
for param in model.parameters():
        param.requires_grad = False

for name, module in model.named_modules():
    if isinstance(module, type(model.transformer.h[0].attn)):
        if ltype == "lora":
            module.c_attn = LinearWithLoRA(module.c_attn, 24, 1)
        else:
            module.c_attn = LinearWithCURLoRA(module.c_attn, 24, 1)

total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters after: {total_params:,}")
model.to("cuda")

def generate_answer(context, question = None):
    if question:
        prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"
    else:
        prompt = context
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens = 25,
                            pad_token_id= tokenizer.eos_token_id,
                            eos_token_id= tokenizer.eos_token_id)#, num_return_sequences=1, do_sample=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Example usage
context = "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ('Norman' comes from 'Norseman') raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia."
question = "Who were the Normans descended from?"

generated_answer = generate_answer(context, question)
print("Generated Answer:", generated_answer)

  warn(


Total trainable parameters after: 20,736
Generated Answer: Context: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ('Norman' comes from 'Norseman') raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia.
Question: Who were the Normans descended from?
Answer: The Normans were descended from the Norsemen who invaded and conquered the northern parts of Europe in the 10th and 11th


In [2]:
generated_answer = generate_answer(tokenized_dataset["train"][13]["prompt"])
print("Generated Answer:", generated_answer)

Generated Answer: Context: The university is the major seat of the Congregation of Holy Cross (albeit not its official headquarters, which are in Rome). Its main seminary, Moreau Seminary, is located on the campus across St. Joseph lake from the Main Building. Old College, the oldest building on campus and located near the shore of St. Mary lake, houses undergraduate seminarians. Retired priests and brothers reside in Fatima House (a former retreat center), Holy Cross House, as well as Columba Hall near the Grotto. The university through the Moreau Seminary has ties to theologian Frederick Buechner. While not Catholic, Buechner has praised writers from Notre Dame and Moreau Seminary created a Buechner Prize for Preaching.
Question: What individuals live at Fatima House at Notre Dame?
Answer: The house is a residence for retired priests and brothers. It is located on the campus of Notre Dame. The house is a


In [3]:
tokenized_dataset["train"][13]["target"]

'Retired priests and brothers'

In [4]:
# Step 3: Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    #num_train_epochs=3,
    max_steps = 300,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    warmup_steps=30,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    eval_steps=60,
    save_steps=120,
    gradient_accumulation_steps=4,
    learning_rate=2.5e-4,
    eval_strategy="steps",
    save_strategy="steps",
    load_best_model_at_end=True,
    push_to_hub = False,
)

# Step 4: Initialize the SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"].select(range(300)),
    dataset_text_field="prompt",
    max_seq_length=512,
)

# Step 5: Train the model
trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss,Validation Loss
60,3.0042,2.284526
120,3.0138,2.268037
180,2.9788,2.256925
240,2.9985,2.249362
300,2.9307,2.246941


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


TrainOutput(global_step=300, training_loss=3.0027708943684894, metrics={'train_runtime': 355.4773, 'train_samples_per_second': 3.376, 'train_steps_per_second': 0.844, 'total_flos': 916574914669056.0, 'train_loss': 3.0027708943684894, 'epoch': 0.013698786515827807})

In [5]:
# Step 6: Save the fine-tuned model
#trainer.save_model("./fine_tuned_gpt2_large_squad")

# Step 7: Test the model
# Example usage
context = "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ('Norman' comes from 'Norseman') raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia."
question = "Who were the Normans descended from?"

generated_answer = generate_answer(context, question)
print("Generated Answer:", generated_answer)

Generated Answer: Context: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ('Norman' comes from 'Norseman') raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia.
Question: Who were the Normans descended from?
Answer: The Normans were descended from the Norsemen who invaded and conquered the northern parts of Europe in the 10th and 11th


In [6]:
generated_answer = generate_answer(tokenized_dataset["train"][13]["prompt"])
print("Generated Answer:", generated_answer)

Generated Answer: Context: The university is the major seat of the Congregation of Holy Cross (albeit not its official headquarters, which are in Rome). Its main seminary, Moreau Seminary, is located on the campus across St. Joseph lake from the Main Building. Old College, the oldest building on campus and located near the shore of St. Mary lake, houses undergraduate seminarians. Retired priests and brothers reside in Fatima House (a former retreat center), Holy Cross House, as well as Columba Hall near the Grotto. The university through the Moreau Seminary has ties to theologian Frederick Buechner. While not Catholic, Buechner has praised writers from Notre Dame and Moreau Seminary created a Buechner Prize for Preaching.
Question: What individuals live at Fatima House at Notre Dame?
Answer: The house is a residence for retired priests and brothers. It is located on the campus of Notre Dame. The house is a


In [7]:
generated_answer = generate_answer(tokenized_dataset["validation"][1311]["prompt"])
print("Generated Answer:", generated_answer)
print("\nActual Answer:", tokenized_dataset["validation"][1311]["target"])

Generated Answer: Context: After leaving Edison's company Tesla partnered with two businessmen in 1886, Robert Lane and Benjamin Vail, who agreed to finance an electric lighting company in Tesla's name, Tesla Electric Light & Manufacturing. The company installed electrical arc light based illumination systems designed by Tesla and also had designs for dynamo electric machine commutators, the first patents issued to Tesla in the US.
Question: What did lane and vail finance?
Answer: Tesla Electric Light & Manufacturing was a company that was incorporated in 1887 and was incorporated in 1891. Lane and Vail

Actual Answer: Tesla Electric Light & Manufacturing
