fine-tuned Causal model via lora - Adapted from week5 tutorial

In [3]:
# Source: https://github.com/huggingface/peft/blob/main/examples/int8_training/Finetune_opt_bnb_peft.ipynb

from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import pandas as pd
import torch
from datasets import Dataset
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType

In [None]:
device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu" # This line checks if a GPU is available and sets the device to GPU (e.g., cuda:0) or CPU.
#device = "cpu"
print(device)

# Initialise the model and tokenizer to a pre-trained model.
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m").to(device)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

print(tokenizer.vocab_size) 

cpu
50265


GPU safety checks

In [5]:
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

print(torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
    print("Memory allocated:", torch.cuda.memory_allocated())

2.6.0
CUDA Available: False


Lora Training

In [6]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
)

model = get_peft_model(model, peft_config)

#checks
model.print_trainable_parameters()
type(model)

trainable params: 786,432 || all params: 331,982,848 || trainable%: 0.2369


peft.peft_model.PeftModelForCausalLM

Training Data

In [None]:
import pandas as pd

fp = "../../MISC/work_arrangements_development_set.csv"
df = pd.read_csv(fp)
df.drop("id", axis=1, inplace=True) #get rid of id column
df.rename(columns={"job_ad": "text"}, inplace=True)

data = Dataset.from_pandas(df)

print(df)

                                                 text  y_true
0   Job title: CEO\nAbstract: Exciting opportunity...  Remote
1   Job title: Home-Based Online ESL Teacher (Onli...  Remote
2   Job title: Safeguarding, De La Salle\nAbstract...  Hybrid
3   Job title: Delivery Driver\nAbstract: Pickup t...  OnSite
4   Job title: Store Supervisor\nAbstract: We are ...  OnSite
..                                                ...     ...
94  Job title: Senior Pipeline Technical Director\...  Hybrid
95  Job title: Customer Support Administrator\nAbs...  OnSite
96  Job title: Remote Writing Evaluator for AI (As...  Remote
97  Job title: People & Culture Advisor\nAbstract:...  Hybrid
98  Job title: Draftsperson\nAbstract: Residential...  Hybrid

[99 rows x 2 columns]


Training

In [8]:
import transformers

def train_prompt_format(example):
    label = example['y_true']
    return f"{example['text']}\nWhat is the work arrangement of this job ad? You must return either Onsite, Remote or Hybrid\n Label: {label}"

data = data.map(lambda samples: {"prompt": train_prompt_format(samples)}, batched=False,remove_columns=["text", "y_true"])   #remove unneeded labels(both saved into prompt)
data = data.map(lambda samples: tokenizer(samples['prompt'], truncation=True, padding="max_length", max_length=2048), batched=True)  # Tokenize with fixed length
data = data.map(lambda samples: {"labels": samples["input_ids"]}, batched=False)  # Set labels as input_ids for causal LM

Map:   0%|          | 0/99 [00:00<?, ? examples/s]

Map:   0%|          | 0/99 [00:00<?, ? examples/s]

Map:   0%|          | 0/99 [00:00<?, ? examples/s]

In [None]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=transformers.TrainingArguments(
        num_train_epochs=3,
        learning_rate=2e-5,
        gradient_accumulation_steps=4,
        per_device_train_batch_size=4, 
        output_dir="./outputs",
        logging_steps=10,           # Log every 10 steps to monitor progress
        disable_tqdm=False,         # Enable progress bar (optional)
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,3.291


TrainOutput(global_step=18, training_loss=3.294173240661621, metrics={'train_runtime': 3965.4271, 'train_samples_per_second': 0.075, 'train_steps_per_second': 0.005, 'total_flos': 979179461935104.0, 'train_loss': 3.294173240661621, 'epoch': 2.64})

eval

In [None]:
#get test data

fp = "../../MISC/work_arrangements_test_set.csv"
df = pd.read_csv(fp)
df.drop("id", axis=1, inplace=True) #get rid of id column
df.rename(columns={"job_ad": "text"}, inplace=True)

data = Dataset.from_pandas(df)

print(data)
print(data[0])

Dataset({
    features: ['text', 'y_true'],
    num_rows: 99
})
{'text': 'Job title: Restaurant Kitchen Hand\nAbstract: We are seeking experienced Kitchen Hand to join our hospitality team.\nEmployer: Catering HQ\nLocation: pitttown\nHighlights: opportunity for growth, Opportunity to work in an industry leading hospitality group, Positive, fun and supportive work culture\nContents: We are currently searching for talented and polished Full Time Kitchen Hands to join our hospitality team.\n Key duties\n Thorough cleaning of the kitchen, including dishes and floors, Food preparation assistance, Stock rotation and stock control, Ensuring to follow all health and safety procedures when caring out all tasks, Any other adhoc duties as required by our fantastic Chefs and Management, Operating a commercial dishwasher to a high standard, Assist with general kitchen duties under the direction of the Head Chef.\n The Person \n Previous relevant experience in a high-volume catering/kitchen hand rol

In [12]:
def prompt_format(example):
    prompt = f"{example['text']}\nWhat is the work arrangement of this job ad? You must return either Onsite, Remote or Hybrid\n Label:"
    return prompt

In [None]:
# Set the model in evaluation mode
model.eval()

valid = 0
correct = 0
answers = ["Onsite", "Remote", "Hybrid"]

with torch.no_grad():
    for i in range(len(data)):
    #for i in range(10):
        sample = prompt_format(data[i])
        input = tokenizer(sample, return_tensors="pt").to(device)
        output_tokens = model.generate(**input, do_sample=False, num_beams=5, no_repeat_ngram_size=2, early_stopping=True, max_new_tokens=5)

        out_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
        out_text = out_text[len(sample):]   #raw new output

        if out_text[0] in answers:
            valid += 1
            if out_text[0] == sample['y_true']:
                correct += 1

print(f"Valid: {valid} / {len(data)} == {valid / len(data)}")
print(f"Correct(Accuracy): {correct} / {len(data)} == {correct / len(data)}")

 Job Description
Job Description
 This ad is for a


Job Title:
 Job Description
LaunchPad


Job Title:
 Yes

Job Description
 Job Description
The Business
 Work From Home


 Job Description
Job Description
 "Teleradiologist"
 Job Description
We are


Job Title:
 "Job Description"

 This ad is for a
 "Customer Xperience


Job Title:
 Cardiothorac
 https://www.gl
 https://www.air
 "Remote Financial Adviser"
 https://www.creat
 "Jord International"
 Job Description
Job Description
 "Social Media and Content


We are looking
 "Transport Operations"
 Work from Home


 Job Description
Job Description
 Job Description
KMC
 "Campaigns & Digital
 "Dental Assistant/
 The job title is Train
 https://www.av
 Job Description
Job Description
 Job Description
Bitut

Job Description
Microsoft
 "Administration Clerk"


Job Title:


Job Description:
 Job Description
Job Description
 Job Description:
The
 "Digital PR Executive"
 "Open to new grad


Job Title:


Job Title:
 "HR Advisor"

 "Clear Health M