In [7]:
# Source: https://github.com/huggingface/peft/blob/main/examples/int8_training/Finetune_opt_bnb_peft.ipynb

from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import pandas as pd
import torch
from datasets import Dataset

In [8]:
device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu" # This line checks if a GPU is available and sets the device to GPU (e.g., cuda:0) or CPU.
#device = "cpu"
print(device)

# Initialise the model and tokenizer to a pre-trained model. Suggestions: facebook/opt-350m, bigscience/bloom-560m
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m").to(device)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

cpu


In [9]:
#get test data

fp = "/Users/eddiezhang/NLP_project-2/DATASETS/work_arrangements_test_set.csv"
df = pd.read_csv(fp)
df.drop("id", axis=1, inplace=True) #get rid of id column
df.rename(columns={"job_ad": "text"}, inplace=True)

data = Dataset.from_pandas(df)
#data = data.map(lambda samples: tokenizer(samples["text"], truncation=True, padding="max_length", max_length=512), batched=True)
#data.set_format("torch")    #conversion to pyTorch tensors

print(data)
print(data[0])

Dataset({
    features: ['text', 'y_true'],
    num_rows: 99
})
{'text': 'Job title: Restaurant Kitchen Hand\nAbstract: We are seeking experienced Kitchen Hand to join our hospitality team.\nEmployer: Catering HQ\nLocation: pitttown\nHighlights: opportunity for growth, Opportunity to work in an industry leading hospitality group, Positive, fun and supportive work culture\nContents: We are currently searching for talented and polished Full Time Kitchen Hands to join our hospitality team.\n Key duties\n Thorough cleaning of the kitchen, including dishes and floors, Food preparation assistance, Stock rotation and stock control, Ensuring to follow all health and safety procedures when caring out all tasks, Any other adhoc duties as required by our fantastic Chefs and Management, Operating a commercial dishwasher to a high standard, Assist with general kitchen duties under the direction of the Head Chef.\n The Person \n Previous relevant experience in a high-volume catering/kitchen hand rol

In [10]:
def prompt_format(example):
    prompt = f"{example['text']}\nWhat is the work arrangement of this job ad? You must return either Onsite, Remote or Hybrid\n Label:"
    return prompt

GPU safety checks

In [11]:
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

print(torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
    print("Memory allocated:", torch.cuda.memory_allocated())

2.6.0
CUDA Available: False


In [13]:
# Set the model in evaluation mode
model.eval()

valid = 0
correct = 0
answers = ["Onsite", "Remote", "Hybrid"]

with torch.no_grad():
    for i in range(len(data)):
    #for i in range(10):
        sample = prompt_format(data[i])
        input = tokenizer(sample, return_tensors="pt").to(device)
        output_tokens = model.generate(**input, do_sample=False, num_beams=5, no_repeat_ngram_size=2, early_stopping=True, max_new_tokens=10)

        out_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
        out_text = out_text[len(sample):]   #raw new output
        #print(out_text)

        if out_text[0] in answers:
            valid += 1
            if out_text[0] == sample['y_true']:
                correct += 1

print(f"Valid: {valid} / {len(data)} == {valid / len(data)}")
print(f"correct: {correct} / {len(data)} == {correct / len(data)}")

Valid: 0 / 99 == 0.0
correct: 0 / 99 == 0.0
