In [None]:
! pip install --upgrade transformers==4.50.1 trl
! pip install 'accelerate>=0.26.0'

In [None]:
import copy
import json
import os
import pathlib
import warnings

from datasets import Dataset, load_dataset
import pandas as pd
from peft import LoraConfig, get_peft_model
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer
from tqdm import tqdm
from typing import Optional, Callable
import wandb

from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Create Dataset

In [7]:
data = pd.read_csv('data/work_arrangements_development_set.csv')

In [8]:
data

Unnamed: 0,id,job_ad,y_true
0,79484040,Job title: CEO\nAbstract: Exciting opportunity...,Remote
1,80331384,Job title: Home-Based Online ESL Teacher (Onli...,Remote
2,79721069,"Job title: Safeguarding, De La Salle\nAbstract...",Hybrid
3,80190376,Job title: Delivery Driver\nAbstract: Pickup t...,OnSite
4,80082230,Job title: Store Supervisor\nAbstract: We are ...,OnSite
...,...,...,...
94,80151196,Job title: Senior Pipeline Technical Director\...,Hybrid
95,79652545,Job title: Customer Support Administrator\nAbs...,OnSite
96,79718230,Job title: Remote Writing Evaluator for AI (As...,Remote
97,80226188,Job title: People & Culture Advisor\nAbstract:...,Hybrid


In [17]:
for i in range(data.shape[0]):
    desc = data.iloc[i].job_ad
    
    if desc.strip()[:10] != 'Job title:':
        print(i)
        print(desc)
        print()

In [18]:
desc

'Job title: Draftsperson\nAbstract: Residential Draftsperson to assist our Custom Design team.\nEmployer: Tallwood Constructions Pty Ltd\nLocation: busselton\nHighlights: Flexibility, Variety, Friendly, efficient team\nContents: Tallwood Custom Built Homes, the South West leading bespoke builders based in Busselton, require the services of a locally based draftsperson experienced in the Western Australian home building industry.\n Our designer has an exciting pipeline of high-end homes and renovations and requires a draftsperson to assist deliver our client expectations. The position can be flexible in both time and working conditions.\n Only West Australian applicants please to apply, with questions and applications to Jenna Miller @'

In [23]:
dataset = []

for i in tqdm(range(data.shape[0])):
    desc = data.iloc[i].job_ad
    
    label = f'{{"work_arrangement": "{data.iloc[i].y_true}"}}.'
    
    messages = []
    messages.append(
        {
            'role': 'system',
            'content': 'You are an expert job ad annotator. Your role is to determine the work arrangement of a job from its description. Classify the work arrangement into one of the following: "Remote", "Hybrid", or "OnSite".'
        }
    )
    messages.append(
        {
            'role': 'user',
            'content': (
                f'{desc}. Extract work arrangement label from this job description. The label must be one of the following: "Remote", "Hybrid", or "OnSite". '
                'Respond in JSON: {"work_arrangement": ""}.'
            )
        }
    )
    messages.append(
        {
            'role': 'assistant',
            'content': label
        }
    )
    
    dataset.append({'messages': messages})

100%|██████████| 99/99 [00:00<00:00, 5541.58it/s]


In [24]:
dataset[0]

{'messages': [{'role': 'system',
   'content': 'You are an expert job ad annotator. Your role is to determine the work arrangement of a job from its description. Classify the work arrangement into one of the following: "Remote", "Hybrid", or "OnSite".'},
  {'role': 'user',
   'content': 'Job title: CEO\nAbstract: Exciting opportunity to lead and progress our work in addressing the harmful impacts of gendered expectations in disaster. \n\n\nEmployer: Marita Ryan Consulting\nLocation: melbourne\nHighlights: Dynamic role in a highly regarded national organisation, Lead change and organisational growth and sustainability, Fully remote working arrangements\nContents: Full time â€“ 3 year contract (subject to Government funding)\n Gender and Disaster Australia Ltd. (GADAus) is the leading national organisation offering evidence-based education, training and resources to address the harmful impacts of gendered expectations in disaster.\n The GADAus Board is currently seeking a highly motivate

In [25]:
with open('work_arrangements_dataset.json', 'w') as f:
    json.dump(dataset, f)

# Finetune Qwen2.5 1.5B

In [3]:
dataset = load_dataset("json", data_files="work_arrangements_dataset.json")

Generating train split: 0 examples [00:00, ? examples/s]

In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 99
    })
})

In [5]:
model_name = "Qwen/Qwen2.5-1.5B-Instruct"

In [6]:
# Load model and tokenizer.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(
   model_name, padding=True, truncation=True
)

config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [7]:
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [8]:
seed = 123

num_train_epochs = 4
max_steps = -1
bf16 = False
output_dir = 'finetune_qwen_results'
run_name = f"{model_name.split('/')[-1]}-work_arrangements"
output_dir_final = os.path.join(output_dir, run_name)
pathlib.Path(output_dir_final).mkdir(parents=True, exist_ok=True)

# Adjust tokenizer settings as warned by the trainer
tokenizer.padding_side = 'right'

print("Creating trainer...")
training_args = SFTConfig(
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    bf16=bf16,
    tf32=False, # use tf32 for faster training on Ampere GPUs or newer.
    dataloader_pin_memory=False,
    torch_compile=False,
    warmup_steps=5,
    max_steps=max_steps,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    weight_decay=0.01,
    logging_strategy="steps",
    save_strategy="steps",
    save_steps=2000,
    save_total_limit=10,
    logging_steps=5,
    output_dir=output_dir_final,
    optim="paged_adamw_8bit",
    remove_unused_columns=True,
    seed=seed,
    run_name=run_name,
    report_to="wandb",
    push_to_hub=False,
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    processing_class=tokenizer,
)

print("Training...")
trainer.train()

Creating trainer...


Converting train dataset to ChatML:   0%|          | 0/99 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/99 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/99 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/99 [00:00<?, ? examples/s]

[2025-04-26 08:50:15,238] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


[34m[1mwandb[0m: Currently logged in as: [33mhuwarr[0m. Use [1m`wandb login --relogin`[0m to force relogin


Training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
5,2.4727
10,2.1387
15,2.2152
20,2.5509
25,2.2931
30,2.5314
35,2.1435
40,2.1932
45,2.4424
50,2.2973


TrainOutput(global_step=196, training_loss=0.9974052832861032, metrics={'train_runtime': 557.0852, 'train_samples_per_second': 0.711, 'train_steps_per_second': 0.352, 'total_flos': 2001857139784704.0, 'train_loss': 0.9974052832861032})

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f2d40096810>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f2f2012ea50, execution_count=8 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7f2f103da210, raw_cell="seed = 123

num_train_epochs = 4
max_steps = -1
bf.." store_history=True silent=False shell_futures=True cell_id=7b8e63c6-b5e3-4e36-a372-2a0ce14b75fa> result=TrainOutput(global_step=196, training_loss=0.9974052832861032, metrics={'train_runtime': 557.0852, 'train_samples_per_second': 0.711, 'train_steps_per_second': 0.352, 'total_flos': 2001857139784704.0, 'train_loss': 0.9974052832861032})>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [9]:
wandb.finish()

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f2d40096810>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7f2d2bf81890, raw_cell="wandb.finish()" store_history=True silent=False shell_futures=True cell_id=4e3cb0f3-2e57-4c1e-b94b-5c95d7641424>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/grad_norm,▄▃█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▁▁▁▂▂▂▂▁▂▂▁▁▁▁▂▁▁▁▁
train/learning_rate,███████▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
train/loss,█▇▇█▇█▇▇█▇▄▃▄▄▅▄▄▃▄▄▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
train/mean_token_accuracy,▁▂▂▁▂▁▂▂▁▂▄▅▄▄▃▄▄▅▄▄▆▇▇▇▇▇▆▇▇▇██████████
train/num_tokens,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
total_flos,2001857139784704.0
train/epoch,3.92929
train/global_step,196.0
train/grad_norm,1.03072
train/learning_rate,0.0
train/loss,0.0519
train/mean_token_accuracy,0.98778
train/num_tokens,254623.0
train_loss,0.99741
train_runtime,557.0852


# Inference

In [2]:
max_new_tokens = 256
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
checkpoint_path = 'finetune_qwen_results/Qwen2.5-1.5B-Instruct-work_arrangements/checkpoint-196/'

In [3]:
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, device_map="cuda:0")
tokenizer = AutoTokenizer.from_pretrained(
   model_name, padding=True, truncation=True
)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
os.environ['HF_TOKEN'] = 'hf_...'
model.push_to_hub('qwen_work_arrangements')

[2025-04-27 14:02:15,754] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.18G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/huwar/qwen_work_arrangements/commit/8083dd29168e0ec218a5b005eb812da7c2db0413', commit_message='Upload Qwen2ForCausalLM', commit_description='', oid='8083dd29168e0ec218a5b005eb812da7c2db0413', pr_url=None, repo_url=RepoUrl('https://huggingface.co/huwar/qwen_work_arrangements', endpoint='https://huggingface.co', repo_type='model', repo_id='huwar/qwen_work_arrangements'), pr_revision=None, pr_num=None)

In [4]:
tokenizer.push_to_hub('qwen_work_arrangements')

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/huwar/qwen_work_arrangements/commit/f4ef6db3ad0695d11c0e02bf4f2b9aa790d5c7aa', commit_message='Upload tokenizer', commit_description='', oid='f4ef6db3ad0695d11c0e02bf4f2b9aa790d5c7aa', pr_url=None, repo_url=RepoUrl('https://huggingface.co/huwar/qwen_work_arrangements', endpoint='https://huggingface.co', repo_type='model', repo_id='huwar/qwen_work_arrangements'), pr_revision=None, pr_num=None)

In [12]:
test_df = pd.read_csv('data/work_arrangements_test_set.csv')

Example:

In [13]:
messages_static = [
    {"role": "system", "content": 'You are an expert job ad annotator. Your role is to determine the work arrangement of a job from its description. Classify the work arrangement into one of the following: "Remote", "Hybrid", or "OnSite".'},
]

In [14]:
i = 0

desc = test_df.iloc[i].job_ad

messages = copy.deepcopy(messages_static)
messages.append(
    {
        'role': 'user',
        'content': (
            f'{desc}. Extract work arrangement label from this job description. The label must be one of the following: "Remote", "Hybrid", or "OnSite". '
            'Respond in JSON: {"work_arrangement": ""}.'
        )
    }
)

In [15]:
messages

[{'role': 'system',
  'content': 'You are an expert job ad annotator. Your role is to determine the work arrangement of a job from its description. Classify the work arrangement into one of the following: "Remote", "Hybrid", or "OnSite".'},
 {'role': 'user',
  'content': 'Job title: Restaurant Kitchen Hand\nAbstract: We are seeking experienced Kitchen Hand to join our hospitality team.\nEmployer: Catering HQ\nLocation: pitttown\nHighlights: opportunity for growth, Opportunity to work in an industry leading hospitality group, Positive, fun and supportive work culture\nContents: We are currently searching for talented and polished Full Time Kitchen Hands to join our hospitality team.\n Key duties\n Thorough cleaning of the kitchen, including dishes and floors, Food preparation assistance, Stock rotation and stock control, Ensuring to follow all health and safety procedures when caring out all tasks, Any other adhoc duties as required by our fantastic Chefs and Management, Operating a com

In [16]:
prompt = tokenizer.apply_chat_template(messages, tokenize=False)

In [17]:
prompt

'<|im_start|>system\nYou are an expert job ad annotator. Your role is to determine the work arrangement of a job from its description. Classify the work arrangement into one of the following: "Remote", "Hybrid", or "OnSite".<|im_end|>\n<|im_start|>user\nJob title: Restaurant Kitchen Hand\nAbstract: We are seeking experienced Kitchen Hand to join our hospitality team.\nEmployer: Catering HQ\nLocation: pitttown\nHighlights: opportunity for growth, Opportunity to work in an industry leading hospitality group, Positive, fun and supportive work culture\nContents: We are currently searching for talented and polished Full Time Kitchen Hands to join our hospitality team.\n Key duties\n Thorough cleaning of the kitchen, including dishes and floors, Food preparation assistance, Stock rotation and stock control, Ensuring to follow all health and safety procedures when caring out all tasks, Any other adhoc duties as required by our fantastic Chefs and Management, Operating a commercial dishwasher 

In [18]:
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.inference_mode():
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, pad_token_id=tokenizer.eos_token_id)
outputs = outputs[:, inputs['input_ids'].shape[-1]:]
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [19]:
response

'assistant\n{"work_arrangement": "OnSite"}.'

In [20]:
assistant_token = 'assistant\n'
response[response.find(assistant_token) + len(assistant_token):]

'{"work_arrangement": "OnSite"}.'

Test set:

In [24]:
# df to store model predictions
test_pred_df = pd.DataFrame(columns=["y_pred"])

In [25]:
messages_static = [
    {"role": "system", "content": 'You are an expert job ad annotator. Your role is to determine the work arrangement of a job from its description. Classify the work arrangement into one of the following: "Remote", "Hybrid", or "OnSite".'},
]

In [26]:
for i in tqdm(range(len(test_df)), position=0, leave=True):
    desc = test_df.iloc[i].job_ad

    messages = copy.deepcopy(messages_static)
    messages.append(
        {
            'role': 'user',
            'content': (
                f'{desc}. Extract work arrangement label from this job description. The label must be one of the following: "Remote", "Hybrid", or "OnSite". '
                'Respond in JSON: {"work_arrangement": ""}.'
            )
        }
    )
    
    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, pad_token_id=tokenizer.eos_token_id)
    outputs = outputs[:, inputs['input_ids'].shape[-1]:]
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    assistant_token = 'assistant\n'
    answer_str = response[response.find(assistant_token) + len(assistant_token):]

    # format the output
    try:
        answer_str_ = answer_str[answer_str.find('{'):answer_str.find('}') + 1]
        answer_str_ = answer_str_.replace('“', '"')
        answer_str_ = answer_str_.replace('”', '"')
        answer = json.loads(answer_str_)
        
        label = answer['work_arrangement']

    except json.JSONDecodeError:
        print(f"Failed to parse model output as JSON: {answer_str}")
        label = "ERROR " + answer_str

    test_pred_df.loc[len(test_pred_df)] = label

100%|██████████| 99/99 [00:56<00:00,  1.75it/s]


In [27]:
# export the dataframe to a new csv file
test_pred_df.to_csv('work_arrangements_test_set_qwen_finetune_preds.csv', index=False)

# Metrics

In [1]:
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [2]:
def get_metrics(path_to_preds):
    preds = pd.read_csv(path_to_preds)
    test_df = pd.read_csv('data/work_arrangements_test_set.csv')

    test_df['y_pred'] = preds.y_pred.values

    precision, recall, f1_score, _ = precision_recall_fscore_support(
        test_df.y_true.values, test_df.y_pred.values, labels=['Remote', 'Hybrid', 'OnSite']
    )
    precision_macro, recall_macro, f1_score_macro, _ = precision_recall_fscore_support(
        test_df.y_true.values, test_df.y_pred.values, average='macro'
    )
    precision_micro, recall_micro, f1_score_micro, _ = precision_recall_fscore_support(
        test_df.y_true.values, test_df.y_pred.values, average='micro'
    )
    precision_weighted, recall_weighted, f1_score_weighted, _ = precision_recall_fscore_support(
        test_df.y_true.values, test_df.y_pred.values, average='weighted'
    )
    acc = accuracy_score(test_df.y_true.values, test_df.y_pred.values)
    
    print(f'Accuracy: {round(acc * 100, 1)}')
    res = pd.DataFrame(
        {
                'Precision': [*[round(p * 100, 1) for p in precision], "", round(precision_macro * 100, 1), round(precision_weighted * 100, 1), round(precision_micro * 100, 1)],
                'Recall': [*[round(r * 100, 1) for r in recall], "", round(recall_macro * 100, 1), round(recall_weighted * 100, 1), round(recall_micro * 100, 1)],
                'F1-score': [*[round(f1 * 100, 1) for f1 in f1_score], "", round(f1_score_macro * 100, 1), round(f1_score_weighted * 100, 1), round(f1_score_micro * 100, 1)],
        },
        index=['Remote', 'Hybrid', 'OnSite', "", 'macro average', 'weighted average', 'micro average']
    )
    return res

In [3]:
get_metrics('work_arrangements_test_set_qwen_finetune_preds.csv')

Accuracy: 89.9


Unnamed: 0,Precision,Recall,F1-score
Remote,88.5,88.5,88.5
Hybrid,85.2,85.2,85.2
OnSite,93.5,93.5,93.5
,,,
macro average,89.0,89.0,89.0
weighted average,89.9,89.9,89.9
micro average,89.9,89.9,89.9
