# Loading

In [None]:
! pip install --upgrade transformers==4.50.1

In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from transformers import pipeline
import torch
import string
import re
from bs4 import BeautifulSoup
import json
import copy

In [5]:
pipe = pipeline(
    "text-generation", model="google/gemma-3-1b-it", device="cuda", torch_dtype=torch.bfloat16,
    token='' # put your HF access token here
)

Device set to use cuda


# Seniority

In [6]:
train_df = pd.read_csv('data/seniority_labelled_development_set_cleaned.csv')
test_df = pd.read_csv('data/seniority_labelled_test_set_cleaned.csv')

In [7]:
train_df.iloc[1131]

job_id                                                             33027740
job_title                                                   Fitter & Welder
job_summary               A food manufacturing company has an opportunit...
job_ad_details            A well known food manufacturer operating since...
classification_name                    Manufacturing, Transport & Logistics
subclassification_name                                                Other
y_true                                                          experienced
Name: 1131, dtype: object

# RBIC Functions

### RB


In [8]:
#== step 1: role explanation
def step_1():
    messages = [
        {"role": "system", "content": "You are an expert job ad annotator. Your task is to infer the seniority information from job descriptions. The seniority label may be present in the set: [intermediate, senior, lead, head, experienced, entry-level, executive, assistant, senior/lead, deputy, director, trainee, associate, graduate, junior, general-manager, coordinator, student, chief, principal, apprentice, qualified, entry-level to intermediate, senior associate, standard, senior assistant, specialist, mid-level, entry level assistant, experienced assistant, manager, graduate/junior, independent, 1st year apprentice, senior-executive, junior assistant, assistant manager, supervisor, second-in-command, associate director, board, 4th year apprentice, mid-senior, regional head, middle-management, advanced, 2nd year apprentice, intermediate apprentice, level 2, assistant head, owner, post-doctoral, owner-operator, middle management, senior head, assistant director, junior-intermediate, sous, intermediate to senior, senior executive] . If not present in the set, then create a label."},
        {"role": "user", "content": "Based on your role, can you briefly explain what seniority means, and what seniority labels look like?"}
    ]

    response = pipe(messages, max_new_tokens=1000)
    messages.append(response[0]["generated_text"][-1])

    return response[0]["generated_text"][-1]["content"], messages

In [9]:
#== few-shot examples
def fewshot(messages):
    # messages.append({
    #     "role": "user",
    #     "content": "I will provide you with some examples on how to accomplish your task"
    # })

    # response = client.chat.completions.create(
    #     model=model,
    #     messages=messages
    # )
    # messages.append({"role": "assistant", "content": response.choices[0].message.content})

    few_shot_indices = [2081,1,5,12,3]

    for i in few_shot_indices:
        desc = {
          "job_title": train_df.iloc[i].job_title,
          "job_summary": train_df.iloc[i].job_summary,
          "job_ad_details": train_df.iloc[i].job_ad_details,
          "classification_name": train_df.iloc[i].classification_name,
          "subclassification_name": train_df.iloc[i].subclassification_name
        }
        desc_str = str(desc)

        # add the description
        messages.append({
          "role": "user",
          "content": desc_str
        })

        label = train_df.iloc[i].y_true
        label_str = str(label)

        # add the output
        messages.append({
            "role": "assistant",
            "content": label_str
        })

    return messages

In [10]:
#== step 2: setting sub-task --> ask for seniority patterns
def step_2(messages):
    messages.append({
        "role": "user",
        "content": "As a seniority label predictor, what are some common phrases or patterns that indicate a seniority label in a job description?"
    })

    response = pipe(messages, max_new_tokens=1000)
    messages.append(response[0]["generated_text"][-1])

    return response[0]["generated_text"][-1]["content"], messages

### IC

In [11]:
#== step 3: presence of seniority (skipped)
def step_3(messages_static, desc_str):
    messages = copy.deepcopy(messages_static)

    messages.append({
        "role": "user",
        "content": f"{desc_str} does this job description include any seniority-related information? Just respond with 'Yes' or 'No'."
    })

    response = pipe(messages, max_new_tokens=1000)
    messages.append(response[0]["generated_text"][-1])

    p3_content = response[0]["generated_text"][-1]["content"]

    # clean and check the response
    response_p3 = p3_content.translate(str.maketrans('', '', string.punctuation))
    response_p3 = response_p3.strip().lower()[:3]
    #print(f"step3 messages: {len(messages)}")
    return response_p3, messages

In [12]:
#== step 4: iterative coaching/finding clues to prevent hallucination
def step_4(response_p3, messages):
    messages.append({
            "role": "user",
            "content": "Extract the seniority-related phrases from the text verbatim. Respond in JSON: {\"Clue\": \"\"}."
        })

    response = pipe(messages, max_new_tokens=1000)
    messages.append(response[0]["generated_text"][-1])
    #print(f"step4 messages: {len(messages)}")

    return response[0]["generated_text"][-1]["content"], messages

In [13]:
#== step 5: use the clue to generate the final output
def step_5(messages):
    messages.append({
        "role": "user",
        "content": (
            "Based on the extracted seniority clue, return a structured seniority label in the format {\"seniority_label\": \"\"}. "
                        
        )
    })

    response = pipe(messages, max_new_tokens=1000)
    answer_str = response[0]["generated_text"][-1]["content"]
    
    # format and print the output
    try:
        answer_str_ = answer_str[answer_str.find('{'):answer_str.find('}') + 1]
        answer_str_ = answer_str_.replace('“', '"')
        answer_str_ = answer_str_.replace('”', '"')
        answer = json.loads(answer_str_)
        
        if 'seniority_label' in answer:
            label = answer['seniority_label']
        else:
            print(f"Failed to parse model output: {answer_str}")
            label = "ERROR " + answer_str
    except json.JSONDecodeError:
        print(f"Failed to parse model output: {answer_str}")
        label = "ERROR " + answer_str

    return label, answer_str

In [14]:
def RBIC_static_messages(verbose=False, add_fewshot=True):
    response, messages = step_1()
    if verbose: print(f"RB step 1: {response}\n")

    response, messages = step_2(messages)
    if verbose: print(f"RB step 2: {response}\n")

    if add_fewshot:
        messages = fewshot(messages)
        if verbose: print(f"Fewshot examples added\n")
    return messages

In [15]:
def RBIC(messages, desc_str, verbose=False):
    response_p3, messages_local = step_3(messages, desc_str)
    if verbose: print(f"IC step 1: {response_p3}\n")

    response, messages_local = step_4(response_p3, messages_local)
    if verbose: print(f"IC step 2: {response}\n")

    label, answer_str = step_5(messages_local)
    if verbose: print(f"IC step 3 (Final): {label}\n")
    if verbose: print(f"IC step 3 (Final Raw): {answer_str}\n")

    return str(label)

# Testing

### Qualitative Tests

In [16]:
messages_static = RBIC_static_messages(verbose=True, add_fewshot=True)

RB step 1: Okay, I understand. I'm ready to analyze job descriptions and infer seniority levels. 

**Here’s a breakdown of what seniority means and some examples of how I’ll label the labels you provided:**

**What is Seniority?**

Seniority refers to an individual's level of responsibility, experience, and authority within an organization. It's essentially a measure of their impact and how much influence they wield. It’s not just about years of experience; it's about *how* you’ve used that experience. It often signifies a higher level of strategic thinking, leadership, and decision-making capability.  It can also encompass mentorship, training, and overseeing others.

**How I'll Label the Seniority Levels:**

I'll use a combination of these strategies:

*   **Context is Key:** I'll analyze the specific duties, responsibilities, and required skills mentioned in the job description.
*   **Experience & Authority:** I’ll consider the amount of experience, direct reports, decision-making p

In [17]:
ind = 28

#[2081,1,5,12,3]

desc = {
    "job_title": train_df.iloc[ind].job_title,
    "job_summary": train_df.iloc[ind].job_summary,
    "job_ad_details": train_df.iloc[ind].job_ad_details,
    "classification_name": train_df.iloc[ind].classification_name,
    "subclassification_name": train_df.iloc[ind].subclassification_name,
  }
desc_str = str(desc)

label = train_df.iloc[ind].y_true
label_str = str(label)

print(f"len of messages_static {len(messages_static)}")
label_pred = RBIC(messages_static, desc_str, verbose=True)


print(f"pred = {label_pred}")
print(f"truth = {label_str}")

len of messages_static 15
IC step 1: yes

IC step 2: ```json
{"Clue": "Responsible for…”\xa0Manage…”\xa0Makes Decisions…”\xa0Develops…”\xa0Influences…”\xa0Leads…”\xa0Utilizes…”\xa0Owns…”\xa0Works with…”\xa0Maintains…”\xa0Develops strategies”\xa0Supports team”\xa0Provides guidance to”\xa0Conducts”\xa0Applies lean thinking”\xa0Demonstrates leadership”\xa0Utilizes ISO 9001”\xa0Spearheads”\xa0Leading and inspiring”\xa0Oversees”\xa0Covers”\xa0Developing policies”\xa0Ensuring systems”\xa0Monitoring”\xa0Advices”\xa0Investigates”\xa0Develops strategy”}
```

IC step 3 (Final): Senior

IC step 3 (Final Raw): {"seniority_label": "Senior"}

pred = Senior
truth = senior


### Quantitative Tests

In [18]:
# df to store model predictions
test_pred_df = pd.DataFrame(columns=["y_pred"])

In [19]:
test_pred_df

Unnamed: 0,y_pred


In [20]:
for i in tqdm(range(len(test_df))):
    desc = {
        "job_title": test_df.iloc[i].job_title,
        "job_summary": test_df.iloc[i].job_summary,
        "job_ad_details": test_df.iloc[i].job_ad_details,
        "classification_name": test_df.iloc[i].classification_name,
        "subclassification_name": test_df.iloc[i].subclassification_name,
    }
    desc_str = str(desc)

    label = test_df.iloc[i].y_true
    label_str = str(label)

    label_pred = RBIC(messages_static, desc_str)

    test_pred_df.loc[len(test_pred_df)] = label_pred.lower()

# export the dataframe to a new csv file
test_pred_df.to_csv('seniority_labelled_test_set_gemma3_rbic_fewshot_preds.csv', index=False)

  0%|          | 1/689 [00:01<22:07,  1.93s/it]

Failed to parse model output: {"Clue": "Senior"}


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
 15%|█▍        | 103/689 [05:16<24:03,  2.46s/it] 

Failed to parse model output: {"Clue": "Yes"}


 25%|██▌       | 173/689 [08:50<23:52,  2.78s/it]

Failed to parse model output: {"Clue": "Senior"}


 37%|███▋      | 258/689 [14:50<20:36,  2.87s/it]  

Failed to parse model output: {"Clue": "Senior"}


 46%|████▌     | 315/689 [18:17<1:32:57, 14.91s/it]

Failed to parse model output: Senior


 59%|█████▉    | 407/689 [24:48<12:34,  2.68s/it]  

Failed to parse model output: {"Clue": "senior"}


 69%|██████▉   | 475/689 [29:19<10:25,  2.92s/it]

Failed to parse model output: {"Clue": "Senior"}


 75%|███████▌  | 518/689 [31:59<07:41,  2.70s/it]

Failed to parse model output: {"Clue": "Senior"}


 81%|████████  | 556/689 [34:13<05:11,  2.35s/it]

Failed to parse model output: {"Clue": "Senior"}


 87%|████████▋ | 600/689 [37:21<21:28, 14.48s/it]

Failed to parse model output: Okay, please provide the text of the job description. I need the text to extract the seniority information and construct the JSON output.


100%|██████████| 689/689 [43:33<00:00,  3.79s/it]


# No few-shot

In [21]:
messages_static = RBIC_static_messages(verbose=True, add_fewshot=False)

RB step 1: Okay, I understand. I'm ready to analyze job descriptions and infer seniority levels based on the provided labels.

**Here's a brief explanation of seniority:**

Seniority, in the context of job roles, refers to an individual's level of experience, responsibility, and influence within an organization. It’s often a subjective measure, but generally implies a higher level of expertise, leadership, and decision-making authority. It’s a way of ranking individuals based on their accomplishments and contributions.

**Here’s a breakdown of the seniority levels I’m expecting to recognize:**

I’ll be looking for a combination of:

*   **Experience:**  A significant amount of relevant work experience (years, projects, responsibilities).
*   **Responsibility:**  The scope of tasks they oversee, the level of strategic impact their work has.
*   **Leadership:**  Ability to guide, mentor, and influence others.
*   **Technical Expertise:**  Depth of knowledge and skill in their field.
*   

In [22]:
# df to store model predictions
test_pred_df = pd.DataFrame(columns=["y_pred"])
test_pred_df

Unnamed: 0,y_pred


In [23]:
for i in tqdm(range(len(test_df))):
    desc = {
        "job_title": test_df.iloc[i].job_title,
        "job_summary": test_df.iloc[i].job_summary,
        "job_ad_details": test_df.iloc[i].job_ad_details,
        "classification_name": test_df.iloc[i].classification_name,
        "subclassification_name": test_df.iloc[i].subclassification_name,
    }
    desc_str = str(desc)

    label = test_df.iloc[i].y_true
    label_str = str(label)

    label_pred = RBIC(messages_static, desc_str)

    test_pred_df.loc[len(test_pred_df)] = label_pred.lower()
    
test_pred_df.to_csv('seniority_labelled_test_set_gemma3_rbic_preds.csv', index=False)

 91%|█████████ | 627/689 [1:04:33<10:52, 10.52s/it]

Failed to parse model output: Please provide me with the text from the job description! I need the text to extract the seniority-related phrases and generate the JSON structure.


100%|██████████| 689/689 [1:11:17<00:00,  6.21s/it]


# Regular prompting

In [24]:
# df to store model predictions
test_pred_df = pd.DataFrame(columns=["y_pred"])

In [25]:
messages_static = [
    {"role": "system", "content": "You are an expert job ad annotator. Your task is to infer the seniority information from job descriptions. The seniority label may be present in the set: [intermediate, senior, lead, head, experienced, entry-level, executive, assistant, senior/lead, deputy, director, trainee, associate, graduate, junior, general-manager, coordinator, student, chief, principal, apprentice, qualified, entry-level to intermediate, senior associate, standard, senior assistant, specialist, mid-level, entry level assistant, experienced assistant, manager, graduate/junior, independent, 1st year apprentice, senior-executive, junior assistant, assistant manager, supervisor, second-in-command, associate director, board, 4th year apprentice, mid-senior, regional head, middle-management, advanced, 2nd year apprentice, intermediate apprentice, level 2, assistant head, owner, post-doctoral, owner-operator, middle management, senior head, assistant director, junior-intermediate, sous, intermediate to senior, senior executive] . If not present in the set, then create a label."},
]

In [26]:
for i in tqdm(range(len(test_df))):
    desc = {
      "job_title": test_df.iloc[i].job_title,
      "job_summary": test_df.iloc[i].job_summary,
      "job_ad_details": test_df.iloc[i].job_ad_details,
      "classification_name": test_df.iloc[i].classification_name,
      "subclassification_name": test_df.iloc[i].subclassification_name
    }
    desc_str = str(desc)

    messages = copy.deepcopy(messages_static)
    messages.append({
        "role": "user",
        "content": (
            f"{desc_str} Extract seniority label from this job description. The seniority label may be present in the set: [intermediate, senior, lead, head, experienced, entry-level, executive, assistant, senior/lead, deputy, director, trainee, associate, graduate, junior, general-manager, coordinator, student, chief, principal, apprentice, qualified, entry-level to intermediate, senior associate, standard, senior assistant, specialist, mid-level, entry level assistant, experienced assistant, manager, graduate/junior, independent, 1st year apprentice, senior-executive, junior assistant, assistant manager, supervisor, second-in-command, associate director, board, 4th year apprentice, mid-senior, regional head, middle-management, advanced, 2nd year apprentice, intermediate apprentice, level 2, assistant head, owner, post-doctoral, owner-operator, middle management, senior head, assistant director, junior-intermediate, sous, intermediate to senior, senior executive] . If not present in the set, then create a label. "
            "Respond in JSON: {\"seniority_label\": \"\"}."
        )
    })
    
    response = pipe(messages, max_new_tokens=1000)
    answer_str = response[0]["generated_text"][-1]["content"]

    # format and print the output
    try:
        answer_str_ = answer_str[answer_str.find('{'):answer_str.find('}') + 1]
        answer_str_ = answer_str_.replace('“', '"')
        answer_str_ = answer_str_.replace('”', '"')
        answer = json.loads(answer_str_)
        
        if 'seniority_label' in answer:
            label = answer['seniority_label']
        else:
            print(f"Failed to parse model output: {answer_str}")
            label = "ERROR " + answer_str
    except json.JSONDecodeError:
        print(f"Failed to parse model output: {answer_str}")
        label = "ERROR " + answer_str

    if type(label) == list:
        label = '/'.join(label)
    test_pred_df.loc[len(test_pred_df)] = label.lower()

# export the dataframe to a new csv file
test_pred_df.to_csv('seniority_labelled_test_set_gemma3_preds.csv', index=False)

100%|██████████| 689/689 [07:55<00:00,  1.45it/s]


# Metrics

In [1]:
import json
import string
import pandas as pd
import numpy as np
import ast

In [2]:
def categories(label):
    lab = str(label).lower()
    if 'entry' in lab:
        return 'Entry'
    elif 'junior' in lab or 'assistant' in lab:
        return 'Junior'
    elif 'intermediate' in lab or 'experienced' in lab or 'mid' in lab:
        return 'Mid'
    elif 'senior' in lab or 'lead' in lab:
        return 'Senior'
    elif any(x in lab for x in ['manager','director','chief','head','executive','principal']):
        return 'Leadership'
    else:
        return 'Other'
    
def process(row):
    if "error" not in row:
        row = row.strip().lower()
        if '[' in row and ']' in row:
            row = ast.literal_eval(row[row.find('['):row.find(']')+1])
            row = '/'.join(row)
        return row
    else:
        row = row.strip().lower()
        try:
            row = row[row.find('{'):row.find('}')+1]
            row_data = json.loads(row)
        except Exception:
            row = row[len('error'):]
            row = row.translate(str.maketrans('', '', string.punctuation))
            row = row.strip()
            if len(row.split(' ')) == 1:
                return row
            return row
        row_data = row_data['clue'] #if 'clue' in row_data else 
        if row_data == 'yes':
            return ""
        return row_data

def get_accuracy(path_to_preds):
    preds = pd.read_csv(path_to_preds)
    test_df = pd.read_csv('data/seniority_labelled_test_set_cleaned.csv')
    
    test_df['y_pred'] = preds.values.reshape(-1)
    test_df = test_df.fillna('')
    
    test_df['y_pred'] = test_df['y_pred'].map(process)
    test_df['y_true'] = test_df['y_true'].map(process)
    
    test_df['y_pred_cat'] = test_df['y_pred'].map(categories)
    test_df['y_true_cat'] = test_df['y_true'].map(categories)
    
    exact = (test_df['y_pred'] == test_df['y_true']).mean() * 100
    cat = ((test_df['y_pred'] != test_df['y_true']) & (test_df['y_pred_cat'] == test_df['y_true_cat'])).mean() * 100
    overall = ((test_df['y_pred'] == test_df['y_true']) | (test_df['y_pred_cat'] == test_df['y_true_cat'])).mean() * 100
    
    exact_count = (test_df['y_pred'] == test_df['y_true']).sum()
    cat_count = ((test_df['y_pred'] != test_df['y_true']) & (test_df['y_pred_cat'] == test_df['y_true_cat'])).sum()
    overall_count = ((test_df['y_pred'] == test_df['y_true']) | (test_df['y_pred_cat'] == test_df['y_true_cat'])).sum()
    
    print(f'Exact: {exact_count}/{test_df.shape[0]}')
    print(f'Similar: {cat_count}/{test_df.shape[0]}')
    print(f'Overall: {overall_count}/{test_df.shape[0]}')
    
    res = pd.DataFrame(
        {
            'Exact': round(exact, 2),
            'Similar': round(cat, 2),
            'Overall': round(overall, 2),
        },
        index=['Accuracy (%)']
    )
    
    return res

In [3]:
path_to_preds = 'seniority_labelled_test_set_gemma3_rbic_fewshot_preds.csv'
get_accuracy(path_to_preds)

Exact: 142/689
Similar: 76/689
Overall: 218/689


Unnamed: 0,Exact,Similar,Overall
Accuracy (%),20.61,11.03,31.64


In [4]:
path_to_preds = 'seniority_labelled_test_set_gemma3_rbic_preds.csv'
get_accuracy(path_to_preds)

Exact: 122/689
Similar: 114/689
Overall: 236/689


Unnamed: 0,Exact,Similar,Overall
Accuracy (%),17.71,16.55,34.25


In [5]:
path_to_preds = 'seniority_labelled_test_set_gemma3_preds.csv'
get_accuracy(path_to_preds)

Exact: 158/689
Similar: 60/689
Overall: 218/689


Unnamed: 0,Exact,Similar,Overall
Accuracy (%),22.93,8.71,31.64
