# This script annotates transcripts of video simulation role-playing exercises from job candidates using large language models

In [18]:
# Install any libraries that are not yet installed via the Anaconda prompt (adjust manually as needed)

In [1]:
# Import necessary libraries
# import openai          # OpenAI API for AI model access
# from openai import OpenAI  # The main client class for interacting with OpenAI's API
from openai import AzureOpenAI
import requests        # HTTP library for making web requests (API calls, downloading data, etc.)
import re              # Regular expressions
import os              # Operating system interface
import pandas as pd    # DataFrames for tabular data manipulation and analysis
import numpy as np     # Numerical computing
import time            # Time-related functions
import random          # Random number generation
import getpass         # pop-up window for API completion
from tqdm import tqdm  # for showing progress bars

# set seed for reproducibility
# NOTE: this seeds only Python's built-in `random` module; it does not influence
# the text returned by the model API calls made further down in this notebook.
random.seed(1337)  # Sets seed for Python's random module

In [9]:

# Load the transcripts that will be annotated
input_file = "input/data_gitp - Copy_for_data_exploration.xlsx"
test_data = pd.read_excel(input_file)
# test_data = test_data.iloc[:2] # keep only first few rows for pilot data analysis purposes
# Quick look at the first rows and the (rows, columns) dimensions
print(test_data.head())
print(test_data.shape)

# Import the prompt dictionary from the /prompts directory
# (manually change the module name to switch to another prompt set)
from prompts.AD_en_expl_20251221 import promptAD
# Number of entries in the prompt dictionary; 4 keys are expected
print(len(promptAD))

# # Advisement 2
# advise = pd.read_csv("data_advise_text.csv")
# # Change management 2
# change_manage = pd.read_csv("data_change_manage_text.csv")
# # Team management 2
# team_manage = pd.read_csv("data_team_manage_text.csv")
# # Team work 2
# team_work = pd.read_csv("data_team_work_text.csv")

  VideoSimulatieType                                 tw_Beoordeling.all  \
0       Advisement_2  Eerst, alles blijft tussen vier ogen dat wat v...   
1       Advisement_2  Ja, nee, ik heb zeker wel verstand van deze za...   
2       Advisement_2  Waar je op dit moment tegenaan loopt, en dan k...   
3       Advisement_2  Is en waarmee kan ik je helpen en wat kunnen w...   
4       Advisement_2  Oh, sorry nou leuk om je te ontmoeten. Ik ben ...   

   AD_PROBL  AD_CREAT  AD_OORDE  AD_ORGANS  AD_total  \
0       1.5      1.50  1.501667   2.333333   1.70875   
1       1.0      2.00  2.000000   2.000000   1.75000   
2       2.0      2.00  2.000000   2.000000   2.00000   
3       2.0      2.00  2.000000   2.330000   2.08250   
4       2.0      2.67  2.670000   1.000000   2.08500   

                               Beoordeling.all_en_tw category  
0  First, everything stays between the NUMERIC_1 ...      low  
1  Yeah, no, I definitely do know about these thi...      low  
2  What you're runni

In [11]:
promptAD

{'AD_PROBL': ('AD_PROBL',
  '\n        **Context of the role-playing exercise:**\nYou are a professional recruiter. Your task is to evaluate a job candidate based on their responses \nin a role-playing exercise. In this exercise, the candidate responds verbally to a fictional colleague named Lara, \nacross four scenes. Lara shares professional frustrations, mistakes with client projects, emotional exhaustion, \nand personal financial concerns, and she asks the candidate for support and advice. In summary:\n\n- **Scene 1**: Lara expresses frustration about being overlooked at work, doing repetitive tasks, \nand lacking opportunities to grow. She asks for advice.\n- **Scene 2**: Lara confesses she mishandled two client projects, causing cost issues and customer dissatisfaction. \nShe is afraid to tell the team leader and asks if the candidate can speak on her behalf.\n- **Scene 3**: Lara reveals emotional exhaustion and financial stress due to her daughter\'s chronic illness. \nShe wonde

# Azure Foundry - API key

In [14]:
# Import Azure OpenAI API key
# The key is typed into a hidden prompt so it never appears in the notebook.
# It is stripped (like the endpoint below) so that whitespace picked up while
# copy-pasting does not end up in the credential.
azure_api_key = getpass.getpass("Enter API key for Azure OpenAI: ").strip() # manually enter API key to the pop-up window

# Import Azure OpenAI endpoint
azure_endpoint = getpass.getpass("Enter Azure OpenAI endpoint URL: ").strip() # manually enter endpoint URL

# API version sent with every request made through this client
api_version = "2024-12-01-preview"

# Client object used by the annotation functions further down
client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=azure_endpoint,
    api_key=azure_api_key,
)

# OpenAI - API key

In [7]:
# Import OpenAI API key
# `OpenAI` is imported here because the corresponding import at the top of the
# notebook is commented out; without it this cell fails with a NameError.
from openai import OpenAI

# The key is typed into a hidden prompt; surrounding whitespace from
# copy-pasting is removed before use.
gpt_api_key = getpass.getpass("Enter API key for OpenAI: ").strip() # manually enter API key to the pop-up window
# Initialize the OpenAI client with your API key
# NOTE: this rebinds `client`, replacing the Azure client if that cell was run before.
client = OpenAI(api_key=gpt_api_key)

## Azure Foundry - Function to get score and explanation from Azure OpenAI

In [None]:
# Set default parameters
# These module-level values are read by the Azure `get_explanation` function defined in this cell.
# model = "gpt-4.1-mini" # manually change the model
deployment = "gpt-4.1-mini" # manually change the model # This is the custom name you gave in Azure AI Studio, e.g., "my-gpt4-deployment"
temperature = 1      # manually change the temperature (passed as `temperature` to the API call); note the OpenAI settings cell assigns this name again

# A - Function for explanation (number + explanation in the same cell)
def get_explanation(client, text, prompt):
    """Ask the Azure OpenAI deployment to assess one transcript.

    The transcript is inserted into ``prompt`` through its ``{text}``
    placeholder and sent as the user message of a chat completion. The
    module-level ``deployment`` and ``temperature`` settings are used.

    Args:
        client: Object exposing ``chat.completions.create`` (the Azure client).
        text: Transcript to substitute into the prompt.
        prompt: Prompt template containing a ``{text}`` placeholder.

    Returns:
        The reply text with surrounding whitespace removed, or an empty string
        if anything fails (the error is printed, not raised).
    """
    try:
        chat_messages = [
            {"role": "system", "content": "You are a professional recruiter."},
            {"role": "user", "content": prompt.format(text=text)},
        ]
        completion = client.chat.completions.create(
            model=deployment,  # Azure deployment name
            messages=chat_messages,
            temperature=temperature,
            max_completion_tokens=4000,
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        # Best effort: report the problem and let the annotation loop continue
        print("Error:", err)
        return ""


## OpenAI - Functions to get explanations and ratings from the OpenAI API

In [None]:
# set default parameters
# These module-level values are read by `get_explanation` and `get_rating` defined in this cell.
model = "gpt-5-mini" # manually change the model as desired (passed as `model` to the API calls)
temperature = 1      # manually change the temperature as desired (passed as `temperature` to the API calls)

# A - Function for explanation (number + explanation in the same cell)
def get_explanation(client, text, prompt):
    """Ask the model for a score plus explanation for one transcript.

    Uses ``client.responses.create`` with the module-level ``model`` and
    ``temperature`` settings. Running this cell replaces any function named
    ``get_explanation`` defined earlier in the notebook.

    Args:
        client: Object exposing ``responses.create``.
        text: Transcript to substitute into the prompt.
        prompt: Prompt template containing a ``{text}`` placeholder.

    Returns:
        The reply text with surrounding whitespace removed, or an empty string
        if anything fails (the error is printed, not raised).
    """
    try:
        filled_prompt = prompt.format(text=text)
        result = client.responses.create(
            model=model,
            input=filled_prompt,
            temperature=temperature,
        )
        reply = result.output_text
        return reply.strip()
    except Exception as err:
        # Best effort: report the problem and let the annotation loop continue
        print("Error:", err)
        return ""

# B - Function for numerical rating (1-5)
def get_rating(client, text, prompt):
    """Ask the model for a rating and return the first number in its reply.

    Uses ``client.responses.create`` with the module-level ``model`` and
    ``temperature`` settings. The first run of digits in the reply (optionally
    followed by a decimal part) is converted to ``float``; no check is made
    that the value lies within the 1–5 range the prompt asks for.

    Args:
        client: Object exposing ``responses.create``.
        text: Transcript to substitute into the prompt.
        prompt: Prompt template containing a ``{text}`` placeholder.

    Returns:
        The extracted number as a float, or ``None`` when the reply contains
        no number or the request fails (a message is printed in both cases).
    """
    try:
        result = client.responses.create(
            model=model,
            input=prompt.format(text=text),
            temperature=temperature,
        )
        reply = result.output_text.strip()

        # First numeric token in the reply, decimals kept
        number = re.search(r"\d+(\.\d+)?", reply)
        if number is None:
            print("Warning: No numeric value found in reply:", reply)
            return None
        return float(number.group())

    except Exception as err:
        # Best effort: report the problem and let the annotation loop continue
        print("Error:", err)
        return None

In [None]:
# Create empty columns in the dataframe to store the annotation results
# set default parameters
model_short = "gpt4.1"
df = test_data  # alias, not a copy: the columns are added to test_data itself
for code in promptAD.keys():
    # One explanation column and one score column per prompt code; the
    # annotation loop writes to both, so both are created here.
    df[f"{model_short}_{code}_explanation"] = None
    df[f"{model_short}_{code}_score"] = None

print(test_data.columns) # make sure that correct empty columns have been created

Index(['VideoSimulatieType', 'tw_Beoordeling.all', 'AD_PROBL', 'AD_CREAT',
       'AD_OORDE', 'AD_ORGANS', 'AD_total', 'Beoordeling.all_en_tw',
       'category', 'gpt4.1_AD_PROBL_explanation', 'gpt4.1_AD_PROBL_score',
       'gpt4.1_AD_CREAT_explanation', 'gpt4.1_AD_CREAT_score',
       'gpt4.1_AD_OORDE_explanation', 'gpt4.1_AD_OORDE_score',
       'gpt4.1_AD_ORGANS_explanation', 'gpt4.1_AD_ORGANS_score'],
      dtype='object')


In [None]:
# set default parameters
model_short = "gpt4.1" # manually change model as necessary
prompt_dict = promptAD # change prompt as necessary
df = test_data # change data as necessary (alias: columns are added to the same object)
# Add an empty explanation column and an empty score column for every prompt code
for code in prompt_dict.keys():
    for suffix in ("explanation", "score"):
        df[f"{model_short}_{code}_{suffix}"] = None

# print(test_data) # make sure that correct empty columns have been created

AttributeError: 'str' object has no attribute 'keys'

# Text annotation

In [None]:
# set default parameters
model_short = "gpt5" # manually change model as necessary
prompt_dict = promptAD # change prompt as necessary
df = test_data # change data as necessary (alias: results are written into the same object)

# Make sure the result columns for the model_short chosen in THIS cell exist,
# so the results do not depend on a column-preparation cell having been run
# with the same model_short. Existing columns are left untouched.
for code in prompt_dict.keys():
    for suffix in ("explanation", "score"):
        column = f"{model_short}_{code}_{suffix}"
        if column not in df.columns:
            df[column] = None

# Loop through all rows
for i, row in tqdm(df.iterrows(), total=len(df)):
    text = row['Beoordeling.all_en_tw']
    if pd.isna(text):
        # Without this guard the literal string "nan" would be sent to the model
        tqdm.write(f"Row {i}: no transcript text, skipped")
        continue
    # Each prompt entry is a (trait, explanation prompt, score prompt) tuple
    for code, (_trait, prompt_expl, prompt_score) in prompt_dict.items():
        expl = get_explanation(client, text, prompt_expl)
        score = get_rating(client, text, prompt_score)
        df.at[i, f"{model_short}_{code}_explanation"] = expl
        df.at[i, f"{model_short}_{code}_score"] = score

# Preview annotated df
# print(df.head())

100%|██████████| 6/6 [07:33<00:00, 75.66s/it]


In [None]:
# Optional: write the annotated dataframe to an Excel file (row index omitted)
output_file = "en_annotated_test_data_20251112.xlsx"
test_data.to_excel(output_file, index=False)

# advise.to_csv('annotated_advise.csv', index=False)