# This script translates transcripts of video simulation role-playing exercises from Dutch to English

In [None]:
# Install any required libraries that are not yet installed (adjust manually), e.g. via the Anaconda Prompt

In [1]:
# Import necessary libraries
# import openai          # OpenAI API for AI model access
# from openai import OpenAI  # The main client class for interacting with OpenAI's API
from openai import AzureOpenAI
import requests        # HTTP library for making web requests (API calls, downloading data, etc.)
import re              # Regular expressions
import os              # Operating system interface
import pandas as pd    # DataFrames for tabular data manipulation and analysis
import numpy as np     # Numerical computing
import openpyxl        # Import excel files
import time            # Time-related functions
import random          # Random number generation
import getpass         # pop-up window for API completion
from tqdm import tqdm  # for showing progress bars

# Set seed for reproducibility of anything that draws from Python's `random` module.
# NOTE(review): no call into `random` is visible in the cells shown here — confirm the seed is still needed.
random.seed(1337)  # Sets seed for Python's random module

In [3]:
# Read the Dutch transcripts into a DataFrame
transcripts_csv = "input/data_gitp_text.csv"
test_text = pd.read_csv(transcripts_csv)
# Pilot runs: uncomment the next line to keep only the first 50 rows
# test_text = test_text.iloc[:50]
test_text.head()

# # Text data
# text_data = pd.read_csv("data_gitp_text.csv")

Unnamed: 0,id,Beoordeling.607,Beoordeling.608,Beoordeling.609,Beoordeling.610,Beoordeling.all
0,1460133,"Oh ja, Carina ik kan me voorstellen dat je voo...","Jeetje ik, ik kan me voorstellen dat je dat la...","Ja, dus als ik het zo hoor, dan is er dus eige...","Ik denk dat het goed is als we, zoals ik net a...","Oh ja, Carina ik kan me voorstellen dat je voo..."
1,1462073,"Marine, ja, ik begrijp inderdaad dat jij het d...","Oh, ik had eigenlijk dat beeld niet of collega...","Ja, als jij het gaat presenteren en ik doe het...","Nou, maar, dat dank je wel, in ieder geval, ik...","Marine, ja, ik begrijp inderdaad dat jij het d..."
2,1549902,"Dacht je landen, ja, fijn dat je dat je even l...","Ja, ik kan me voorstellen dat het erg lastig i...","Ja, ik kan me voorstellen dat het dat het last...","Ja, waar we het natuurlijk over hebben gehad, ...","Dacht je landen, ja, fijn dat je dat je even l..."
3,1552130,"Hoi marine, leuk om met je samen te werken. Zo...",Fijn om te horen dat je volgende week meer rui...,Heel goed dat je je eigen kwaliteiten erkent e...,Nou heel vaak dat ze dus het dario er iets and...,"Hoi marine, leuk om met je samen te werken. Zo..."
4,1569467,Hoe landen nou fijn dat we elkaar even spreken...,"Ik begrijp dat oh, ik begrijp. Ik begrijp dat ...","Hoe zou, hoe zou de rest van het team jou kunn...","Nou, allereerst, dank je wel dat je nou zo ope...",Hoe landen nou fijn dat we elkaar even spreken...


# Prompt

In [12]:
# Prompt template for Dutch-to-English translation.
# The `{text}` placeholder is filled in by `get_translation` via `str.format(text=...)`;
# the transcript is wrapped in triple backticks to delimit it from the instructions.
translation_prompt = """
Translate the following text from Dutch to English.

Context:
- The text comes from a spoken role-playing exercise.
- It was automatically transcribed, so it may contain filler words, hesitation markers, partial sentences, and transcription glitches.

Translation Requirements:
- Convey the intended meaning in fluent English (do NOT translate word-by-word).
- Preserve the *structure* of the utterances: sentence breaks, pacing, repetitions, interruptions, and unfinished thoughts.
- Keep all disfluencies (e.g., "uh", "hm", false starts, repeated words).
- If the transcript contains unclear or broken phrases, translate them as equally unclear or broken in English, without fixing them.
- Do NOT correct grammar, do NOT rewrite into cleaner English, do NOT summarize, and do NOT interpret beyond what is explicitly said.

In other words: translate meaningfully, but keep the *messiness* of spoken language intact.

Text to translate:
```{text}```
"""


# OpenAI

In [None]:
# The top-level `from openai import OpenAI` is commented out, so import the
# client class here; otherwise this cell fails with a NameError.
from openai import OpenAI

# Ask for the OpenAI API key interactively so it is never stored in the notebook
gpt_api_key = getpass.getpass("Enter API key for OpenAI: ")  # manually enter API key at the prompt

# Initialize the OpenAI client with your API key
client = OpenAI(api_key=gpt_api_key)

# Azure API key

In [6]:
# Collect the Azure OpenAI credentials interactively (typed at the prompt, not stored in the notebook)
azure_api_key = getpass.getpass("Enter API key for Azure OpenAI: ")
azure_endpoint = getpass.getpass("Enter Azure OpenAI endpoint URL: ")

# API version string passed to the Azure OpenAI client
api_version = "2024-12-01-preview"

# Build the client from the settings gathered above
azure_client_settings = {
    "api_version": api_version,
    "azure_endpoint": azure_endpoint,
    "api_key": azure_api_key,
}
client = AzureOpenAI(**azure_client_settings)

In [None]:
# # Import Azure OpenAI API key
# azure_api_key = getpass.getpass("Enter API key for Azure OpenAI: ") # manually enter API key to the pop-up window

# # Import Azure OpenAI endpoint
# azure_endpoint = getpass.getpass("Enter Azure OpenAI endpoint URL: ") # manually enter endpoint URL

# # Initialize the OpenAI client with your Azure API key and endpoint
# client = OpenAI(
#     api_key=azure_api_key,
#     base_url=azure_endpoint.strip()
# )

## OpenAI: Function to translate text from Dutch to English

In [None]:
# Set default parameters
model = "gpt-4.1-mini"  # manually change the model
# deployment = "gpt-4.1-mini" # manually change the model
temperature = 0         # manually change the temperature


def get_translation(client, text, prompt_template, model=model):
    """Translate one piece of text through a chat-completions client.

    Parameters
    ----------
    client
        Object exposing ``chat.completions.create(...)``.
    text : str
        Text to translate. Non-string values (e.g. NaN/None) and blank
        strings are skipped without calling the API.
    prompt_template : str
        Template with a ``{text}`` placeholder; it is filled with the
        stripped ``text`` and sent as the user message.
    model : str
        Model name passed to the API. The default is the value the
        module-level ``model`` had when this function was defined.

    Returns
    -------
    str or None
        The stripped translation, or ``None`` when the input was skipped,
        the response carried no text, or the request raised an exception.
    """
    # Guard explicitly instead of relying on the blanket `except` below:
    # missing cells arrive as float NaN, and blank text would waste an API call.
    if not isinstance(text, str) or not text.strip():
        return None

    try:
        full_prompt = prompt_template.format(text=text.strip())

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a translation engine."},
                {"role": "user", "content": full_prompt}
            ],
            temperature=temperature,  # module-level setting, read at call time
            max_completion_tokens=10000
        )

        content = response.choices[0].message.content
        if content is None:
            # Report a missing message body clearly rather than as an AttributeError
            print("Translation error: the response contained no text")
            return None

        return content.strip()

    except Exception as e:
        # Best effort: report the failure and return None so the caller can continue
        print(f"Translation error: {e}")
        return None


## Azure: Function to translate text

In [7]:
# Set default parameters
# model = "gpt-4.1-mini" # manually change the model
deployment = "gpt-4.1-mini"  # manually change the model # This is the custom name you gave in Azure AI Studio, e.g., "my-gpt4-deployment"
temperature = 0              # manually change the temperature


# NOTE: running this cell replaces any `get_translation` defined earlier in the kernel.
def get_translation(client, text, prompt_template):
    """Translate one piece of text through an Azure chat-completions client.

    Parameters
    ----------
    client
        Object exposing ``chat.completions.create(...)``.
    text : str
        Text to translate. Non-string values (e.g. NaN/None) and blank
        strings are skipped without calling the API.
    prompt_template : str
        Template with a ``{text}`` placeholder; it is filled with the
        stripped ``text`` and sent as the user message.

    Returns
    -------
    str or None
        The stripped translation, or ``None`` when the input was skipped,
        the response carried no text, or the request raised an exception.
    """
    # Guard explicitly instead of relying on the blanket `except` below:
    # missing cells arrive as float NaN, and blank text would waste an API call.
    if not isinstance(text, str) or not text.strip():
        return None

    try:
        full_prompt = prompt_template.format(text=text.strip())

        response = client.chat.completions.create(
            model=deployment,  # Use the deployment name here (module-level, read at call time)
            messages=[
                {"role": "system", "content": "You are a translation engine."},
                {"role": "user", "content": full_prompt}
            ],
            temperature=temperature,  # module-level setting, read at call time
            max_completion_tokens=10000
        )

        content = response.choices[0].message.content
        if content is None:
            # Report a missing message body clearly rather than as an AttributeError
            print("Translation error: the response contained no text")
            return None

        return content.strip()

    except Exception as e:
        # Best effort: report the failure and return None so the caller can continue
        print(f"Translation error: {e}")
        return None

# Text translation

In [13]:
# Set parameters
columns_to_translate = ["Beoordeling.607", "Beoordeling.608", "Beoordeling.609", "Beoordeling.610"]
df = test_text  # manually CHANGE the dataframe to be analyzed (same object: new columns are added to it in place)

# Add empty placeholder columns for the translations.
# They start as None rather than a copy of the Dutch text, so a cell that is
# skipped, fails, or is never reached (e.g. an interrupted run) cannot be
# mistaken for an English translation.
for col in columns_to_translate:
    df[f"en_{col}"] = pd.Series([None] * len(df), index=df.index, dtype=object)

# Translation loop
# `row_number` counts rows processed, so progress is correct for any index labels.
for row_number, (idx, row) in enumerate(df.iterrows(), start=1):
    for col in columns_to_translate:
        text = row[col]
        # Only translate real, non-blank strings (NaN and other non-string cells are skipped)
        if isinstance(text, str) and text.strip():
            translated = get_translation(client, text, translation_prompt)
            df.loc[idx, f"en_{col}"] = translated

    if row_number % 5 == 0:
        print(f"Translated {row_number}/{len(df)} rows")
    time.sleep(0.1)  # short pause between rows

df.head()

Translated 5/893 rows
Translated 10/893 rows
Translated 15/893 rows
Translated 20/893 rows
Translated 25/893 rows
Translated 30/893 rows
Translated 35/893 rows
Translated 40/893 rows
Translated 45/893 rows
Translated 50/893 rows
Translated 55/893 rows
Translated 60/893 rows
Translated 65/893 rows
Translated 70/893 rows
Translated 75/893 rows
Translated 80/893 rows
Translated 85/893 rows
Translated 90/893 rows
Translated 95/893 rows
Translated 100/893 rows
Translated 105/893 rows
Translated 110/893 rows
Translated 115/893 rows
Translated 120/893 rows
Translated 125/893 rows
Translated 130/893 rows
Translated 135/893 rows
Translated 140/893 rows
Translated 145/893 rows
Translated 150/893 rows
Translated 155/893 rows
Translated 160/893 rows
Translated 165/893 rows
Translated 170/893 rows
Translated 175/893 rows
Translated 180/893 rows
Translated 185/893 rows
Translated 190/893 rows
Translated 195/893 rows
Translated 200/893 rows
Translated 205/893 rows
Translated 210/893 rows
Translated 2

Unnamed: 0,id,Beoordeling.607,Beoordeling.608,Beoordeling.609,Beoordeling.610,Beoordeling.all,en_Beoordeling.607,en_Beoordeling.608,en_Beoordeling.609,en_Beoordeling.610
0,1460133,"Oh ja, Carina ik kan me voorstellen dat je voo...","Jeetje ik, ik kan me voorstellen dat je dat la...","Ja, dus als ik het zo hoor, dan is er dus eige...","Ik denk dat het goed is als we, zoals ik net a...","Oh ja, Carina ik kan me voorstellen dat je voo...","Oh yeah, Carina, I can imagine you're facing a...","Wow, I, I can imagine that you find it difficu...","Yeah, so if I hear it like this, then there’s ...","I think it’s good if we, like I just said, sta..."
1,1462073,"Marine, ja, ik begrijp inderdaad dat jij het d...","Oh, ik had eigenlijk dat beeld niet of collega...","Ja, als jij het gaat presenteren en ik doe het...","Nou, maar, dat dank je wel, in ieder geval, ik...","Marine, ja, ik begrijp inderdaad dat jij het d...","Marine, yeah, I do understand that you’re busy...","Oh, I actually didn’t have that image of wheth...","Yeah, if you’re going to present it and I do t...","Well, but, thanks for that, anyway, I’m glad t..."
2,1549902,"Dacht je landen, ja, fijn dat je dat je even l...","Ja, ik kan me voorstellen dat het erg lastig i...","Ja, ik kan me voorstellen dat het dat het last...","Ja, waar we het natuurlijk over hebben gehad, ...","Dacht je landen, ja, fijn dat je dat je even l...","Did you think countries, yeah, nice that you c...","Yeah, I can imagine it’s really difficult when...","Yeah, I can imagine that it that it’s difficul...","Yes, what we of course talked about is that yo..."
3,1552130,"Hoi marine, leuk om met je samen te werken. Zo...",Fijn om te horen dat je volgende week meer rui...,Heel goed dat je je eigen kwaliteiten erkent e...,Nou heel vaak dat ze dus het dario er iets and...,"Hoi marine, leuk om met je samen te werken. Zo...","Hi Marine, nice to work with you. Like you sai...",Nice to hear that next week you seem to have m...,Very good that you recognize your own qualitie...,"Well very often that they, uh, Dario thinks so..."
4,1569467,Hoe landen nou fijn dat we elkaar even spreken...,"Ik begrijp dat oh, ik begrijp. Ik begrijp dat ...","Hoe zou, hoe zou de rest van het team jou kunn...","Nou, allereerst, dank je wel dat je nou zo ope...",Hoe landen nou fijn dat we elkaar even spreken...,"How nice that we’re talking to each other now,...","I understand that oh, I understand. I understa...","How would, how would the rest of the team be a...","Well, first of all, thank you for being so ope..."


In [14]:
# Build the combined English column en_Beoordeling.all from the translated columns
translated_columns = ["en_" + col for col in columns_to_translate]

# Missing translations become empty strings so the join only ever sees text
en_parts = test_text[translated_columns].fillna("")
joined_en = en_parts.agg(" ".join, axis=1)
test_text["en_Beoordeling.all"] = joined_en.str.strip()

In [15]:
# Optional: write the table (Dutch originals plus English translations) to disk
csv_path, xlsx_path = 'test_text_nl_en.csv', 'test_text_nl_en.xlsx'
test_text.to_csv(csv_path, index=False)
test_text.to_excel(xlsx_path, index=False)