In [33]:
import os
import csv
from dotenv import load_dotenv
from openai import OpenAI
import time
import random

In [None]:
# Load environment variables via python-dotenv before the key is read below
# (presumably from a local .env file — confirm against the project setup).
load_dotenv()

# Shared OpenAI client used by the API helper defined further down.
# The key is taken from the OPENAI_API_KEY environment variable; if it is
# unset, None is passed through to the constructor.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [35]:
def read_questions(csv_path):
    """Load question texts from a CSV file, keyed by question number.

    Column 0 of each row is used as the question number and column 1 as the
    question text; both are stripped of surrounding whitespace. Rows with
    fewer than two columns are ignored. If a number occurs more than once,
    the text from its last row wins.

    Returns a dict: {question_number: question_text}.
    """
    with open(csv_path, newline='', encoding='utf-8') as handle:
        return {
            record[0].strip(): record[1].strip()
            for record in csv.reader(handle)
            if len(record) >= 2
        }

In [36]:
def read_options(csv_path):
    """Load answer options from a CSV file, grouped by question number.

    Each row contributes one option: column 0 is the question number and
    column 1 the option text (both stripped of surrounding whitespace).
    Rows with fewer than two columns are ignored. Options for the same
    number are collected in the order their rows appear in the file.

    Returns a dict: {question_number: [option1, option2, ...]}.
    """
    grouped = {}
    with open(csv_path, newline='', encoding='utf-8') as handle:
        for record in csv.reader(handle):
            if len(record) < 2:
                continue
            key = record[0].strip()
            if key not in grouped:
                grouped[key] = []
            grouped[key].append(record[1].strip())
    return grouped

In [37]:
def read_descriptions(csv_path):
    """Load image descriptions from a CSV file, keyed by question number.

    Column 0 of each row is the question number and column 1 the description
    text; both are stripped of surrounding whitespace. Rows with fewer than
    two columns are skipped, and a repeated number keeps its last description.

    Returns a dict: {question_number: description_text}.
    """
    by_number = {}
    with open(csv_path, newline='', encoding='utf-8') as handle:
        for record in csv.reader(handle):
            if len(record) < 2:
                continue
            key, text = (cell.strip() for cell in record[:2])
            by_number[key] = text
    return by_number

In [None]:
def ask_gpt4o_about_description(description, question, options,
                                model="gpt-4o-mini", max_tokens=500):
    """Ask an OpenAI chat model a traffic-rules question about an image description.

    Args:
        description: Textual description of the image.
        question: The question to answer about the described scene.
        options: Answer options, either a list of strings or a single
            pre-formatted string. Falsy values (None, [], "") are reported
            to the model as "No options provided".
        model: Chat model name; defaults to "gpt-4o-mini".
        max_tokens: Upper bound on the reply length passed to the API.

    Returns:
        The model's reply with surrounding whitespace removed. If the request
        fails, or the reply carries no text, a string starting with "Error: "
        is returned instead of raising, so a batch run can keep going.
    """
    # Render options one per line rather than interpolating the Python list
    # repr (e.g. "['A', 'B']"): the repr's quote characters can be echoed
    # back by the model, which breaks exact matching against the options.
    if not options:
        options_block = "Options: No options provided"
    elif isinstance(options, str):
        options_block = f"Options: {options}"
    else:
        options_block = "Options:\n" + "\n".join(f"- {option}" for option in options)

    user_content = (
        f"Description: {description}\n\n"
        f"Question: {question}\n"
        f"{options_block}"
    )

    # Adjacent string literals are concatenated verbatim, so every sentence
    # must end with ". " to avoid words running together in the prompt.
    system_content = (
        "Answer as a Latvian traffic rules expert. "
        "Analyze the description and return only the correct answer to the question. "
        "Answer should fully match one of the provided options. "
        "Base your answer solely on the description content. "
        "If options exist, choose one."
    )

    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]

    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens
        )
        content = response.choices[0].message.content
        if content is None:
            # Report a missing text payload explicitly instead of letting
            # .strip() fail with an opaque AttributeError message.
            return "Error: model returned no text content"
        return content.strip()
    except Exception as e:
        # Deliberate best-effort: the caller records this string as the
        # answer and continues with the next item.
        return f"Error: {str(e)}"

In [None]:
def main():
    """Query the model for every description that has a question, then save the answers.

    Reads descriptions.csv, questions.csv and options.csv from the working
    directory, keeps the question numbers present in both the descriptions
    and the questions, and processes them in shuffled order with a one-second
    pause after each request. Once every item is processed, the collected
    answers are written to results_OpenAI_description.csv together with the
    question text and the ';'-joined options.
    """
    questions_csv = "questions.csv"
    options_csv = "options.csv"
    descriptions_csv = "descriptions.csv"

    descriptions = read_descriptions(descriptions_csv)
    questions = read_questions(questions_csv)
    options = read_options(options_csv)

    # Only numbers that have both a description and a question can be asked.
    shared_nums = list(set(descriptions) & set(questions))
    random.shuffle(shared_nums)
    total = len(shared_nums)

    print(f"Processing {total} descriptions in random order.")

    # (question number, model answer) pairs in processing order.
    answers = []
    for position, num in enumerate(shared_nums, start=1):
        print(f"Processing description {num}... ({position}/{total})")
        answer = ask_gpt4o_about_description(
            descriptions[num],
            questions[num],
            options.get(num, []),
        )
        answers.append((num, answer))

        print(f"Description {num}: {answer}\n")
        time.sleep(1)

    with open("results_OpenAI_description.csv", "w", newline='', encoding='utf-8') as out_file:
        writer = csv.DictWriter(
            out_file,
            fieldnames=["image_number", "question", "options", "model_answer"]
        )
        writer.writeheader()
        writer.writerows(
            {
                "image_number": num,
                "question": questions.get(num, ""),
                "options": ";".join(options.get(num, [])),
                "model_answer": answer,
            }
            for num, answer in answers
        )

    print("Saved all results to: results_OpenAI_description.csv")

In [40]:
# Entry point: run the whole pipeline when this cell (or the exported script)
# is executed directly; importing the code as a module does not trigger it.
if __name__ == "__main__":
    main()

Processing 100 descriptions in random order.
Processing description 24... (1/100)
Description 24: Prohibited, the shown load must be transported in an appropriate vehicle.

Processing description 51... (2/100)
Description 51: I will give way to the tram and the automobile.

Processing description 18... (3/100)
Description 18: The driver of the green car.

Processing description 96... (4/100)
Description 96: It is forbidden.

Processing description 39... (5/100)
Description 39: The cyclist.

Processing description 93... (6/100)
Description 93: 'I will give way to both vehicles.'

Processing description 36... (7/100)
Description 36: You will give way to all cars.

Processing description 20... (8/100)
Description 20: A place where parking is prohibited.

Processing description 7... (9/100)
Description 7: You will give way to the cyclist.

Processing description 69... (10/100)
Description 69: Bus driver.

Processing description 53... (11/100)
Description 53: It is forbidden.

Processing de