In [51]:
!pip install python-dotenv
!pip install requests
!pip install xai-sdk

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [52]:
import os
import csv
import requests
import time
from dotenv import load_dotenv
import random

In [None]:
# Let python-dotenv populate the process environment before the key is read.
load_dotenv()

# The API key is mandatory: abort immediately when it is missing or empty so
# that no later cell issues an unauthenticated request.
XAI_API_KEY = os.environ.get("XAI_API_KEY")
if XAI_API_KEY is None or XAI_API_KEY == "":
    raise ValueError("XAI_API_KEY not found in .env file!")

In [None]:
# Endpoint that ask_grok_vision() POSTs every request to.
API_URL = "https://api.x.ai/v1/chat/completions"
# Model identifier placed in the "model" field of each request payload.
MODEL = "grok-2-vision-1212"

# HTTP headers sent with every request: bearer-token authorization built from
# XAI_API_KEY, plus the JSON content type for the request body.
HEADERS = {
    "Authorization": f"Bearer {XAI_API_KEY}",
    "Content-Type": "application/json"
}

In [None]:
def read_image_urls(csv_path):
    """Load the image-number -> image-URL mapping from a CSV file.

    The first row is treated as a header and discarded. For every remaining
    row with at least two columns, the first column (stripped) becomes the key
    and the second column (stripped) becomes the value. Rows with fewer than
    two columns are ignored; when a key repeats, the last row wins.

    Args:
        csv_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        dict mapping image number (str) to image URL (str).
    """
    with open(csv_path, newline='', encoding='utf-8') as handle:
        rows = csv.reader(handle)
        next(rows, None)  # drop the header row; tolerate an empty file
        return {
            record[0].strip(): record[1].strip()
            for record in rows
            if len(record) >= 2
        }

In [56]:
def read_questions(csv_path) -> dict:
    """Load the question-number -> question-text mapping from a CSV file.

    Every row with at least two columns contributes one entry: the first
    column (stripped) is the key, the second column (stripped) is the value.
    No header row is skipped. Rows with fewer than two columns are ignored;
    when a key repeats, the last row wins.

    Args:
        csv_path: Path of the CSV file to read (UTF-8, with or without BOM).

    Returns:
        dict mapping question number (str) to question text (str).
    """
    questions = {}
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark. Because the first row is real data here, a BOM would otherwise be
    # glued onto the first key ("\ufeff1"); str.strip() does not remove it, so
    # that question would never match its image number.
    with open(csv_path, newline='', encoding='utf-8-sig') as f:
        for row in csv.reader(f):
            if len(row) >= 2:
                questions[row[0].strip()] = row[1].strip()
    return questions

In [57]:
def read_options(csv_path) -> dict:
    """Load the answer options for each question from a CSV file.

    Every row with at least two columns contributes one option: the first
    column (stripped) is the question number and the second column (stripped)
    is the option text. Options for the same number are collected in file
    order. No header row is skipped; rows with fewer than two columns are
    ignored.

    Args:
        csv_path: Path of the CSV file to read (UTF-8, with or without BOM).

    Returns:
        dict mapping question number (str) to a list of option texts (str).
    """
    options = {}
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark. Without it a BOM would be glued onto the first key ("\ufeff1"),
    # filing the first option under a number that matches no question.
    with open(csv_path, newline='', encoding='utf-8-sig') as f:
        for row in csv.reader(f):
            if len(row) >= 2:
                options.setdefault(row[0].strip(), []).append(row[1].strip())
    return options

In [None]:
def _match_option(raw_answer: str, options_list: list) -> str:
    """Map the model's free-text reply onto one of the supplied options.

    Matching is case-insensitive and ignores blank options. An option equal
    to the whole reply wins outright; otherwise the longest option contained
    in the reply is chosen (earliest in the list on a tie). If nothing
    matches, the reply itself is returned unchanged.
    """
    candidates = [opt.strip() for opt in options_list if opt.strip()]
    reply = raw_answer.lower()

    # An exact hit is unambiguous, so check it before any substring test.
    for candidate in candidates:
        if candidate.lower() == reply:
            return candidate

    # A short option can be a substring of a longer one ("No" inside
    # "Not allowed", "...the tram" inside "...the tram and the red vehicle").
    # Taking the longest contained option stops the shorter one from winning
    # merely because it appears first in the list.
    contained = [c for c in candidates if c.lower() in reply]
    if contained:
        return max(contained, key=len)

    return raw_answer


def ask_grok_vision(image_url: str, question: str, options_list: list) -> str:
    """Ask the vision model which option answers a question about an image.

    Sends one chat-completions request containing a system prompt, the image
    URL and the question followed by the options (one per line), then maps
    the reply back onto the given options.

    Args:
        image_url: URL of the image, passed to the API as an "image_url" part.
        question: Question text shown to the model.
        options_list: Candidate answer strings.

    Returns:
        The matching option (stripped) when the reply corresponds to one,
        otherwise the model's stripped reply. Failures are not raised: a
        requests-level failure yields a string starting with "API Error: ",
        any other exception a string starting with "Error: ".
    """
    system_prompt = (
        "You are an expert in Latvian traffic rules. "
        "Answer in English only. "
        "Your ONLY job is to output the exact text of the single correct option. "
        "Do NOT explain, do NOT add reasoning, do NOT number it. "
        "Just return the correct option text and nothing else."
    )

    user_prompt = f"Question: {question}\n\nOptions:\n" + "\n".join(options_list)

    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_url,
                            "detail": "high"
                        }
                    },
                    {
                        "type": "text",
                        "text": user_prompt
                    }
                ]
            }
        ],
        "max_tokens": 100,
        # Temperature 0.0 is sent to request the least-random decoding.
        "temperature": 0.0,
    }

    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=90)
        response.raise_for_status()
        data = response.json()

        raw_answer = data["choices"][0]["message"]["content"].strip()
        return _match_option(raw_answer, options_list)

    except requests.exceptions.RequestException as e:
        # Network problems, timeouts and non-2xx responses land here.
        return f"API Error: {str(e)}"
    except Exception as e:
        # Deliberate best-effort: an unexpected response shape must not abort
        # the caller, so it is reported as a string as well.
        return f"Error: {str(e)}"
    

In [None]:
def main():
    """Run every question through the vision model and save the answers.

    Reads image URLs, questions and options from "image_URLs.csv",
    "questions.csv" and "options.csv", keeps the numbers that have both an
    image and a question, shuffles them, and queries ask_grok_vision() once
    per number. Each answer is printed and written to
    "results_Grok_Vision_URLs.csv" with the columns image_number, question,
    options (joined with "; ") and model_answer.

    Rows are written and flushed as soon as each answer arrives, so
    interrupting the run keeps the answers obtained so far instead of
    discarding them all.
    """
    images_csv = "image_URLs.csv"
    questions_csv = "questions.csv"
    options_csv = "options.csv"
    output_file = "results_Grok_Vision_URLs.csv"
    pause_seconds = 0.8  # fixed delay after every request

    image_urls = read_image_urls(images_csv)
    questions = read_questions(questions_csv)
    options = read_options(options_csv)

    # Only numbers with both an image and a question can be processed.
    # Sorting first gives random.shuffle a stable starting order.
    all_nums = sorted(set(image_urls.keys()) & set(questions.keys()))
    random.shuffle(all_nums)
    total = len(all_nums)

    print(f"Processing {total} images in random order.")

    with open(output_file, "w", newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["image_number", "question", "options", "model_answer"])
        f.flush()

        for position, num in enumerate(all_nums, start=1):
            url = image_urls[num]
            q = questions[num]
            opts = options.get(num, [])  # a question may have no options listed

            print(f"Processing {num}... ({position}/{total})")

            answer = ask_grok_vision(url, q, opts)

            writer.writerow([num, q, "; ".join(opts), answer])
            # Push the row to disk now so it survives an interrupted run.
            f.flush()

            print(f"→ {answer}")
            time.sleep(pause_seconds)

    print(f"\nFinished! Results saved to {output_file}")

In [60]:
# Entry point: start the whole evaluation run when this code is executed
# directly (as a notebook cell or as a script) rather than imported.
if __name__ == "__main__":
    main()

Processing 100 images in random order.
Processing 73... (1/100)
→ No.
Processing 65... (2/100)
→ They designate a lane where the driving direction will change to the opposite direction.
Processing 100... (3/100)
→ Straight and to the right.
Processing 14... (4/100)
→ You will give way to the pedestrian.
Processing 92... (5/100)
→ Yes.
Processing 23... (6/100)
→ The bus driver.
Processing 34... (7/100)
→ Allowed.
Processing 57... (8/100)
→ To turn left at the intersection and make a U-turn
Processing 26... (9/100)
→ The truck driver.
Processing 49... (10/100)
→ I will give way to the tram and the red vehicle.
Processing 50... (11/100)
→ I will give way to the tram
Processing 52... (12/100)
→ I will give way to the motorcycle and the tram.
Processing 40... (13/100)
→ B.
Processing 90... (14/100)
→ I will only give way to the emergency vehicle.
Processing 87... (15/100)
→ I will give way to the cyclist.
Processing 24... (16/100)
→ Prohibited, the shown load must be transported in an appro