In [41]:
!pip install python-dotenv
!pip install google-genai
!pip install requests

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [None]:
%pip install google-genai

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [43]:
import os
import csv
import requests
from dotenv import load_dotenv
from google import genai
from google.genai import types
import time
import random

In [None]:
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Shared Gemini client; ask_gemini_about_image_url() uses this module-level object.
# NOTE(review): no api_key is passed here, so the SDK presumably reads the key from
# the environment populated above (e.g. GEMINI_API_KEY / GOOGLE_API_KEY) — confirm.
client = genai.Client()

In [None]:
def read_image_urls(csv_path):
    """Read image URLs from a CSV file into a dict: {image_number: image_url}.

    The first row is treated as a header and skipped. Rows with fewer than
    two columns are ignored. Both columns are stripped of surrounding
    whitespace. If an image number appears more than once, the last row wins.

    Args:
        csv_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        dict mapping image number (str) to image URL (str); empty when the
        file is empty or contains only a header.

    Raises:
        OSError: If the file cannot be opened.
    """
    urls = {}
    with open(csv_path, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        # Skip the header row. The None default keeps a completely empty
        # file from raising StopIteration.
        next(reader, None)
        for row in reader:
            if len(row) >= 2:
                urls[row[0].strip()] = row[1].strip()
    return urls


In [46]:
def read_questions(csv_path):
    """Load question texts from a CSV file, keyed by image number.

    Every row that has at least two columns contributes one entry: the
    stripped first column is the key and the stripped second column is the
    question text. No header row is skipped. A repeated key keeps the value
    from the last matching row.
    """
    with open(csv_path, newline='', encoding='utf-8') as handle:
        return {
            record[0].strip(): record[1].strip()
            for record in csv.reader(handle)
            if len(record) >= 2
        }

In [47]:
def read_options(csv_path):
    """Collect answer options from a CSV file, grouped by question number.

    Each row with at least two columns adds its stripped second column to
    the list stored under its stripped first column. Options keep the order
    in which they appear in the file. No header row is skipped.
    """
    grouped = {}
    with open(csv_path, newline='', encoding='utf-8') as handle:
        for record in csv.reader(handle):
            if len(record) < 2:
                continue
            key = record[0].strip()
            text = record[1].strip()
            if key not in grouped:
                grouped[key] = []
            grouped[key].append(text)
    return grouped

In [None]:
def ask_gemini_about_image_url(image_url, question, options):
    """Download an image and ask Gemini to pick the correct answer option.

    The image is fetched with an HTTP GET, wrapped as an inline image part,
    and sent together with the question and options to the
    'gemini-2.5-flash' model through the module-level ``client``.

    Args:
        image_url: URL of the image to download.
        question: Question text inserted into the prompt.
        options: Answer options; interpolated into the prompt as-is. A falsy
            value is rendered as 'No options provided'.

    Returns:
        str: The stripped model text on success. On failure a diagnostic
        string is returned instead of raising: it starts with
        "Error fetching image from URL:", "Blocked:" or "Error:".
    """
    try:
        image_response = requests.get(image_url, timeout=30)
        image_response.raise_for_status()

        image_bytes = image_response.content

        # Content-Type may carry parameters (e.g. "image/png; charset=binary");
        # only the bare media type is a valid mime_type for the image part.
        content_type = image_response.headers.get('Content-Type') or 'image/png'
        mime_type = content_type.split(';', 1)[0].strip().lower()
        if not mime_type.startswith(('image/', 'application/')):
            mime_type = 'image/png'

    except requests.exceptions.RequestException as e:
        return f"Error fetching image from URL: {str(e)}"

    image_part = types.Part.from_bytes(
        data=image_bytes,
        mime_type=mime_type,
    )

    prompt_text = (
        "Based on the image and your expertise, select the ONE correct option.\n"
        f"Question: {question}\n"
        f"Options: {options if options else 'No options provided'}"
    )

    contents = [
        image_part,
        prompt_text
    ]

    # Adjacent string literals are concatenated verbatim, so every sentence
    # and list item needs its own trailing space or newline.
    system_instruction = (
        "You are a specialized **Latvian traffic rules expert**. "
        "Your response **MUST be conducted solely in English**. "
        "Please think before providing your answer. Keep the thought process internal, do not output it. "
        "Your entire purpose is to select the single correct answer from the provided options.\n"
        "---\n"
        "**ABSOLUTE OUTPUT REQUIREMENT:**\n"
        "1. The **ONLY** text you are permitted to generate is the **exact text of the single correct option**.\n"
        "2. You **MUST NOT** include any reasoning, explanation, numbering, punctuation, or introductory phrases whatsoever.\n"
        "3. The output must be a single, complete line of text."
    )

    # NOTE(review): the model's internal thinking may count against this
    # budget, which could produce empty responses — confirm against SDK docs.
    config = types.GenerateContentConfig(
        system_instruction=system_instruction,
        max_output_tokens=500
    )

    try:
        response = client.models.generate_content(
            model='gemini-2.5-flash',
            contents=contents,
            config=config,
        )

        if response.text is not None:
            return response.text.strip()

        # No text: work out why. The finish reason lives on the first
        # candidate, which may be absent.
        first_candidate = response.candidates[0] if response.candidates else None
        reason = None
        if first_candidate is not None and first_candidate.finish_reason:
            reason = first_candidate.finish_reason.name

        # Check prompt feedback independently of candidates, so a set block
        # reason is still reported when the response has no candidates.
        feedback = response.prompt_feedback
        if feedback and feedback.block_reason:
            block_reason = feedback.block_reason.name
            if reason:
                return f"Blocked: {block_reason} (Finish Reason: {reason})"
            return f"Blocked: {block_reason}"

        if reason:
            return f"Error: Empty response. Finish Reason: {reason}"

        return "Error: Empty response (No finish reason available)."

    except Exception as e:
        # Deliberate best-effort: one failed request must not abort the batch.
        return f"Error: {str(e)}"

In [None]:
def main(images_csv="image_URLs.csv", questions_csv="questions.csv",
         options_csv="options.csv",
         output_csv_file="results_Gemini_image_URL.csv", seed=None):
    """Ask Gemini every question that has an image and save the answers to CSV.

    Image numbers present in both the image-URL file and the questions file
    are processed in random order. For each one the model is queried via
    ask_gemini_about_image_url(), and the answer is printed and collected.
    When all images are done, the results are written to ``output_csv_file``
    with the columns image_number, question, options (joined with ';') and
    model_answer.

    Args:
        images_csv: CSV with image numbers and URLs (header row skipped).
        questions_csv: CSV with image numbers and question texts.
        options_csv: CSV with question numbers and one option per row.
        output_csv_file: Path of the results CSV to (over)write.
        seed: Optional seed for the processing order. With the default None
            the module-level random generator is used, as before.
    """
    image_urls = read_image_urls(images_csv)
    questions = read_questions(questions_csv)
    options = read_options(options_csv)

    # Sort first so that a given seed always produces the same order; set
    # iteration order for strings is not stable between interpreter runs.
    all_nums = sorted(image_urls.keys() & questions.keys())

    if seed is None:
        random.shuffle(all_nums)
    else:
        random.Random(seed).shuffle(all_nums)

    total = len(all_nums)
    print(f"Processing {total} images in random order.")

    results_all = []
    for count, img_num in enumerate(all_nums, start=1):
        answer_options = options.get(img_num, [])

        print(f"Processing image {img_num}... ({count}/{total})")

        answer = ask_gemini_about_image_url(
            image_urls[img_num], questions[img_num], answer_options
        )

        results_all.append({
            "image_number": img_num,
            "question": questions[img_num],
            "options": ";".join(answer_options),
            "model_answer": answer,
        })

        print(f"Image {img_num}: {answer}\n")

    with open(output_csv_file, "w", newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(
            f,
            fieldnames=["image_number", "question", "options", "model_answer"]
        )
        writer.writeheader()
        writer.writerows(results_all)

    print(f"Saved all results to: {output_csv_file}")

In [50]:
# Entry point: run the whole pipeline when this cell/script is executed directly
# (the guard keeps main() from running if the code is imported as a module).
if __name__ == "__main__":
    main()

Processing 100 images in random order.
Processing image 3... (1/100)
Image 3: An intersection.

Processing image 64... (2/100)
Image 64: I will cross first.

Processing image 26... (3/100)
Image 26: The passenger car driver.

Processing image 37... (4/100)
Image 37: That road works are being carried out on the road.

Processing image 50... (5/100)
Image 50: I will cross first.

Processing image 1... (6/100)
Image 1: Prohibited.

Processing image 80... (7/100)
Image 80: It is forbidden.

Processing image 55... (8/100)
Image 55: Both vehicles are parked incorrectly.

Processing image 75... (9/100)
Image 75: 50 km/h.

Processing image 73... (10/100)
Image 73: No.

Processing image 2... (11/100)
Image 2: Prohibited.

Processing image 30... (12/100)
Image 30: You will turn on the indicator, move into the right lane, and then reduce speed.

Processing image 93... (13/100)
Image 93: I will give way to both vehicles.

Processing image 82... (14/100)
Image 82: No.

Processing image 65... (15/10