In [None]:
import os
import json
import time
import random
import re
import logging
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image
import google.generativeai as genai
from ultralytics import YOLO

# Gemini API keys; process_image_with_retry picks one per attempt in
# round-robin order. The list is empty in this copy of the file.
API_KEYS = []

# Root logger writes records of level ERROR and above to api_errors.log
logging.basicConfig(level=logging.ERROR, filename='api_errors.log')

# Detector shared by every call to detect_vehicle (YOLOv8 nano weights)
yolo_model = YOLO("yolov8n.pt")

# Function to get list of image files from directory
def get_image_files(base_dir):
    """Recursively collect image paths found under ``base_dir``.

    A file counts as an image when its name, compared case-insensitively,
    ends with ``.jpg``, ``.png`` or ``.jpeg``. Paths are returned in the
    order ``os.walk`` yields them, each joined onto its containing directory.
    """
    image_suffixes = (".jpg", ".png", ".jpeg")
    collected = []
    for folder, _, names in os.walk(base_dir):
        for name in names:
            if name.lower().endswith(image_suffixes):
                collected.append(os.path.join(folder, name))
    return collected

# Function to check if an image is valid
def check_image(image_path):
    """Report whether PIL can open and verify the file at ``image_path``.

    Returns True when ``Image.open`` followed by ``verify()`` completes
    without raising. Any exception is printed to stdout and reported as
    False instead of propagating.
    """
    try:
        with Image.open(image_path) as candidate:
            candidate.verify()  # Integrity check; raises on a broken file
    except Exception as err:
        print(f"Invalid image: {image_path} - {err}")
        return False
    return True

# Function to clean JSON response
def clean_json_response(response):
    """Remove Markdown code-fence markers from an API response.

    Args:
        response: Raw response text.

    Returns:
        The text with every ``` marker removed, together with a directly
        attached "json" language tag in any letter case, and with leading
        and trailing whitespace trimmed.
    """
    # The tag match is case-insensitive so that "```JSON" does not leave a
    # stray "JSON" token in front of the payload.
    without_fences = re.sub(r'```(?:json)?', '', response, flags=re.IGNORECASE)
    # Trim only after the fences are gone; trimming first would leave the
    # newlines that sat between the fences and the payload.
    return without_fences.strip()

# Class names accepted as vehicles. "boat" is the COCO label for watercraft;
# "ship" is kept in case weights with a different label set are loaded.
# NOTE(review): confirm against yolo_model.names for the weights in use.
_VEHICLE_CLASSES = frozenset(
    {"car", "motorcycle", "bicycle", "train", "airplane", "boat", "ship"}
)


# Function to detect vehicle type using YOLO
def detect_vehicle(image_path):
    """Detect the type of vehicle in the image using YOLO.

    Args:
        image_path: Path of the image passed to ``yolo_model``.

    Returns:
        The class name of the first detected box whose label is in
        ``_VEHICLE_CLASSES``, or None when no box has such a label.
    """
    results = yolo_model(image_path)
    for result in results:
        for box in result.boxes:
            # Translate the predicted class index into its label
            class_name = yolo_model.names[int(box.cls)]
            if class_name in _VEHICLE_CLASSES:
                return class_name
    return None

# Function to call Gemini API and generate Q&A
def generate_qa_with_api(api_key, image_path, vehicle_type, retries=5):
    """Call the Gemini API to generate questions and answers for an image.

    The image is sent with the text prompt so the model can actually
    inspect it.

    Args:
        api_key: Key passed to ``genai.configure`` before each attempt.
        image_path: Path of the image to open and send to the model.
        vehicle_type: Vehicle label interpolated into the prompt.
        retries: Maximum number of API attempts. Failed attempts are
            separated by an exponential backoff of ``2 ** attempt`` seconds.

    Returns:
        The parsed JSON object (a dict whose "questions" and "answers"
        values are lists), or None when every attempt raised, the response
        was not valid JSON, or the JSON lacked that structure.
    """
    # NOTE(review): the prompt asks for answers of both "5-10 words" and
    # "10-20 words", and for 10 questions while the example shows 5. The
    # wording is left untouched here; confirm which limits are intended.
    # The prompt does not depend on the attempt, so it is built once.
    prompt = f"""
                Analyze the image and return the following information:
                - "questions": 10 questions related to the type of vehicle, color, quantity, and context of the vehicle in the image.
                - "answers": Corresponding answers for each question (limit to 5-10 words).

                Requirements:
                - Return only JSON, no extra text.
                - Do not use Markdown (no ```json).
                - JSON structure must be standard, no extra characters.
                - Answers must be concise, within 10-20 words.
                - Ensure accurate identification of the vehicle type as {vehicle_type}.
                - Questions must be diverse, including simple, inferential, comparative, and predictive questions.

                Additional Information:
                - The vehicle type is {vehicle_type}.

                Example JSON:
                {{
                    "questions": [
                        "What type of vehicle is this?",
                        "What is the color of the vehicle?",
                        "How many vehicles are in the image?",
                        "What is the maximum capacity of this vehicle?",
                        "What is the vehicle doing?"
                    ],
                    "answers": [
                        "This is a {vehicle_type}.",
                        "The vehicle is black.",
                        "There is 1 vehicle in the image.",
                        "The vehicle can carry up to 5 people.",
                        "The vehicle is parked."
                    ]
                }}
            """

    for attempt in range(retries):
        try:
            # NOTE(review): genai.configure presumably sets the key
            # process-wide; confirm this is safe when worker threads use
            # different keys at the same time.
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel("models/gemini-2.0-flash")

            # Send the image with the prompt; a text-only request would
            # leave the model with nothing to analyze.
            with Image.open(image_path) as img:
                response = model.generate_content([prompt, img]).text.strip()
            response = clean_json_response(response)  # Clean the response
        except Exception as e:
            logging.error(f"API error (attempt {attempt + 1}/{retries}): {e}")
            if attempt < retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
            continue

        # Process the API response. A malformed payload is not retried here.
        try:
            qa_data = json.loads(response)
        except json.JSONDecodeError:
            logging.error(f"JSON parsing error from API. Response: {response}")
            return None

        # Reject payloads that parse but lack the structure callers index.
        has_expected_shape = (
            isinstance(qa_data, dict)
            and isinstance(qa_data.get("questions"), list)
            and isinstance(qa_data.get("answers"), list)
        )
        if not has_expected_shape:
            logging.error(f"Unexpected JSON structure from API. Response: {response}")
            return None
        return qa_data

    return None

# Function to process an image
def process_image(image_path, api_key):
    """Validate one image, detect its vehicle and request Q&A for it.

    Returns the value produced by ``generate_qa_with_api``, or None when
    ``check_image`` rejects the file or ``detect_vehicle`` finds nothing.
    """
    if check_image(image_path):
        # YOLO supplies the vehicle label that is passed to the Gemini call
        detected_type = detect_vehicle(image_path)
        if detected_type:
            return generate_qa_with_api(api_key, image_path, detected_type)
        print(f"Could not detect vehicle in image: {image_path}")
    return None

# Function to process an image with retry mechanism
def process_image_with_retry(image_path, retries=5):
    """Generate Q&A for an image, rotating through API_KEYS on failure.

    Image validation and vehicle detection run once, before any API call,
    because their outcome does not depend on the API key. Only the Gemini
    request is retried.

    Args:
        image_path: Path of the image to process.
        retries: Maximum number of key-rotation attempts.

    Returns:
        The Q&A data from ``generate_qa_with_api``, or None when no API key
        is configured, the image is invalid, no vehicle is detected, or
        every attempt fails.
    """
    if not API_KEYS:
        # Without this guard the modulo below raises ZeroDivisionError.
        logging.error(f"No API keys configured; skipping image {image_path}.")
        print(f"No API keys configured; skipping image {image_path}.")
        return None

    if not check_image(image_path):
        logging.error(f"Skipping invalid image {image_path}.")
        return None

    vehicle_type = detect_vehicle(image_path)
    if not vehicle_type:
        logging.error(f"Skipping image {image_path}: no vehicle detected.")
        print(f"Could not detect vehicle in image: {image_path}")
        return None

    for attempt in range(retries):
        api_key = API_KEYS[attempt % len(API_KEYS)]
        qa_data = generate_qa_with_api(api_key, image_path, vehicle_type)
        if qa_data:
            return qa_data
        # Announce a retry only when another attempt will follow
        if attempt < retries - 1:
            logging.warning(f"Retrying attempt {attempt + 1} with a different API key for image: {image_path}")
            print(f"Retrying attempt {attempt + 1} with a different API key.")

    logging.error(f"Skipping image {image_path} after {retries} attempts.")
    print(f"Skipping image {image_path} after {retries} attempts.")
    return None

# Function to process multiple images concurrently
def process_images_concurrently(image_files, max_workers=5):
    """Run ``process_image_with_retry`` over many images on a thread pool.

    Returns a dict keyed by each image's base file name. Every value holds
    the "questions" and "answers" entries of that image's Q&A data. Images
    with a falsy result are left out, and any exception raised while
    collecting a result is logged rather than propagated.
    """
    collected = {}
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Remember which path each future belongs to
        pending = {}
        for path in image_files:
            pending[pool.submit(process_image_with_retry, path)] = path

        # Results are gathered in completion order
        for finished in as_completed(pending):
            path = pending[finished]
            try:
                qa = finished.result()
                if not qa:
                    continue
                collected[os.path.basename(path)] = {
                    "questions": qa["questions"],
                    "answers": qa["answers"],
                }
            except Exception as exc:
                logging.error(f"Error processing image {path}: {exc}")

    return collected

# Function to save data to a JSON file
def save_to_json(data, output_file):
    """Write ``data`` to ``output_file`` as UTF-8 JSON and print a notice.

    The output is indented by four spaces and non-ASCII characters are
    written as-is rather than escaped.
    """
    with open(output_file, "w", encoding="utf-8") as handle:
        json.dump(obj=data, fp=handle, indent=4, ensure_ascii=False)
    print(f"Data saved to file {output_file}")

# Input image directory and output dataset location
DATA_PATH = "data/Vehicles"  # Replace with your directory path
OUTPUT_FILE = "data/vqadata.json"  # Output JSON file name

# Collect every image found under DATA_PATH
image_files = get_image_files(DATA_PATH)

# Draw a random sample of at most 20 images (all of them when fewer exist)
sample_images = random.sample(image_files, k=min(len(image_files), 20))

# Generate Q&A for the sample on 5 worker threads, then write the result
data = process_images_concurrently(sample_images, max_workers=5)
save_to_json(data, OUTPUT_FILE)

  from .autonotebook import tqdm as notebook_tqdm



Ultralytics 8.3.94  Python-3.11.9 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i3-1115G4 3.00GHz)

Ultralytics 8.3.94  Python-3.11.9 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i3-1115G4 3.00GHz)

Ultralytics 8.3.94  Python-3.11.9 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i3-1115G4 3.00GHz)

Ultralytics 8.3.94  Python-3.11.9 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i3-1115G4 3.00GHz)

Ultralytics 8.3.94  Python-3.11.9 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i3-1115G4 3.00GHz)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
image 1/1 d:\TDTU\III - HK2\Deep Learning\Midterm\data\Vehicles\Train (555).png: 640x640 1 trai