In [None]:
import os
import json
import time
from glob import glob
from tqdm import tqdm
import google.generativeai as genai

###############################################################################
#                          CONFIGURATION / CONSTANTS                          #
###############################################################################

# Configure Gemini API.
# SECURITY: credentials should not live in source control. The key is taken
# from the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable when one is
# set; the literal below is kept only as a backward-compatible fallback and
# should be rotated and removed.
genai.configure(
    api_key=(
        os.environ.get("GEMINI_API_KEY")
        or os.environ.get("GOOGLE_API_KEY")
        or "-vIfeKqEl4"
    )
)

# Model name and sampling settings passed to every generate_content() call.
MODEL = "gemini-1.5-flash"
GENERATION_CONFIG = genai.GenerationConfig(
    max_output_tokens=1500,
    temperature=0.7
)

# Paths
INPUT_DIR = "G:/Samples HAD/9 samples/json"  # Folder scanned for input .json files
OUTPUT_DIR = "G:/Samples HAD/9 samples/output"  # Folder where output .json files are written
# Create the output folder up front so later writes cannot fail on a missing directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)

###############################################################################
#                       SCENARIO EXTRACTION AND PROMPT                        #
###############################################################################

def parse_frame(frame):
    """
    Collect the fields of one frame record that the prompt builder needs.

    Args:
        frame: Mapping for a single frame, as loaded from the input JSON.

    Returns:
        A dict with the keys "caption", "speed", "steering", "maneuver",
        "safe", "goal_oriented", "action_suggestions",
        "traffic_reg_suggestions", "nodes" and "edges". A missing "caption"
        or "maneuver" becomes "", other missing scalar fields become None,
        and "nodes"/"edges" (read from frame["graph"]) become [] when the
        graph or the respective list is absent or null.
    """
    # The "graph" key can be present but null in the JSON; dict.get's default
    # only covers a missing key, so normalise None to an empty mapping here.
    graph = frame.get("graph") or {}
    nodes = graph.get("nodes")
    edges = graph.get("edges")

    return {
        "caption": frame.get("caption", ""),
        "speed": frame.get("speed"),
        "steering": frame.get("steering"),
        "maneuver": frame.get("maneuver", ""),
        "safe": frame.get("safe"),
        "goal_oriented": frame.get("goal-oriented"),
        "action_suggestions": frame.get("Action Suggestions"),
        "traffic_reg_suggestions": frame.get("Traffic Regulations Suggestions"),
        # Callers iterate over these, so never hand back None.
        "nodes": [] if nodes is None else nodes,
        "edges": [] if edges is None else edges,
    }

def _join_field(values):
    """Render a list-valued annotation as comma-separated text ("N/A" if empty)."""
    if not values:
        return "N/A"
    if isinstance(values, str):
        # A bare string would otherwise be joined character by character.
        return values
    # str() guards against non-string items (numbers, None) inside the list.
    return ", ".join(str(item) for item in values) or "N/A"


def generate_caption_and_maneuver_prompt(frame_info):
    """
    Build the prompt asking Gemini for an improved caption and a maneuver.

    Args:
        frame_info: Dict with the keys "safe", "goal_oriented",
            "action_suggestions", "traffic_reg_suggestions", "speed",
            "steering", "nodes" and "edges". "nodes" holds
            (id, attributes-dict) pairs and "edges" holds
            (source, target, relation-dict) triples; entries of any other
            length are skipped.

    Returns:
        The prompt text (str), instructing the model to answer with a JSON
        object containing "caption" and "maneuver".
    """
    # Prepare objects info
    objects_descriptions = []
    for node_data in frame_info.get("nodes") or []:
        if len(node_data) != 2:
            continue
        obj_id, obj_content = node_data
        obj_name = obj_content.get("obj_name", "unknown object")
        obj_type = obj_content.get("object_type", "")
        status = _join_field(obj_content.get("Status"))
        object_safety = _join_field(obj_content.get("Object_Safety"))
        positions = _join_field(obj_content.get("position"))
        importance = obj_content.get("importance_ranking", "N/A")

        objects_descriptions.append(
            f"- ID: {obj_id}, Name: {obj_name}, Type: {obj_type}, "
            f"Status: {status}, Safety: {object_safety}, "
            f"Positions: {positions}, Importance: {importance}"
        )
    objects_text = "\n".join(objects_descriptions) if objects_descriptions else "No objects detected."

    # Prepare relations info, skipping malformed edges and edges with no
    # relation data (mirrors how malformed nodes are skipped above).
    relation_lines = []
    for edge in frame_info.get("edges") or []:
        if len(edge) != 3:
            continue
        src, tgt, rel_data = edge
        if not rel_data:
            continue
        relation_lines.append(f"- {src} --> {tgt}: {rel_data.get('relation', '')}")

    if relation_lines:
        relations_text = "Relations:\n" + "\n".join(relation_lines)
    else:
        # Also covers the case where every edge was filtered out, which would
        # otherwise leave a dangling "Relations:" header in the prompt.
        relations_text = "No explicit relations found."

    # Build the user message for Gemini
    user_prompt = f"""
You are an expert in autonomous vehicles, traffic scenarios, and driving environments.

Below is a scenario extracted from a vehicle's video feed. 
Please produce:
1. 'caption' A refined, context-rich 'caption' summarizing the scene clearly.
2. 'maneuver' recommendation that helps the vehicle remain safe and aligned with its goals.
-----------------------
SAFE: {frame_info['safe']}
GOAL-ORIENTED: {frame_info['goal_oriented']}
ACTION SUGGESTIONS: {frame_info['action_suggestions']}
TRAFFIC REGULATIONS SUGGESTIONS: {frame_info['traffic_reg_suggestions']}

SPEED: {frame_info['speed']}
STEERING: {frame_info['steering']}

OBJECTS IN SCENE:
{objects_text}
{relations_text}
-----------------------
Please return valid JSON with exactly key:
"caption" (a string),
"maneuver" (a string),

No extra commentary\u2014only JSON.
    """.strip()

    return user_prompt

###############################################################################
#                              API CALL LOGIC                                 #
###############################################################################

def call_gemini_api(prompt):
    """
    Send ``prompt`` to the configured Gemini model and return its reply text.

    A GenerativeModel for MODEL is created on each call and queried with the
    module-level GENERATION_CONFIG; the ``text`` attribute of the response is
    returned.
    """
    model = genai.GenerativeModel(MODEL)
    reply = model.generate_content(prompt, generation_config=GENERATION_CONFIG)
    return reply.text

###############################################################################
#                                  MAIN LOGIC                                  #
###############################################################################

def _parse_model_json(text):
    """
    Decode the model reply into a dict, tolerating text around the JSON object.

    The reply is first parsed as-is. If that fails, the span from the first
    "{" to the last "}" is parsed instead, which recovers replies wrapped in
    Markdown code fences or surrounded by commentary.

    Raises:
        json.JSONDecodeError: if no JSON object can be decoded from ``text``.
    """
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(text[start:end + 1])

    if not isinstance(parsed, dict):
        # A bare list/string/number has no "caption"/"maneuver" keys; report it
        # through the same exception type so callers need a single handler.
        raise json.JSONDecodeError("expected a JSON object", text, 0)
    return parsed


def main():
    """
    Rewrite the caption and maneuver of every frame in every input JSON file.

    For each ``*.json`` file in INPUT_DIR, each frame is turned into a prompt,
    sent to Gemini, and its "caption" and "maneuver" are overwritten with the
    values from the reply (or with "" when the reply cannot be decoded). The
    updated frames are written to ``output_<name>.json`` in OUTPUT_DIR.
    """
    # Process all JSON files in the input directory
    json_files = glob(os.path.join(INPUT_DIR, "*.json"))
    for json_file in json_files:
        print(f"Processing file: {json_file}")
        file_name_without_ext = os.path.splitext(os.path.basename(json_file))[0]
        output_file_name = f"output_{file_name_without_ext}.json"
        output_file_path = os.path.join(OUTPUT_DIR, output_file_name)

        # Load the input JSON
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Iterate over frames
        for frame in tqdm(data, desc="Frames"):
            # Extract scenario info
            frame_info = parse_frame(frame)

            # Generate the prompt for improved caption and maneuver
            user_prompt = generate_caption_and_maneuver_prompt(frame_info)

            # Call Gemini to get the improved caption and maneuver
            response = call_gemini_api(user_prompt)

            # Try parsing the JSON response
            try:
                result_json = _parse_model_json(response)
            except json.JSONDecodeError:
                print("DEBUG: Failed to decode JSON. Response was:")
                print(response)
                # If parsing fails, set them to empty strings
                frame["caption"] = ""
                frame["maneuver"] = ""
            else:
                # Overwrite the frame's caption and maneuver with the improved ones
                frame["caption"] = result_json.get("caption", "")
                frame["maneuver"] = result_json.get("maneuver", "")

            # Small delay to avoid hitting rate limits too quickly
            time.sleep(0.5)

        # Save output to a new JSON file
        with open(output_file_path, "w", encoding="utf-8") as out_f:
            json.dump(data, out_f, ensure_ascii=False, indent=2)

        print(f"Saved processed frames with improved caption and maneuver to: {output_file_path}")

if __name__ == "__main__":
    main()
