In [None]:
# imports
import os
import json
import time
import concurrent.futures
from pathlib import Path
from dotenv import load_dotenv
import gradio as gr
from IPython.display import Markdown, display

import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from ai_tools.tools import LLMQuery, handle_tool_call
from pokemon_tools import PokemonAPIClient, TOOLS 

In [12]:
# System prompt for the profile-generation LLM. It is passed once to LLMQuery
# (see the configuration cell) and describes how the raw JSON for a Pokémon is
# turned into a Markdown profile intended for vector-search (RAG) retrieval.
#
# Notes for maintainers:
# * The section headers in "Required Output Structure" are plain `##` headers so
#   the template agrees with constraint 4 ("Use Markdown headers").
# * Section 3 is named "Evolution & Biology" in both the guidelines and the
#   output template.
# * The JSON payload is NOT interpolated into this string; it is sent in the
#   user message, and the closing "Input Data" note says so explicitly.
SYSTEM_PROMPT = """You are an expert Pokémon Data Analyst and Lorekeeper. Your task is to process raw JSON data into a **semantically dense, natural language profile** optimized for Vector Search retrieval (RAG).

### Core Instruction:
Use the provided JSON as the **primary source** for facts (Stats, Moves, Evolution), but use your **World Knowledge** to expand on behavior, competitive viability, and lore. The goal is to maximize the number of relevant searchable keywords (moves, abilities, behaviors) in the output.

### STRICT Output Constraints:
1.  **NO Conversational Filler:** Start immediately with the Markdown headers. Do not say "Here is the profile."
2.  **Naturalize the ID:** Include the Pokémon's ID (e.g., #006) naturally in the first sentence of the Identity section.
3.  **Omit Empty Data:** If a field is empty (e.g., `held_items` or `forms`), skip it entirely.
4.  **Format:** Use Markdown headers (`##`) for sections.

### Processing Guidelines:

**1. Identity & Lore (Expand this)**
* **Variant Detection:** Check the `name` field. If it contains suffixes like `-alola`, `-galar`, `-hisui`, or `-paldea`, explicitly name it as the **[Region]n Form** (e.g., convert `vulpix-alola` to "Alolan Vulpix").
* **Bio Synthesis:** Combine `genus`, `flavor_text`, `habitat`, `generation`, and `color` into a cohesive summary.
* **Form Potential (World Knowledge):** Explicitly mention if this Pokémon has **Mega Evolutions**, **Gigantamax forms**, or **Paradox forms**, even if they are not listed in the JSON.
* **Context:** Mention the specific region associated with the generation (e.g., "Generation I (Kanto)").

**2. Combat Profile (Detailed Analysis)**
* **Stat Block:** You **MUST** list the specific Base Stats in a list format using these standard abbreviations: **HP, Attack, Defense, Sp. Atk, Sp. Def, Speed**.
* **Archetype:** Analyze the stats to assign a competitive role (e.g., "Fast Special Sweeper," "Physical Tank," "Mixed Attacker").
* **Type Matchups:**
    * Explicitly list **Weaknesses** and **Resistances**.
    * **Logic Check:** If a Pokémon has a double weakness (e.g., Fire/Flying vs. Rock), explicitly label it as a **"Double Weakness (4x)."**
* **Smart Move Selection:** Select 4 specific moves that align with the Pokémon's **highest offensive stat**. (e.g., If Sp. Atk > Attack, list *Flamethrower*, not *Fire Punch*). Include Utility moves (Recover, Roost) if relevant.
* **Abilities:** Name the abilities and briefly explain their tactical effect.

**3. Evolution & Biology**
* **Detailed Chain:** Narrate the full evolutionary line.
* **Triggers:** You must specify the **method** found in `evolution_details` (e.g., "when exposed to a Thunder Stone," "leveled up with High Friendship," "at Level 36").
* **Breeding:** Mention Egg Groups and Hatch Counters if relevant.

---

### Required Output Structure:

## Identity & Lore
[Name] (Pokedex #[ID]) is the [Genus]... [Detailed Description of appearance, habitat, and lore]... [Mention of Mega/G-Max forms if applicable]...

## Combat Profile
* **Stats:** HP [Val], Attack [Val], Defense [Val], Sp. Atk [Val], Sp. Def [Val], Speed [Val].
* **Archetype:** [Role Name] (e.g., Fast Special Sweeper).
* **Type Matchups:** It is **weak to [Types]** (mentioning any 4x weaknesses) and **resists [Types]**.
* **Move Selection:** Capitalizing on its stats, key moves include **[Move 1], [Move 2], [Move 3], and [Move 4]**.
* **Abilities:** [Ability Name] ([Effect description])...

## Evolution & Biology
[Name] is part of a [Number]-stage evolutionary line. It evolves from [Pre-evo] when [Condition]... It belongs to the [Egg Group] group...

---
**Input Data:**
The JSON data for the Pokémon is provided in the user message.
"""

In [13]:
# --- Run settings ------------------------------------------------------------
# Names of the Pokémon to process in this run.
POKEMON_LIST = ["charizard", "bulbasaur"]

# Folder that receives the generated files; created up front (including any
# missing parents) and left alone if it already exists.
OUTPUT_DIR = Path("raw")
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Model identifier handed to the LLM client.
MODEL_NAME = "gemini-3-pro-preview"

# --- Shared clients ----------------------------------------------------------
# One API client and one LLM client are created here and reused by every call
# to generate_pokemon_data.
pokemon_client = PokemonAPIClient()
llm_client = LLMQuery(system_prompt=SYSTEM_PROMPT, model=MODEL_NAME)

def generate_pokemon_data(pokemon_name):
    """Fetch one Pokémon's data, ask the LLM for a profile, and save both to OUTPUT_DIR.

    Steps:
      1. Fetch the details with ``pokemon_client.get_pokemon_details``.
      2. Look up every entry of ``details["types"]`` with
         ``pokemon_client.get_type_info``; lookups that report an error are
         skipped with a warning.
      3. Send the combined context to ``llm_client.query`` (``use_history=False``).
      4. Write ``<id:04d>_<name>.json`` (the context) and ``<id:04d>_<name>.md``
         (the LLM response) into ``OUTPUT_DIR``.

    Every failure is reported with ``print`` and swallowed, so a single bad
    Pokémon does not abort a batch run.

    Args:
        pokemon_name: Name passed to the API client; also used in the output
            file names.

    Returns:
        None. Results are written to disk as a side effect.
    """
    # Rate limit friendly
    time.sleep(1)
    try:
        print(f"Processing {pokemon_name}...")

        # 1. Fetch Data
        details = pokemon_client.get_pokemon_details(pokemon_name)
        if "error" in details:
            print(f"Error fetching details for {pokemon_name}: {details['error']}")
            return

        # 2. Enrich with Type Info
        type_info = []
        for type_name in details.get("types", []):
            t_info = pokemon_client.get_type_info(type_name)
            if "error" in t_info:
                # Do not drop the lookup silently: the profile's type matchups
                # depend on this data, so make the gap visible in the log.
                print(
                    f"Warning: could not fetch type info '{type_name}' "
                    f"for {pokemon_name}; continuing without it"
                )
                continue
            type_info.append(t_info)

        # Prepare Context
        context = {
            "pokemon_details": details,
            "type_info": type_info
        }

        # 3. Generate Content
        # We start a new query with the system prompt and the data
        user_prompt = f"Here is the data for {pokemon_name}:\n```json\n{json.dumps(context, indent=2)}\n```"

        response = llm_client.query(user_prompt=user_prompt, use_history=False)

        # Validate BEFORE touching the disk. open(path, "w") truncates the file
        # immediately, so writing a missing/non-text response would wipe a
        # previously generated profile and leave an orphan JSON file behind.
        if not isinstance(response, str) or not response.strip():
            print(f"Failed to process {pokemon_name}: LLM returned no text")
            return

        # 4. Save Files
        # Pokemon ID for filename (zero-padded to four digits; 0 if absent)
        p_id = details.get("id", 0)
        file_prefix = f"{p_id:04d}_{pokemon_name}"

        # Save JSON Context
        json_path = OUTPUT_DIR / f"{file_prefix}.json"
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(context, f, indent=2)

        # Save MD Response
        md_path = OUTPUT_DIR / f"{file_prefix}.md"
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(response)

        print(f"Successfully processed {pokemon_name}. Saved to {OUTPUT_DIR}")

    except Exception as e:
        # Best-effort batch processing: report and move on to the next Pokémon.
        print(f"Failed to process {pokemon_name}: {e}")

# Parallel Execution
print("Starting generation...")
# Using modest concurrency to avoid rate limits
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    # Executor.map only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining the iterator makes sure an
    # error that escapes a worker surfaces here instead of being silently lost
    # (in which case "Generation complete." below would be misleading).
    list(executor.map(generate_pokemon_data, POKEMON_LIST))
print("Generation complete.")

Starting generation...
Processing charizard...Processing bulbasaur...

Successfully processed bulbasaur. Saved to raw
Successfully processed charizard. Saved to raw
Generation complete.


In [14]:
# Debug inspection: display the message history held by the shared LLM client.
# NOTE(review): the recorded output shows the bulbasaur prompt stored here even
# though query() was called with use_history=False — confirm whether LLMQuery
# records messages regardless of that flag, and whether that is safe when the
# client is shared across worker threads. The full prompt is echoed in the
# output, so consider clearing this cell's output before sharing the notebook.
llm_client.chat_history

[{'role': 'user',
  'content': 'Here is the data for bulbasaur:\n```json\n{\n  "pokemon_details": {\n    "id": 1,\n    "name": "bulbasaur",\n    "stats": {\n      "hp": 45,\n      "attack": 49,\n      "defense": 49,\n      "special-attack": 65,\n      "special-defense": 65,\n      "speed": 45\n    },\n    "types": [\n      "grass",\n      "poison"\n    ],\n    "height_m": 0.7,\n    "weight_kg": 6.9,\n    "abilities": [\n      {\n        "name": "overgrow",\n        "is_hidden": false,\n        "slot": 1\n      },\n      {\n        "name": "chlorophyll",\n        "is_hidden": true,\n        "slot": 3\n      }\n    ],\n    "forms": [\n      "bulbasaur"\n    ],\n    "moves": [\n      "razor-wind",\n      "swords-dance",\n      "cut",\n      "bind",\n      "vine-whip",\n      "headbutt",\n      "tackle",\n      "body-slam",\n      "take-down",\n      "double-edge",\n      "growl",\n      "strength",\n      "mega-drain",\n      "leech-seed",\n      "growth",\n      "razor-leaf",\n      "sol