In [1]:
# imports
import os
import json
import time
import concurrent.futures
from pathlib import Path
import threading
import requests

import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from ai_tools.tools import LLMQuery
from pokemon_tools import PokemonAPIClient 

In [2]:
# System prompt for Pokémon profile generation. It instructs the model to turn
# one Pokémon's JSON into a Markdown profile with three `##` sections
# (Identity & Lore, Combat Profile, Evolution & Biology).
# The processing guidelines are numbered to mirror those three output sections;
# a previously duplicated copy of the evolution guideline has been removed.
SYSTEM_PROMPT = """You are an expert Pokémon Data Analyst and Lorekeeper. Your task is to process raw JSON data into a **semantically dense, natural language profile** optimized for Vector Search retrieval (RAG).

### Core Instruction:
Use the provided JSON as the **primary source** for facts (Stats, Moves, Evolution), but use your **World Knowledge** to expand on behavior, competitive viability, and lore. The goal is to maximize the number of relevant searchable keywords (moves, abilities, behaviors) in the output.

### STRICT Output Constraints:
1.  **NO Conversational Filler:** Start immediately with the Markdown headers. Do not say "Here is the profile."
2.  **Naturalize the ID:** Include the Pokémon's ID (e.g., #006) naturally in the first sentence of the Identity section.
3.  **Omit Empty Data:** If a field is empty (e.g., `held_items` or `forms`), skip it entirely.
4.  **Format:** Use Markdown headers (`##`) for sections.

### Processing Guidelines:

**1. Identity & Lore (Expand this)**
* **Variant Detection:** Check the `name` field. If it contains suffixes like `-alola`, `-galar`, `-hisui`, or `-paldea`, explicitly name it as the **[Region]n Form** (e.g., convert `vulpix-alola` to "Alolan Vulpix").
* **Bio Synthesis:** Combine `genus`, `flavor_text`, `habitat`, `generation`, and `color` into a cohesive summary.
* **Form Potential (World Knowledge):** Explicitly mention if this Pokémon has **Mega Evolutions**, **Gigantamax forms**, or **Paradox forms**, even if they are not listed in the JSON.
* **Context:** Mention the specific region associated with the generation (e.g., "Generation I (Kanto)").

**2. Combat Profile (Detailed Analysis)**
* **Stat Block:** You **MUST** list the specific Base Stats in a list format using these standard abbreviations: **HP, Attack, Defense, Sp. Atk, Sp. Def, Speed**.
* **Archetype:** Analyze the stats to assign a competitive role (e.g., "Fast Special Sweeper," "Physical Tank," "Mixed Attacker").
* **Type Matchups:**
    * Explicitly list **Weaknesses** and **Resistances**.
    * **Logic Check:** If a Pokémon has a double weakness (e.g., Fire/Flying vs. Rock), explicitly label it as a **"Double Weakness (4x)."**
* **Smart Move Selection:** Select 4 specific moves that align with the Pokémon's **highest offensive stat**. (e.g., If Sp. Atk > Attack, list *Flamethrower*, not *Fire Punch*). Include Utility moves (Recover, Roost) if relevant.
* **Abilities:** Name the abilities and briefly explain their tactical effect.

**3. Evolution & Biology**
* **Detailed Chain:** Narrate the full evolutionary line.
* **Triggers:** You must specify the **method** found in `evolution_details` (e.g., "when exposed to a Thunder Stone," "leveled up with High Friendship," "at Level 36").
* **Breeding:** Mention Egg Groups and Hatch Counters if relevant.

---

### Required Output Structure:

**## Identity & Lore**
[Name] (Pokedex #[ID]) is the [Genus]... [Detailed Description of appearance, habitat, and lore]... [Mention of Mega/G-Max forms if applicable]...

**## Combat Profile**
* **Stats:** HP [Val], Attack [Val], Defense [Val], Sp. Atk [Val], Sp. Def [Val], Speed [Val].
* **Archetype:** [Role Name] (e.g., Fast Special Sweeper).
* **Type Matchups:** It is **weak to [Types]** (mentioning any 4x weaknesses) and **resists [Types]**.
* **Move Selection:** Capitalizing on its stats, key moves include **[Move 1], [Move 2], [Move 3], and [Move 4]**.
* **Abilities:** [Ability Name] ([Effect description])...

**## Evolution & Biology**
[Name] is part of a [Number]-stage evolutionary line. It evolves from [Pre-evo] when [Condition]... It belongs to the [Egg Group] group...

---
**Input Data:**
[JSON Data provided in the user prompt]
"""

In [None]:
# Local JSON file holding the list of Pokemon names so the API is hit only once.
LIST_CACHE_FILE = Path("pokemon_list.json")

def get_all_pokemon(timeout=30):
    """Return the names of all Pokemon, preferring the local JSON cache.

    If ``LIST_CACHE_FILE`` exists and can be parsed, its content is returned
    as-is. Otherwise the list is fetched from the PokeAPI ``pokemon`` endpoint
    and written to the cache file.

    Args:
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook forever.

    Returns:
        The cached JSON content, or a list of name strings from the API, or
        ``[]`` when the fetch fails. A failure to *write* the cache does not
        discard a successfully fetched list.
    """
    if LIST_CACHE_FILE.exists():
        print("Loading Pokemon list from cache...")
        try:
            with open(LIST_CACHE_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            # Best effort: an unreadable/corrupt cache falls through to a fresh fetch.
            print(f"Error reading cache, fetching fresh list. Error: {e}")

    print("Fetching list of all Pokemon from API...")
    url = "https://pokeapi.co/api/v2/pokemon?limit=10000"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        pokemon_list = [p["name"] for p in data["results"]]
    except Exception as e:
        print(f"Error fetching Pokemon list: {e}")
        return []

    # Save to cache. Kept separate from the fetch so that a read-only or
    # missing directory only costs us the cache, not the fetched data.
    try:
        with open(LIST_CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(pokemon_list, f)
        print(f"Saved {len(pokemon_list)} Pokemon to {LIST_CACHE_FILE}")
    except OSError as e:
        print(f"Warning: could not write cache {LIST_CACHE_FILE}: {e}")
    return pokemon_list

len(get_all_pokemon())

In [None]:


# Configuration
# Directory that receives one `<id>_<name>.json` and one `<id>_<name>.md` per Pokemon;
# created at cell execution time if it does not exist.
OUTPUT_DIR = Path("raw/pokemon")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Model identifier handed to LLMQuery by the worker function.
MODEL_NAME = "gemini-3-flash-preview"
# Size of the thread pool used by the main execution block.
MAX_WORKERS = 10
# Number of failed items after which the abort flag is raised.
ABORT_LIMIT = 20

# Global state
# Shared across worker threads: error_count is only modified while holding
# error_lock; abort_event is checked at the start of every worker call.
error_count = 0
error_lock = threading.Lock()
abort_event = threading.Event()



def generate_pokemon_data(pokemon_name):
    """Fetch one Pokemon's data, ask the LLM for a profile, and save both.

    Writes ``<id:04d>_<name>.json`` (the raw context sent to the model) and
    ``<id:04d>_<name>.md`` (the model's answer) into ``OUTPUT_DIR``. The call is
    a no-op when the abort flag is set or when a matching ``.md`` already exists.

    Any failure (client construction, API error payload, LLM error, empty LLM
    answer, file I/O) is caught, counted in the global ``error_count`` and
    printed; once ``ABORT_LIMIT`` failures are reached ``abort_event`` is set so
    that remaining workers return immediately.

    Args:
        pokemon_name: Name used for the API lookup and for the output file name.

    Returns:
        None.
    """
    global error_count

    if abort_event.is_set():
        return

    # Optimistic existing file check: the .md file is the "done" marker.
    if any(OUTPUT_DIR.glob(f"*_{pokemon_name}.md")):
        # print(f"Skipping {pokemon_name} (File exists)")
        return

    try:
        # One client pair per call, so no client object is shared between
        # worker threads. Built inside the try so that a constructor failure
        # is counted like any other per-item error instead of propagating
        # out of executor.map and ending the whole run.
        pokemon_client = PokemonAPIClient(enable_cache=False)
        llm_client = LLMQuery(model=MODEL_NAME, system_prompt=SYSTEM_PROMPT)

        # Small fixed pause before each item to space out outgoing requests.
        time.sleep(0.5)

        print(f"Processing {pokemon_name}...")
        details = pokemon_client.get_pokemon_details(pokemon_name)
        if "error" in details:
            raise Exception(f"API Error: {details.get('error')}")

        # Type lookups that report an error are skipped rather than failing the item.
        type_info = []
        for t in details.get("types", []):
            t_info = pokemon_client.get_type_info(t)
            if "error" not in t_info:
                type_info.append(t_info)

        context = {"pokemon_details": details, "type_info": type_info}
        user_prompt = f"Here is the data for {pokemon_name}:\n```json\n{json.dumps(context, indent=2)}\n```"

        # Up to three attempts; back off 1s then 2s after a failed or empty attempt.
        response = None
        for attempt in range(3):
            try:
                response = llm_client.query(user_prompt=user_prompt, use_history=False)
            except Exception:
                if attempt == 2:
                    raise
            else:
                if response:
                    break
            if attempt < 2:
                time.sleep(2 ** attempt)

        # Validate before touching the disk: an empty answer must not leave an
        # orphan .json or an empty .md that would mark the item as done.
        if not response:
            raise RuntimeError(f"Empty LLM response for {pokemon_name} after 3 attempts")

        p_id = details.get("id", 0)
        file_prefix = f"{p_id:04d}_{pokemon_name}"

        with open(OUTPUT_DIR / f"{file_prefix}.json", "w", encoding="utf-8") as f:
            json.dump(context, f, indent=2)
        with open(OUTPUT_DIR / f"{file_prefix}.md", "w", encoding="utf-8") as f:
            f.write(response)

        print(f"Saved {file_prefix}.md")

    except Exception as e:
        with error_lock:
            error_count += 1
            print(f"Error processing {pokemon_name}: {e} (Count: {error_count})")
            if error_count >= ABORT_LIMIT:
                print("Aborting generation due to error limit.")
                abort_event.set()

# Main Execution
# Runs generate_pokemon_data over every Pokemon name on a thread pool.
try:
    POKEMON_LIST = get_all_pokemon()
    if POKEMON_LIST:
        print(f"Starting generation for {len(POKEMON_LIST)} Pokemon with {MAX_WORKERS} threads...")
        with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            try:
                # list() drains the lazy map iterator so the cell blocks until
                # all items are done and any worker exception surfaces here.
                list(executor.map(generate_pokemon_data, POKEMON_LIST))
            except KeyboardInterrupt:
                # Leaving the with-block waits for every queued task. Raising
                # the abort flag makes the remaining tasks return immediately
                # so the interrupt actually stops the run.
                abort_event.set()
                raise
    else:
        print("No Pokemon found to process.")
    if abort_event.is_set():
        # Workers set the flag once ABORT_LIMIT errors were counted.
        print("Generation stopped early after reaching the error limit.")
    else:
        print("Generation complete.")
except KeyboardInterrupt:
    print("Generation interrupted.")
except Exception as e:
    print(f"Fatal error: {e}")

In [16]:
# System prompt for move profile generation. It instructs the model to turn
# one move's JSON into a Markdown profile with three `##` sections
# (Identity & Mechanics, Competitive Profile, Distribution & Notable Users).
SYSTEM_PROMPT_MOVES = """You are an expert Pokémon Battle Mechanics Analyst. Your task is to process raw JSON data for a specific **Move** into a **semantically dense, comprehensive profile** optimized for Vector Search retrieval (RAG).

### Core Instruction:
Use the JSON as the **primary source** for stats, effects, and the learner list. Use **World Knowledge** to interpret competitive viability, compare it to rival moves, and **expand on the visual/thematic description** to ensure the text is rich with searchable keywords.

### STRICT Output Constraints:
1.  **No Chatter:** Start immediately with the Markdown headers.
2.  **Smart Filtering:** Do NOT list every Pokémon in `learned_by`. Filter the list to **Prominent Users** (Starters, Legendaries, Competitive Staples).
3.  **Terminology:** Use standard competitive terms (STAB, Coverage, Check, Nuke, Pivot, Wallbreaker).

### Processing Guidelines:

**1. Identity & Flavor (Enriched)**
* **Basics:** Combine `name`, `type`, `damage_class`, and `generation`.
* **Visual Description:** Combine the provided `flavor_text` with World Knowledge to describe the **visual execution** of the move. (e.g., "The user clenches its fist to deliver a high-impact blow..."). *Constraint: Describe only the visual, do not invent new mechanics.*
* **Mechanics:** Interpret `effect_description`. Explicitly label **Secondary Effects** (e.g., "10% chance to Burn") and **Mechanics** (e.g., "Bypasses Accuracy," "Sound-based").

**2. Competitive Profile**
* **Stat Block:** List **Power, Accuracy, PP** in a bullet point.
* **Archetype:** Assign a role based on stats:
    * *Power > 100 / Low Acc:* "High-Risk Nuke" / "Wallbreaker."
    * *Power 80-95 / 100 Acc:* "Reliable STAB" / "Consistency Option."
    * *Status Move:* "Utility" / "Setup."
    * *Outclassed:* If the move is strictly worse than another (e.g., Mega Punch vs Return), label it as "Early-game Filler" or "Niche."
* **Context:** Compare it to its main rival (e.g., Flamethrower vs. Fire Blast).
* **Attributes:** Mention if the move makes **Contact** or has tags like **Punching, Biting, Sound, or Slice**. Explicitly name Abilities that interact with it (e.g., "Boosted by Iron Fist").

**3. Distribution & Users**
* **Learner Analysis:** Analyze the `learned_by` list.
    * **Prominent Users:** 2-3 iconic STAB users.
    * **Synergy Users:** **Crucial.** If the move interacts with an Ability (Iron Fist, Strong Jaw, etc.), list Pokémon with that ability who learn it.
    * **Coverage Users:** Non-STAB users who carry it for specific threats.
* **Acquisition:** Mention if it is a common **TM, TR, or Move Tutor** move.

---

### Required Output Structure:

**## Identity & Mechanics**
**[Move Name]** is a [Damage Class] [Type]-type move introduced in [Generation].
* **Description:** [Expanded visual/thematic description for RAG searchability].
* **Effect:** [Detailed description].
* **Secondary Effect:** [Chance]% chance to [Status/Stat Drop] (or "None").
* **Targeting:** [Single/Spread/Self].

**## Competitive Profile**
* **Stats:** Power [Val], Accuracy [Val]%, PP [Val].
* **Role:** [Archetype] (e.g., Reliable STAB / Niche).
* **Usage:** [Comparison to rivals]. It deals **Super Effective** damage to **[Weakness Types]** types.
* **Attributes:** [Contact/Sound/Punching tags]. [Interaction with Abilities if applicable].

**## Distribution & Notable Users**
It has a [Wide/Narrow] distribution ([Total] learners).
* **Prominent Users:** Commonly utilized by **[User 1]**, **[User 2]**, and **[User 3]**.
* **Synergy Users:** [If applicable, users with boosting Abilities like Iron Fist/Strong Jaw].
* **Coverage Utility:** Frequently seen on non-[Type] Pokémon like **[User 4]** to counter [Counter-Type] threats.
* **Acquisition:** Widely available via **[TM/Tutor]** in [Generations].

---
**Input Data:**
[Provided by User]"""

In [17]:
# Local JSON file holding the list of move names so the API is hit only once.
MOVES_LIST_CACHE_FILE = Path("moves_list.json")

def get_all_moves(timeout=30):
    """Return the names of all moves, preferring the local JSON cache.

    If ``MOVES_LIST_CACHE_FILE`` exists and can be parsed, its content is
    returned as-is. Otherwise the list is fetched from the PokeAPI ``move``
    endpoint and written to the cache file.

    Args:
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook forever.

    Returns:
        The cached JSON content, or a list of name strings from the API, or
        ``[]`` when the fetch fails. A failure to *write* the cache does not
        discard a successfully fetched list.
    """
    if MOVES_LIST_CACHE_FILE.exists():
        print("Loading Moves list from cache...")
        try:
            with open(MOVES_LIST_CACHE_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            # Best effort: an unreadable/corrupt cache falls through to a fresh fetch.
            print(f"Error reading cache, fetching fresh list. Error: {e}")

    print("Fetching list of all Moves from API...")
    url = "https://pokeapi.co/api/v2/move?limit=10000"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        moves_list = [m["name"] for m in data["results"]]
    except Exception as e:
        print(f"Error fetching Move list: {e}")
        return []

    # Save to cache. Kept separate from the fetch so that a read-only or
    # missing directory only costs us the cache, not the fetched data.
    try:
        with open(MOVES_LIST_CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(moves_list, f)
        print(f"Saved {len(moves_list)} Moves to {MOVES_LIST_CACHE_FILE}")
    except OSError as e:
        print(f"Warning: could not write cache {MOVES_LIST_CACHE_FILE}: {e}")
    return moves_list


In [19]:

# Directory that receives one `<id>_<name>.json` and one `<id>_<name>.md` per move;
# created at cell execution time if it does not exist.
MOVES_OUTPUT_DIR = Path("raw/moves")
MOVES_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# NOTE: these three names are also assigned (with the same values) in the
# Pokemon generation cell; running this cell rebinds them for the whole kernel.
MODEL_NAME = "gemini-3-flash-preview"
MAX_WORKERS = 10
ABORT_LIMIT = 20

# Global state for moves
# Shared across worker threads: moves_error_count is only modified while holding
# moves_error_lock; moves_abort_event is checked at the start of every worker call.
moves_error_count = 0
moves_error_lock = threading.Lock()
moves_abort_event = threading.Event()



def generate_move_data(move_name):
    """Fetch one move's data, ask the LLM for a profile, and save both.

    Writes ``<id:04d>_<name>.json`` (the raw context sent to the model) and
    ``<id:04d>_<name>.md`` (the model's answer) into ``MOVES_OUTPUT_DIR``. The
    call is a no-op when the abort flag is set or when a matching ``.md``
    already exists.

    Any failure (client construction, API error payload, LLM error, empty LLM
    answer, file I/O) is caught, counted in the global ``moves_error_count``
    and printed; once ``ABORT_LIMIT`` failures are reached
    ``moves_abort_event`` is set so that remaining workers return immediately.

    Args:
        move_name: Name used for the API lookup and for the output file name.

    Returns:
        None.
    """
    global moves_error_count

    if moves_abort_event.is_set():
        return

    # Optimistic existing file check: the .md file is the "done" marker.
    if any(MOVES_OUTPUT_DIR.glob(f"*_{move_name}.md")):
        # print(f"Skipping {move_name} (File exists)")
        return

    try:
        # One client pair per call, so no client object is shared between
        # worker threads. Built inside the try so that a constructor failure
        # is counted like any other per-item error instead of propagating
        # out of executor.map and ending the whole run.
        pokemon_client = PokemonAPIClient(enable_cache=False)
        # Uses SYSTEM_PROMPT_MOVES; the previous spelling SYSTEM_PROMT_MOVES
        # is not defined anywhere and raised NameError on a fresh kernel.
        llm_client = LLMQuery(model=MODEL_NAME, system_prompt=SYSTEM_PROMPT_MOVES)

        # Small fixed pause before each item to space out outgoing requests.
        time.sleep(0.5)

        print(f"Processing Move: {move_name}...")
        details = pokemon_client.get_move_details(move_name, learned_by_limit=-1)
        if "error" in details:
            raise Exception(f"API Error: {details.get('error')}")

        context = {"move_details": details}
        user_prompt = f"Here is the data for {move_name}:\n```json\n{json.dumps(context, indent=2)}\n```"

        # Up to three attempts; back off 1s then 2s after a failed or empty attempt.
        response = None
        for attempt in range(3):
            try:
                response = llm_client.query(user_prompt=user_prompt, use_history=False)
            except Exception:
                if attempt == 2:
                    raise
            else:
                if response:
                    break
            if attempt < 2:
                time.sleep(2 ** attempt)

        # Validate before touching the disk: an empty answer must not leave an
        # orphan .json or an empty .md that would mark the item as done.
        if not response:
            raise RuntimeError(f"Empty LLM response for {move_name} after 3 attempts")

        m_id = details.get("id", 0)
        file_prefix = f"{m_id:04d}_{move_name}"

        with open(MOVES_OUTPUT_DIR / f"{file_prefix}.json", "w", encoding="utf-8") as f:
            json.dump(context, f, indent=2)
        with open(MOVES_OUTPUT_DIR / f"{file_prefix}.md", "w", encoding="utf-8") as f:
            f.write(response)

        print(f"Saved {file_prefix}.md")

    except Exception as e:
        with moves_error_lock:
            moves_error_count += 1
            print(f"Error processing {move_name}: {e} (Count: {moves_error_count})")
            if moves_error_count >= ABORT_LIMIT:
                print("Aborting moves generation due to error limit.")
                moves_abort_event.set()

# Main Execution for Moves
# Runs generate_move_data over every move name on a thread pool.
try:
    MOVES_LIST = get_all_moves()
    if MOVES_LIST:
        print(f"Starting generation for {len(MOVES_LIST)} Moves with {MAX_WORKERS} threads...")
        with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            try:
                # list() drains the lazy map iterator so the cell blocks until
                # all items are done and any worker exception surfaces here.
                list(executor.map(generate_move_data, MOVES_LIST))
            except KeyboardInterrupt:
                # Leaving the with-block waits for every queued task. Raising
                # the abort flag makes the remaining tasks return immediately
                # so the interrupt actually stops the run.
                moves_abort_event.set()
                raise
    else:
        print("No Moves found to process.")
    if moves_abort_event.is_set():
        # Workers set the flag once ABORT_LIMIT errors were counted.
        print("Moves Generation stopped early after reaching the error limit.")
    else:
        print("Moves Generation complete.")
except KeyboardInterrupt:
    print("Moves Generation interrupted.")
except Exception as e:
    print(f"Fatal error in Moves Generation: {e}")

Loading Moves list from cache...
Starting generation for 937 Moves with 10 threads...
Processing Move: pay-day...
Processing Move: fire-punch...
Processing Move: vice-grip...
Processing Move: scratch...
Processing Move: ice-punch...
Processing Move: thunder-punch...
Processing Move: guillotine...
Processing Move: cut...
Processing Move: swords-dance...
Processing Move: razor-wind...
Saved 0014_swords-dance.md
Processing Move: gust...
Saved 0011_vice-grip.md
Saved 0008_ice-punch.md
Saved 0015_cut.md
Saved 0006_pay-day.md
Processing Move: wing-attack...
Saved 0007_fire-punch.md
Processing Move: whirlwind...
Processing Move: fly...Processing Move: bind...

Processing Move: slam...
Saved 0010_scratch.md
Saved 0012_guillotine.md
Processing Move: vine-whip...
Saved 0013_razor-wind.md
Processing Move: stomp...
Processing Move: double-kick...
Saved 0009_thunder-punch.md
Processing Move: mega-kick...
Saved 0016_gust.md
Saved 0017_wing-attack.md
Processing Move: jump-kick...
Processing Move: rol