In [None]:
import requests
import pandas as pd
import time
import json
from datetime import datetime
from typing import List, Dict, Optional
from dotenv import load_dotenv
import os

## Imports

# Riot API Scraper for NattyNatt Rengar Analysis

This notebook scrapes League of Legends match data from the Riot API for detailed gameplay analysis.

In [None]:
load_dotenv()

API_KEY = os.getenv("RIOT_API_KEY")
if not API_KEY:
    raise ValueError("RIOT_API_KEY not found in .env file")

SUMMONER_NAME = "NattyNatt"
SUMMONER_TAG = "RANK1"
REGION = "kr"
ROUTING_REGION = "asia"

BASE_URL_SUMMONER = f"https://{REGION}.api.riotgames.com"
BASE_URL_MATCH = f"https://{ROUTING_REGION}.api.riotgames.com"
BASE_URL_ACCOUNT = f"https://{ROUTING_REGION}.api.riotgames.com"

RATE_LIMIT_PER_1_SEC = 20
RATE_LIMIT_PER_2_MIN = 100
request_times = []

## Configuration

Load API credentials and set up rate limiting parameters.

In [None]:
def rate_limit_check():
    current_time = time.time()
    
    request_times[:] = [t for t in request_times if current_time - t < 120]
    
    requests_last_second = [t for t in request_times if current_time - t < 1]
    if len(requests_last_second) >= RATE_LIMIT_PER_1_SEC:
        oldest_in_window = requests_last_second[0]
        sleep_time = 1.01 - (current_time - oldest_in_window)
        if sleep_time > 0:
            print(f"1-second rate limit reached ({len(requests_last_second)}/20). Sleeping for {sleep_time:.2f} seconds...")
            time.sleep(sleep_time)
            current_time = time.time()
            request_times[:] = [t for t in request_times if current_time - t < 120]
    
    if len(request_times) >= RATE_LIMIT_PER_2_MIN:
        oldest_request = request_times[0]
        sleep_time = 120.01 - (current_time - oldest_request)
        if sleep_time > 0:
            print(f"2-minute rate limit reached ({len(request_times)}/100). Sleeping for {sleep_time:.1f} seconds...")
            time.sleep(sleep_time)
            current_time = time.time()
            request_times[:] = [t for t in request_times if current_time - t < 120]
    
    request_times.append(current_time)

In [None]:
def get_match_details(match_id: str) -> Optional[Dict]:
    url = f"{BASE_URL_MATCH}/lol/match/v5/matches/{match_id}"
    headers = {"X-Riot-Token": API_KEY}
    
    rate_limit_check()
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        return response.json()
    elif response.status_code == 404:
        return None
    else:
        print(f"Error getting match {match_id}: {response.status_code}")
        return None

def get_match_timeline(match_id: str) -> Optional[Dict]:
    url = f"{BASE_URL_MATCH}/lol/match/v5/matches/{match_id}/timeline"
    headers = {"X-Riot-Token": API_KEY}
    
    rate_limit_check()
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        return response.json()
    elif response.status_code == 404:
        return None
    else:
        print(f"Error getting timeline {match_id}: {response.status_code}")
        return None

In [None]:
def extract_gank_timings(timeline: Dict, participant_id: int, max_time: int = 300) -> Dict:
    frames = timeline.get("info", {}).get("frames", [])
    
    gank_times = []
    
    for frame in frames:
        timestamp = frame.get("timestamp", 0) / 1000
        if timestamp > max_time:
            break
            
        events = frame.get("events", [])
        for event in events:
            if event.get("type") == "CHAMPION_KILL":
                if event.get("killerId") == participant_id or participant_id in event.get("assistingParticipantIds", []):
                    gank_times.append(timestamp)
    
    first_gank_time = gank_times[0] if gank_times else None
    
    return {
        "first_gank_time": first_gank_time,
        "ganks_by_5min": len([t for t in gank_times if t <= 300])
    }

In [None]:
def extract_skill_order(timeline: Dict, participant_id: int) -> Dict:
    frames = timeline.get("info", {}).get("frames", [])
    
    skill_order = []
    
    for frame in frames:
        events = frame.get("events", [])
        
        for event in events:
            if event.get("type") == "SKILL_LEVEL_UP":
                if event.get("participantId") == participant_id:
                    skill_slot = event.get("skillSlot")
                    skill_map = {1: "Q", 2: "W", 3: "E", 4: "R"}
                    skill = skill_map.get(skill_slot, str(skill_slot))
                    skill_order.append(skill)
    
    skill_counts = {"Q": 0, "W": 0, "E": 0}
    max_order = []
    
    for skill in skill_order[:9]:
        if skill in skill_counts:
            skill_counts[skill] += 1
            if skill_counts[skill] == 5 and skill not in max_order:
                max_order.append(skill)
    
    return {
        "skill_order": "->".join(skill_order[:18]) if skill_order else None,
        "max_order": "->".join(max_order) if max_order else None,
    }

In [None]:
def extract_item_timeline(timeline: Dict, participant_id: int) -> Dict:
    frames = timeline.get("info", {}).get("frames", [])
    
    item_purchases = []
    control_wards_bought = 0
    
    for frame in frames:
        timestamp = frame.get("timestamp", 0) / 1000
        events = frame.get("events", [])
        
        for event in events:
            if event.get("type") == "ITEM_PURCHASED" and event.get("participantId") == participant_id:
                item_id = event.get("itemId")
                item_purchases.append(f"{item_id}@{int(timestamp)}s")
                
                if item_id == 2055:
                    control_wards_bought += 1
    
    build_order = "->".join(item_purchases[:15]) if item_purchases else None
    
    mythic_time = None
    mythic_ids = {6630, 6631, 6632, 6653, 6655, 6656, 6662, 6664, 6665, 6671, 6672, 6673, 6691, 6692, 6693}
    for purchase in item_purchases:
        item_info = purchase.split("@")
        if int(item_info[0]) in mythic_ids:
            mythic_time = item_info[1]
            break
    
    return {
        "item_build_order": build_order,
        "mythic_completion_time": mythic_time,
        "control_wards_bought": control_wards_bought,
        "total_items_purchased": len(item_purchases)
    }

In [None]:
def extract_runes_and_spells(participant: Dict) -> Dict:
    perks = participant.get("perks", {})
    styles = perks.get("styles", [])
    
    primary_style = styles[0] if len(styles) > 0 else {}
    secondary_style = styles[1] if len(styles) > 1 else {}
    
    primary_selections = primary_style.get("selections", [])
    secondary_selections = secondary_style.get("selections", [])
    
    stat_perks = perks.get("statPerks", {})
    
    return {
        "primary_tree": primary_style.get("style"),
        "keystone": primary_selections[0].get("perk") if len(primary_selections) > 0 else None,
        "rune_1": primary_selections[1].get("perk") if len(primary_selections) > 1 else None,
        "rune_2": primary_selections[2].get("perk") if len(primary_selections) > 2 else None,
        "rune_3": primary_selections[3].get("perk") if len(primary_selections) > 3 else None,
        "secondary_tree": secondary_style.get("style"),
        "rune_4": secondary_selections[0].get("perk") if len(secondary_selections) > 0 else None,
        "rune_5": secondary_selections[1].get("perk") if len(secondary_selections) > 1 else None,
        "stat_shard_1": stat_perks.get("defense"),
        "stat_shard_2": stat_perks.get("flex"),
        "stat_shard_3": stat_perks.get("offense"),
        "summoner_1": participant.get("summoner1Id"),
        "summoner_2": participant.get("summoner2Id"),
    }

## Build and Runes Extraction

Functions to extract runes, summoner spells, item builds, and skill orders.

In [None]:
def extract_camp_sequence(timeline: Dict, participant_id: int, max_time: int = 300) -> Dict:
    frames = timeline.get("info", {}).get("frames", [])
    
    camp_kills = []
    monster_types = {
        "SRU_Baron": "BARON",
        "SRU_Dragon": "DRAGON",
        "SRU_RiftHerald": "HERALD",
        "SRU_Red": "RED_BUFF",
        "SRU_Blue": "BLUE_BUFF",
        "SRU_Gromp": "GROMP",
        "SRU_Murkwolf": "WOLVES",
        "SRU_Razorbeak": "RAPTORS",
        "SRU_Krug": "KRUGS",
        "SRU_Crab": "SCUTTLE"
    }
    
    for frame in frames:
        timestamp = frame.get("timestamp", 0) / 1000
        if timestamp > max_time:
            break
            
        events = frame.get("events", [])
        for event in events:
            if event.get("type") == "MONSTER_KILL":
                if event.get("killerId") == participant_id:
                    monster_type = event.get("monsterType", "")
                    monster_subtype = event.get("monsterSubType", "")
                    
                    camp_name = None
                    if monster_type in monster_types:
                        camp_name = monster_types[monster_type]
                    elif monster_subtype in monster_types:
                        camp_name = monster_types[monster_subtype]
                    
                    if camp_name:
                        camp_kills.append({
                            "camp": camp_name,
                            "timestamp": timestamp
                        })
    
    camp_kills.sort(key=lambda x: x["timestamp"])
    
    camp_sequence = "->".join([c["camp"] for c in camp_kills])
    first_camp = camp_kills[0]["camp"] if camp_kills else None
    first_camp_time = camp_kills[0]["timestamp"] if camp_kills else None
    
    camps_by_5min = len([c for c in camp_kills if c["timestamp"] <= 300])
    
    scuttle_kills = [c for c in camp_kills if c["camp"] == "SCUTTLE"]
    first_scuttle_time = scuttle_kills[0]["timestamp"] if scuttle_kills else None
    
    return {
        "camp_sequence": camp_sequence,
        "first_camp": first_camp,
        "first_camp_time": first_camp_time,
        "camps_cleared_by_5min": camps_by_5min,
        "first_scuttle_time": first_scuttle_time
    }

In [None]:
def extract_cs_at_minutes(timeline: Dict, participant_id: int, minutes: List[int] = [1,2,3,4,5]) -> Dict:
    frames = timeline.get("info", {}).get("frames", [])
    cs_data = {}
    
    for minute in minutes:
        frame_index = minute
        if frame_index < len(frames):
            frame = frames[frame_index]
            participant_frame = frame.get("participantFrames", {}).get(str(participant_id), {})
            cs_data[f"cs_at_{minute}min"] = participant_frame.get("minionsKilled", 0) + \
                                            participant_frame.get("jungleMinionsKilled", 0)
        else:
            cs_data[f"cs_at_{minute}min"] = None
    
    return cs_data

## Jungle-Specific Extraction Functions

Functions to extract jungle camp sequences, CS progression, and gank timings.

In [None]:
def get_match_ids(puuid: str, count: int = 100, queue_id: int = None, start_time: int = None) -> List[str]:
    all_match_ids = []
    start_index = 0
    batch_size = 100
    
    while True:
        url = f"{BASE_URL_MATCH}/lol/match/v5/matches/by-puuid/{puuid}/ids"
        params = {"start": start_index, "count": batch_size}
        if queue_id:
            params["queue"] = queue_id
        if start_time:
            params["startTime"] = start_time
        
        headers = {"X-Riot-Token": API_KEY}
        
        rate_limit_check()
        response = requests.get(url, headers=headers, params=params)
        
        if response.status_code == 200:
            match_ids = response.json()
            if not match_ids:
                break
            
            all_match_ids.extend(match_ids)
            print(f"  Fetched {len(match_ids)} match IDs (total: {len(all_match_ids)})")
            
            if len(match_ids) < batch_size:
                break
            
            if count and len(all_match_ids) >= count:
                all_match_ids = all_match_ids[:count]
                break
            
            start_index += batch_size
        else:
            print(f"Error getting match IDs: {response.status_code}")
            break
    
    return all_match_ids

In [None]:
def get_summoner_puuid(game_name: str, tag_line: str = None) -> Optional[str]:
    import urllib.parse
    
    if not tag_line:
        print("Tag line is required for Account-V1 API")
        return None
    
    encoded_game_name = urllib.parse.quote(game_name)
    encoded_tag = urllib.parse.quote(tag_line)
    url = f"{BASE_URL_ACCOUNT}/riot/account/v1/accounts/by-riot-id/{encoded_game_name}/{encoded_tag}"
    headers = {"X-Riot-Token": API_KEY}
    
    rate_limit_check()
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        data = response.json()
        puuid = data.get("puuid")
        print(f"✓ Found PUUID: {puuid[:8]}...")
        return puuid
    else:
        print(f"Error getting PUUID: {response.status_code}")
        try:
            error_data = response.json()
            print(f"Error details: {error_data}")
        except:
            print(f"Response text: {response.text}")
        return None

## Core API Functions

Functions to fetch player PUUID, match IDs, match details, and timelines.

## Rate Limiting

Manages API rate limits: 20 requests per second and 100 requests per 2 minutes.

In [None]:
print("="*80)
print("MAIN SCRAPER - Fetch All Matches with Comprehensive Data")
print("="*80)

csv_path = "data/nattynatt_rengar_matches.csv"

if os.path.exists(csv_path):
    existing_df = pd.read_csv(csv_path)
    print(f"\n✓ Match data already exists: {csv_path}")
    print(f"  - {len(existing_df)} matches found")
    print(f"  - Columns: {len(existing_df.columns)}")
    print("\nSkipping scraper. Delete the file to re-scrape.")
    print("Or set FORCE_RESCRAPE = True below to overwrite.\n")
    
    FORCE_RESCRAPE = False
    
    if not FORCE_RESCRAPE:
        print("✓ Using existing data. Proceed to Cell 4 for frame extraction!")
    else:
        print("⚠️  FORCE_RESCRAPE = True - Re-scraping all matches...")
        df = scrape_all_matches(SUMMONER_NAME, SUMMONER_TAG, months_back=4)
        os.makedirs("data", exist_ok=True)
        df.to_csv(csv_path, index=False)
        print(f"\n✓ Re-scraped and saved {len(df)} matches to {csv_path}")
else:
    print("\n⚠️  No existing data found. Starting scraper...")
    print("\nThis will scrape all matches from the last 4 months with:")
    print("  - Basic stats (KDA, CS, gold, damage)")
    print("  - Runes and summoner spells")  
    print("  - Item build timeline")
    print("  - Frame progression (gold/xp at 5/10/15/20 min)")
    print("  - Objectives (dragons, herald, baron, turrets)")
    print("  - Vision control")
    print("  - Positioning and pathing")
    print("  - Skill order")
    print("  - Combat stats")
    print("  - Team composition and matchups")
    print("\n⚠️  This will take a while and use API rate limits!\n")
    
    df = scrape_all_matches(SUMMONER_NAME, SUMMONER_TAG, months_back=4)
    
    os.makedirs("data", exist_ok=True)
    
    df.to_csv(csv_path, index=False)
    print(f"\n✓ Saved {len(df)} matches to {csv_path}")


MAIN SCRAPER - Fetch All Matches with Comprehensive Data

✓ Match data already exists: data/nattynatt_rengar_matches.csv
  - 317 matches found
  - Columns: 117

Skipping scraper. Delete the file to re-scrape.
Or set FORCE_RESCRAPE = True below to overwrite.

✓ Using existing data. Proceed to Cell 4 for frame extraction!


In [11]:
# Test extraction on a single match to verify all functions work
print("Testing new extraction functions on a single match...")

# Get PUUID
puuid = get_summoner_puuid(SUMMONER_NAME, SUMMONER_TAG)

if puuid:
    # Get first match
    print("Fetching a test match...")
    match_ids = get_match_ids(puuid, count=1, queue_id=420, start_time=None)
    
    if match_ids:
        test_match_id = match_ids[0]
        print(f"Testing with match: {test_match_id}")
        
        # Process the match
        match_result = process_match(test_match_id, puuid)
        
        if match_result:
            print(f"\n✓ Successfully extracted {len(match_result)} fields!")
            print("\nSample of new fields:")
            print(f"  - Primary tree: {match_result.get('primary_tree')}")
            print(f"  - Keystone: {match_result.get('keystone')}")
            print(f"  - Summoners: {match_result.get('summoner_1')}, {match_result.get('summoner_2')}")
            print(f"  - Gold at 10min: {match_result.get('gold_at_10min')}")
            print(f"  - Level at 10min: {match_result.get('level_at_10min')}")
            print(f"  - Dragons killed: {match_result.get('dragons_killed')}")
            print(f"  - Wards placed: {match_result.get('wards_placed')}")
            print(f"  - Kill participation: {match_result.get('kill_participation_pct')}%")
            print(f"  - Enemy jungle: {match_result.get('enemy_jungle')}")
            print(f"  - Ally top: {match_result.get('ally_top')}")
            print(f"  - Skill order: {match_result.get('skill_order')[:50]}..." if match_result.get('skill_order') else "  - Skill order: None")
            print(f"  - Item build: {match_result.get('item_build_order')[:80]}..." if match_result.get('item_build_order') else "  - Item build: None")
            print("\n✓ All extraction functions working correctly!")
        else:
            print("✗ Match processing returned None (might not be a Rengar game)")
    else:
        print("✗ No matches found")
else:
    print("✗ Failed to get PUUID")


Testing new extraction functions on a single match...
✓ Found PUUID: DefvgEBJ...
Fetching a test match...
  Fetched 100 match IDs (total: 100)
Testing with match: KR_7907383471

✓ Successfully extracted 117 fields!

Sample of new fields:
  - Primary tree: 8000
  - Keystone: 8010
  - Summoners: 4, 11
  - Gold at 10min: 4403
  - Level at 10min: 7
  - Dragons killed: 1
  - Wards placed: 3
  - Kill participation: 65.0%
  - Enemy jungle: Elise
  - Ally top: Olaf
  - Skill order: Q->W->E->Q->Q->R->Q->E->Q->E->R...
  - Item build: 1102@60s->3340@60s->2003@60s->1036@240s->1036@240s->3134@420s->6690@420s->3364@4...

✓ All extraction functions working correctly!


In [12]:
# Cell 4: Extract Frame-by-Frame Data for All Players
print("="*80)
print("FRAME-BY-FRAME DATA EXTRACTION (All 10 Players)")
print("="*80)
print("\nThis extracts minute-by-minute position, gold, XP, CS, and damage data")
print("for all 10 players in each match. Use for behavioral analysis and")
print("comparing NattyNatt vs enemy jungler pathing.\n")

# Read existing match IDs
matches_csv = "data/nattynatt_rengar_matches.csv"
if not os.path.exists(matches_csv):
    print(f"Error: {matches_csv} not found. Run Cell 2 first to scrape matches.")
else:
    matches_df = pd.read_csv(matches_csv)
    match_ids_to_process = matches_df['match_id'].tolist()
    
    print(f"Found {len(match_ids_to_process)} matches to process")
    print(f"Estimated frames: ~{len(match_ids_to_process) * 10 * 25} (10 players × 25 min avg)")
    print("\nStarting extraction...")
    print("-"*80)
    
    all_frames = []
    processed_count = 0
    error_count = 0
    start_time = time.time()
    
    # Get PUUID for target player
    puuid = get_summoner_puuid(SUMMONER_NAME, SUMMONER_TAG)
    
    if not puuid:
        print("Error: Could not get PUUID")
    else:
        for idx, match_id in enumerate(match_ids_to_process):
            try:
                # Fetch match data and timeline
                match_data = get_match_details(match_id)
                if not match_data:
                    print(f"[{idx+1}/{len(match_ids_to_process)}] ✗ {match_id}: No match data")
                    error_count += 1
                    continue
                
                timeline_data = get_match_timeline(match_id)
                if not timeline_data:
                    print(f"[{idx+1}/{len(match_ids_to_process)}] ✗ {match_id}: No timeline data")
                    error_count += 1
                    continue
                
                # Extract frames for all players
                frames = extract_all_frames_multi_player(match_data, timeline_data, puuid)
                all_frames.extend(frames)
                processed_count += 1
                
                # Progress update every 10 matches
                if (idx + 1) % 10 == 0:
                    elapsed = time.time() - start_time
                    avg_time = elapsed / (idx + 1)
                    remaining = avg_time * (len(match_ids_to_process) - idx - 1)
                    print(f"[{idx+1}/{len(match_ids_to_process)}] Processed: {processed_count} | "
                          f"Frames: {len(all_frames)} | "
                          f"ETA: {int(remaining/60)}m {int(remaining%60)}s")
                
            except Exception as e:
                print(f"[{idx+1}/{len(match_ids_to_process)}] ✗ {match_id}: Error - {str(e)}")
                error_count += 1
                continue
        
        # Convert to DataFrame and save
        if all_frames:
            frames_df = pd.DataFrame(all_frames)
            
            # Save to CSV
            output_path = "data/nattynatt_rengar_frames.csv"
            frames_df.to_csv(output_path, index=False)
            
            elapsed_total = time.time() - start_time
            print("\n" + "="*80)
            print("FRAME EXTRACTION COMPLETE")
            print("="*80)
            print(f"Processed matches: {processed_count}/{len(match_ids_to_process)}")
            print(f"Errors: {error_count}")
            print(f"Total frames extracted: {len(all_frames):,}")
            print(f"Unique players tracked: {frames_df['participant_id'].nunique()}")
            print(f"Total time: {int(elapsed_total/60)}m {int(elapsed_total%60)}s")
            print(f"\nSaved to: {output_path}")
            print(f"File size: {os.path.getsize(output_path) / (1024*1024):.1f} MB")
            print("\nFrame data columns:")
            print(f"  - {len(frames_df.columns)} columns")
            print(f"  - Key fields: position_x/y, gold, xp, level, cs, damage")
            print(f"  - Flags: is_nattynatt, is_ally, is_enemy_jungler")
        else:
            print("\nNo frames extracted!")


FRAME-BY-FRAME DATA EXTRACTION (All 10 Players)

This extracts minute-by-minute position, gold, XP, CS, and damage data
for all 10 players in each match. Use for behavioral analysis and
comparing NattyNatt vs enemy jungler pathing.

Found 317 matches to process
Estimated frames: ~79250 (10 players × 25 min avg)

Starting extraction...
--------------------------------------------------------------------------------
✓ Found PUUID: DefvgEBJ...
[10/317] Processed: 10 | Frames: 2550 | ETA: 7m 44s
[20/317] Processed: 20 | Frames: 4980 | ETA: 7m 32s
[30/317] Processed: 30 | Frames: 7230 | ETA: 7m 8s
[40/317] Processed: 40 | Frames: 9860 | ETA: 6m 52s
2-minute rate limit reached (100/100). Sleeping for 46.3 seconds...
Error getting timeline KR_7883625192: 429
[48/317] ✗ KR_7883625192: No timeline data
2-minute rate limit reached (100/100). Sleeping for 0.0 seconds...
[50/317] Processed: 49 | Frames: 12200 | ETA: 10m 48s
[60/317] Processed: 59 | Frames: 14830 | ETA: 9m 45s
[70/317] Processed: 6

In [13]:
# Cell 5: Extract Event Data with Precise Timestamps
print("="*80)
print("EVENT DATA EXTRACTION (Precise Timestamps)")
print("="*80)
print("\nThis extracts all game events with millisecond precision:")
print("  - Jungle camp clears (for pathing reconstruction)")
print("  - Champion kills/deaths/assists")
print("  - Item purchases and sales")
print("  - Ward placements and destructions")
print("  - Objective takes (dragons, herald, baron)")
print("  - Turret/inhibitor kills")
print("\nUse for second-by-second behavioral analysis.\n")

# Read existing match IDs
matches_csv = "data/nattynatt_rengar_matches.csv"
if not os.path.exists(matches_csv):
    print(f"Error: {matches_csv} not found. Run Cell 2 first to scrape matches.")
else:
    matches_df = pd.read_csv(matches_csv)
    match_ids_to_process = matches_df['match_id'].tolist()
    
    print(f"Found {len(match_ids_to_process)} matches to process")
    print(f"Estimated events: ~{len(match_ids_to_process) * 300} (avg 300 events/game)")
    print("\nStarting extraction...")
    print("-"*80)
    
    all_events = []
    processed_count = 0
    error_count = 0
    start_time = time.time()
    
    # Get PUUID for target player
    puuid = get_summoner_puuid(SUMMONER_NAME, SUMMONER_TAG)
    
    if not puuid:
        print("Error: Could not get PUUID")
    else:
        for idx, match_id in enumerate(match_ids_to_process):
            try:
                # Fetch match data and timeline
                match_data = get_match_details(match_id)
                if not match_data:
                    print(f"[{idx+1}/{len(match_ids_to_process)}] ✗ {match_id}: No match data")
                    error_count += 1
                    continue
                
                timeline_data = get_match_timeline(match_id)
                if not timeline_data:
                    print(f"[{idx+1}/{len(match_ids_to_process)}] ✗ {match_id}: No timeline data")
                    error_count += 1
                    continue
                
                # Extract all events
                events = extract_all_events(match_data, timeline_data, puuid)
                all_events.extend(events)
                processed_count += 1
                
                # Progress update every 10 matches
                if (idx + 1) % 10 == 0:
                    elapsed = time.time() - start_time
                    avg_time = elapsed / (idx + 1)
                    remaining = avg_time * (len(match_ids_to_process) - idx - 1)
                    print(f"[{idx+1}/{len(match_ids_to_process)}] Processed: {processed_count} | "
                          f"Events: {len(all_events):,} | "
                          f"ETA: {int(remaining/60)}m {int(remaining%60)}s")
                
            except Exception as e:
                print(f"[{idx+1}/{len(match_ids_to_process)}] ✗ {match_id}: Error - {str(e)}")
                error_count += 1
                continue
        
        # Convert to DataFrame and save
        if all_events:
            events_df = pd.DataFrame(all_events)
            
            # Save to CSV
            output_path = "data/nattynatt_rengar_events.csv"
            events_df.to_csv(output_path, index=False)
            
            elapsed_total = time.time() - start_time
            print("\n" + "="*80)
            print("EVENT EXTRACTION COMPLETE")
            print("="*80)
            print(f"Processed matches: {processed_count}/{len(match_ids_to_process)}")
            print(f"Errors: {error_count}")
            print(f"Total events extracted: {len(all_events):,}")
            print(f"Total time: {int(elapsed_total/60)}m {int(elapsed_total%60)}s")
            print(f"\nSaved to: {output_path}")
            print(f"File size: {os.path.getsize(output_path) / (1024*1024):.1f} MB")
            
            # Event type breakdown
            print("\nEvent type breakdown:")
            event_counts = events_df['event_type'].value_counts()
            for event_type, count in event_counts.head(10).items():
                print(f"  - {event_type}: {count:,}")
            
            # NattyNatt specific stats
            natty_events = events_df[events_df['is_nattynatt'] == True]
            print(f"\nNattyNatt's events: {len(natty_events):,}")
            if len(natty_events) > 0:
                print(f"  - MONSTER_KILL: {len(natty_events[natty_events['event_type'] == 'MONSTER_KILL']):,}")
                print(f"  - CHAMPION_KILL: {len(natty_events[natty_events['event_type'] == 'CHAMPION_KILL']):,}")
                print(f"  - ITEM_PURCHASED: {len(natty_events[natty_events['event_type'] == 'ITEM_PURCHASED']):,}")
                print(f"  - WARD_PLACED: {len(natty_events[natty_events['event_type'] == 'WARD_PLACED']):,}")
        else:
            print("\nNo events extracted!")


EVENT DATA EXTRACTION (Precise Timestamps)

This extracts all game events with millisecond precision:
  - Jungle camp clears (for pathing reconstruction)
  - Champion kills/deaths/assists
  - Item purchases and sales
  - Ward placements and destructions
  - Objective takes (dragons, herald, baron)
  - Turret/inhibitor kills

Use for second-by-second behavioral analysis.

Found 317 matches to process
Estimated events: ~95100 (avg 300 events/game)

Starting extraction...
--------------------------------------------------------------------------------
✓ Found PUUID: DefvgEBJ...
[10/317] Processed: 10 | Events: 10,047 | ETA: 8m 5s
[20/317] Processed: 20 | Events: 19,154 | ETA: 7m 39s
2-minute rate limit reached (100/100). Sleeping for 0.2 seconds...
2-minute rate limit reached (100/100). Sleeping for 0.0 seconds...
2-minute rate limit reached (100/100). Sleeping for 0.2 seconds...
2-minute rate limit reached (100/100). Sleeping for 0.0 seconds...
2-minute rate limit reached (100/100). Slee

In [14]:
# Cell 6: Test All Extraction Functions on Single Match
print("="*80)
print("COMPREHENSIVE EXTRACTION TEST")
print("="*80)
print("\nTesting all extraction functions on a single match...\n")

# Get PUUID
puuid = get_summoner_puuid(SUMMONER_NAME, SUMMONER_TAG)

if puuid:
    # Get a single match
    print("Fetching a test match...")
    match_ids = get_match_ids(puuid, count=1, queue_id=420, start_time=None)
    
    if match_ids:
        test_match_id = match_ids[0]
        print(f"Testing with match: {test_match_id}\n")
        print("-"*80)
        
        # Fetch match data
        match_data = get_match_details(test_match_id)
        timeline_data = get_match_timeline(test_match_id)
        
        if match_data and timeline_data:
            # Test 1: Summary extraction (existing)
            print("\n1. SUMMARY EXTRACTION (Cell 2)")
            match_result = process_match(test_match_id, puuid)
            if match_result:
                print(f"   ✓ Extracted {len(match_result)} summary fields")
                print(f"   - Win: {match_result.get('win')}")
                print(f"   - KDA: {match_result.get('kills')}/{match_result.get('deaths')}/{match_result.get('assists')}")
                print(f"   - Enemy jungle: {match_result.get('enemy_jungle')}")
                print(f"   - Primary rune: {match_result.get('primary_tree')}")
            else:
                print("   ✗ Not a Rengar game or error")
            
            # Test 2: Frame extraction (all players)
            print("\n2. FRAME EXTRACTION (Cell 4 - All 10 Players)")
            frames = extract_all_frames_multi_player(match_data, timeline_data, puuid)
            print(f"   ✓ Extracted {len(frames)} frame records")
            print(f"   - Players tracked: {len(set(f['participant_id'] for f in frames))}")
            print(f"   - Minutes covered: {len(set(f['minute'] for f in frames))}")
            
            # Show sample frame data
            natty_frames = [f for f in frames if f['is_nattynatt']]
            if natty_frames:
                sample_frame = natty_frames[5] if len(natty_frames) > 5 else natty_frames[0]
                print(f"\n   Sample (NattyNatt at minute {sample_frame['minute']}):")
                print(f"     - Position: ({sample_frame['position_x']}, {sample_frame['position_y']})")
                print(f"     - Gold: {sample_frame['total_gold']}")
                print(f"     - Level: {sample_frame['level']}")
                print(f"     - CS: {sample_frame['total_cs']}")
            
            # Show enemy jungler comparison
            enemy_jg_frames = [f for f in frames if f['is_enemy_jungler']]
            if enemy_jg_frames:
                enemy_champ = enemy_jg_frames[0]['champion']
                print(f"\n   Enemy Jungler: {enemy_champ}")
                print(f"     - Frames tracked: {len(enemy_jg_frames)}")
            
            # Test 3: Event extraction
            print("\n3. EVENT EXTRACTION (Cell 5 - Precise Timestamps)")
            events = extract_all_events(match_data, timeline_data, puuid)
            print(f"   ✓ Extracted {len(events)} events")
            
            # Event type breakdown
            event_types = {}
            for event in events:
                et = event['event_type']
                event_types[et] = event_types.get(et, 0) + 1
            
            print(f"   - Event types: {len(event_types)}")
            print("\n   Top event types:")
            for et, count in sorted(event_types.items(), key=lambda x: x[1], reverse=True)[:5]:
                print(f"     - {et}: {count}")
            
            # NattyNatt's events
            natty_events = [e for e in events if e['is_nattynatt']]
            print(f"\n   NattyNatt's events: {len(natty_events)}")
            monster_kills = [e for e in natty_events if e['event_type'] == 'MONSTER_KILL']
            print(f"     - Jungle camps cleared: {len(monster_kills)}")
            if monster_kills:
                first_camp = monster_kills[0]
                print(f"     - First camp: {first_camp.get('monster_subtype', 'N/A')} at {first_camp['timestamp']:.1f}s")
            
            champion_kills = [e for e in natty_events if e['event_type'] == 'CHAMPION_KILL']
            print(f"     - Champion kills: {len(champion_kills)}")
            
            items_purchased = [e for e in natty_events if e['event_type'] == 'ITEM_PURCHASED']
            print(f"     - Items purchased: {len(items_purchased)}")
            
            print("\n" + "="*80)
            print("✓ ALL EXTRACTION FUNCTIONS WORKING CORRECTLY!")
            print("="*80)
            print("\nYou can now run:")
            print("  - Cell 2: Re-scrape all matches with comprehensive data")
            print("  - Cell 4: Extract frame-by-frame data for all players")
            print("  - Cell 5: Extract all events with precise timestamps")
            
        else:
            print("✗ Could not fetch match/timeline data")
    else:
        print("✗ No matches found")
else:
    print("✗ Failed to get PUUID")


COMPREHENSIVE EXTRACTION TEST

Testing all extraction functions on a single match...

✓ Found PUUID: DefvgEBJ...
Fetching a test match...
  Fetched 100 match IDs (total: 100)
Testing with match: KR_7907383471

--------------------------------------------------------------------------------

1. SUMMARY EXTRACTION (Cell 2)
   ✓ Extracted 117 summary fields
   - Win: True
   - KDA: 7/1/6
   - Enemy jungle: Elise
   - Primary rune: 8000

2. FRAME EXTRACTION (Cell 4 - All 10 Players)
   ✓ Extracted 170 frame records
   - Players tracked: 10
   - Minutes covered: 17

   Sample (NattyNatt at minute 5):
     - Position: (8311, 2426)
     - Gold: 1843
     - Level: 4
     - CS: 32

   Enemy Jungler: Elise
     - Frames tracked: 17

3. EVENT EXTRACTION (Cell 5 - Precise Timestamps)
   ✓ Extracted 600 events
   - Event types: 17

   Top event types:
     - ITEM_PURCHASED: 145
     - ITEM_DESTROYED: 106
     - SKILL_LEVEL_UP: 93
     - LEVEL_UP: 81
     - WARD_PLACED: 72

   NattyNatt's events: 67