In [None]:
from metanalysis import RiotAPI
import pandas as pd
import requests
import time
import pandas as pd
import os
from datetime import datetime
import random

# teamPosition value -> column slot (top, jungle, mid, adc, support).
_ROLE_INDEX = {'TOP': 0, 'JUNGLE': 1, 'MIDDLE': 2, 'BOTTOM': 3, 'UTILITY': 4}
_ROLE_COLUMNS = ('top', 'jungle', 'mid', 'adc', 'support')


def _select_players(entries, max_players):
    """Return (start_idx, players): an LP-ranked, contiguous slice of at most max_players entries."""
    ranked = sorted(entries, key=lambda entry: entry['leaguePoints'], reverse=True)
    # start_idx is always bound, even when the league is small enough to take
    # whole; the caller reports it in its progress message.
    start_idx = 0
    # 70% of the time take the very top of the ladder, otherwise a random window.
    if len(ranked) > max_players and random.random() >= 0.7:
        start_idx = random.randint(0, len(ranked) - max_players)
    return start_idx, ranked[start_idx:start_idx + max_players]


def _assign_roles(participants):
    """Return (blue, red): two 5-element champion lists ordered top, jungle, mid, adc, support."""
    lineups = {'blue': [None] * 5, 'red': [None] * 5}
    unplaced = {'blue': [], 'red': []}

    for participant in participants:
        # teamId 100 is the blue side; every other participant is treated as red.
        side = 'blue' if participant.get('teamId') == 100 else 'red'
        champion = participant.get('championName', 'Unknown')
        slot = _ROLE_INDEX.get(participant.get('teamPosition', ''))

        # A participant with an unknown position, or whose position is already
        # taken by a teammate, is placed later instead of overwriting anyone.
        if slot is not None and lineups[side][slot] is None:
            lineups[side][slot] = champion
        else:
            unplaced[side].append(champion)

    # Fill the remaining empty slots, in order, with the unplaced champions.
    for side, champions in unplaced.items():
        free_slots = [i for i, current in enumerate(lineups[side]) if current is None]
        for slot, champion in zip(free_slots, champions):
            lineups[side][slot] = champion

    return lineups['blue'], lineups['red']


def _build_match_row(match_id, match_details):
    """Flatten one match payload (must contain 'info') into a CSV row dict."""
    info = match_details['info']

    winner_id = None
    for team in info.get('teams', []):
        if team.get('win'):
            winner_id = team.get('teamId')

    # Leave the winner empty when no team is flagged as the winner rather than
    # silently recording the match as a red-side win.
    if winner_id is None:
        winner = None
    else:
        winner = 'Blue' if winner_id == 100 else 'Red'

    blue, red = _assign_roles(info.get('participants', []))

    row = {
        'match_id': match_id,
        'game_date': pd.to_datetime(info.get('gameCreation', 0), unit='ms'),
        # gameDuration is divided by 60 to store minutes.
        'game_duration': round(info.get('gameDuration', 0) / 60, 2),
    }
    for side, lineup in (('blue', blue), ('red', red)):
        for role, champion in zip(_ROLE_COLUMNS, lineup):
            row[f'{side}_{role}'] = champion
    row['winner'] = winner
    return row


def _save_matches(existing_df, new_rows, csv_path, keep_backup):
    """Append new_rows to existing_df, write the result to csv_path and return it."""
    new_df = pd.DataFrame(new_rows)
    if existing_df.empty:
        combined_df = new_df
    else:
        combined_df = pd.concat([existing_df, new_df], ignore_index=True)

    directory = os.path.dirname(csv_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Write the full file to a temporary path first so that a failed write
    # never leaves csv_path missing or truncated.
    tmp_path = f"{csv_path}.tmp"
    combined_df.to_csv(tmp_path, index=False)

    if keep_backup and os.path.exists(csv_path):
        backup_path = f"{csv_path}.bak"
        try:
            os.replace(csv_path, backup_path)
            print(f"Created backup at {backup_path}")
        except OSError as e:
            # Best effort: a failed backup must not prevent saving new data.
            print(f"Failed to create backup: {e}")

    os.replace(tmp_path, csv_path)
    return combined_df


def continuous_match_collection(api_key, csv_path="static/top_match_data.csv", region="kr", 
                               save_interval=50, max_players_per_cycle=5, matches_per_player=3):
    """
    Continuously collect match data for Challenger players until interrupted.

    Each cycle fetches the Challenger league, picks a slice of players ranked
    by LP, downloads their recent matches that are not yet in the CSV and
    stores one row per match (champions per role for both teams, winner,
    date and duration). The loop only stops on KeyboardInterrupt (Ctrl+C).

    Parameters:
    - api_key: Your Riot API key
    - csv_path: Path to the CSV file (loaded at start if it exists)
    - region: Server region
    - save_interval: Number of new matches after which to save the CSV
    - max_players_per_cycle: Maximum number of top players to check per cycle
    - matches_per_player: Number of matches to retrieve per player

    Returns:
    - The DataFrame of all matches (existing plus newly collected) once the
      user interrupts the collection.
    """
    api = RiotAPI(api_key, region)

    # Load existing DataFrame if it exists
    if os.path.exists(csv_path):
        existing_df = pd.read_csv(csv_path)
        print(f"Loaded existing DataFrame with {len(existing_df)} rows")
        # Known match IDs are used to skip matches that were already stored
        existing_match_ids = set(existing_df['match_id'])
    else:
        existing_df = pd.DataFrame()
        existing_match_ids = set()
        print("No existing DataFrame found. Creating new one.")

    # Rows collected since the last save, plus counters for progress output
    new_match_data = []
    matches_since_last_save = 0
    total_matches_collected = len(existing_match_ids)
    total_cycles = 0

    start_time = time.time()

    print(f"\n{'='*50}")
    print(f"Starting continuous match collection at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*50}")

    try:
        while True:  # Run indefinitely
            total_cycles += 1
            cycle_start_time = time.time()

            print(f"\n{'-'*50}")
            print(f"Starting collection cycle #{total_cycles} at {datetime.now().strftime('%H:%M:%S')}")

            try:
                # Get Challenger league players
                challenger_league = api.get_challenger_league()
                if not challenger_league or 'entries' not in challenger_league:
                    print("Failed to retrieve Challenger league data, waiting 60 seconds to retry")
                    time.sleep(60)
                    continue

                start_idx, selected_players = _select_players(
                    challenger_league['entries'], max_players_per_cycle
                )
                print(f"Selected {len(selected_players)} players from positions {start_idx+1}-{start_idx+len(selected_players)}")

                # Process each player
                for player_idx, player in enumerate(selected_players, 1):
                    summoner_id = player.get('summonerId')
                    if not summoner_id:
                        continue

                    player_name = player.get('summonerName', 'Unknown')
                    lp = player.get('leaguePoints', 0)

                    print(f"[{player_idx}/{len(selected_players)}] Processing {player_name} ({lp} LP)")

                    summoner = api.get_summoner_by_id(summoner_id)
                    puuid = summoner.get('puuid') if summoner else None
                    if not puuid:
                        continue

                    match_ids = api.get_match_ids(puuid, count=matches_per_player, queue=420)
                    if not match_ids:
                        continue

                    # Get details for each match that we haven't seen before
                    for match_id in match_ids:
                        if match_id in existing_match_ids:
                            continue

                        print(f"  Retrieving new match: {match_id}")
                        match_details = api.get_match_details(match_id)

                        if not match_details or 'info' not in match_details:
                            continue

                        new_match_data.append(_build_match_row(match_id, match_details))

                        # Mark this match as seen
                        existing_match_ids.add(match_id)
                        matches_since_last_save += 1
                        total_matches_collected += 1

                        # Save periodically
                        if matches_since_last_save >= save_interval:
                            print(f"\nReached {matches_since_last_save} new matches, saving to CSV...")
                            existing_df = _save_matches(
                                existing_df, new_match_data, csv_path, keep_backup=True
                            )
                            print(f"Saved {len(existing_df)} total matches to '{csv_path}'")

                            # Pending rows are only cleared after a successful save
                            new_match_data = []
                            matches_since_last_save = 0

                # End of player processing
                cycle_duration = time.time() - cycle_start_time
                total_duration = time.time() - start_time

                print(f"\nCompleted cycle #{total_cycles} in {cycle_duration:.1f} seconds")
                print(f"Total running time: {total_duration/60:.1f} minutes")
                print(f"Total matches collected: {total_matches_collected}")

                # Cooldown between cycles to reduce load on API
                cooldown = random.randint(5, 15)
                print(f"Cooling down for {cooldown} seconds before next cycle...")
                time.sleep(cooldown)

            except Exception as e:
                # Top-level boundary: report any unexpected error but keep the
                # collector running (KeyboardInterrupt is not caught here).
                print(f"Error in collection cycle: {str(e)}")
                print("Waiting 60 seconds before continuing...")
                time.sleep(60)

    except KeyboardInterrupt:
        # Handle user interruption (Ctrl+C)
        print("\n\nUser interrupted the collection process.")

        # Nothing pending: return what is already on disk / in memory
        if not new_match_data:
            return existing_df

        # Save any remaining new matches
        print(f"Saving {len(new_match_data)} final matches to CSV...")
        combined_df = _save_matches(existing_df, new_match_data, csv_path, keep_backup=False)
        print(f"Final save completed. Total: {len(combined_df)} matches in '{csv_path}'")
        return combined_df

In [None]:

# Read the Riot API key from the environment instead of hard-coding it:
# a key stored in the notebook is exposed to anyone who can read the file.
api_key = os.environ.get("RIOT_API_KEY")
if not api_key:
    raise RuntimeError("Set the RIOT_API_KEY environment variable to your Riot API key")

# Runs until interrupted with Ctrl+C; the collected data is written to the CSV.
continuous_match_collection(
    api_key=api_key,
    csv_path="top_match_data.csv",
    region="kr",
    save_interval=25,          # Save after every 25 new matches
    max_players_per_cycle=20,  # Check at most 20 players per cycle
    matches_per_player=10,     # Request 10 matches per player
)

Loaded existing DataFrame with 716 rows

Starting continuous match collection at 2025-04-04 10:46:00

--------------------------------------------------
Starting collection cycle #1 at 10:46:00
Error: 400, URL: https://kr.api.riotgames.com/lol/league/v4/challengerleagues/by-queue/RANKED_SOLO_5x5
Failed to retrieve Challenger league data, waiting 60 seconds to retry


User interrupted the collection process.


Unnamed: 0,match_id,game_date,game_duration,blue_top,blue_jungle,blue_mid,blue_adc,blue_support,red_top,red_jungle,red_mid,red_adc,red_support,winner
0,KR_7571681247,2025-03-23 17:05:27.552,15.87,Ambessa,Vi,Aurora,MissFortune,Elise,Poppy,Viego,Azir,Ezreal,Neeko,Blue
1,KR_7571647391,2025-03-23 16:18:17.604,30.30,Jayce,Vi,Azir,Caitlyn,Braum,Fiora,XinZhao,Viktor,Draven,Pyke,Red
2,KR_7571553795,2025-03-23 15:00:11.440,25.73,Aatrox,Naafiri,Xerath,Ezreal,Poppy,Vayne,Gwen,Jayce,Kaisa,Leona,Red
3,KR_7571508665,2025-03-23 14:27:56.995,26.68,Akali,RekSai,Hwei,Zeri,Neeko,Gwen,Shaco,Sylas,Ezreal,Karma,Blue
4,KR_7571411934,2025-03-23 13:34:31.628,33.35,Jayce,Vi,Ahri,Kaisa,Rell,Ryze,Talon,TwistedFate,Ezreal,Rakan,Blue
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,KR_7567234066,2025-03-20 13:08:16.445000,15.20,Riven,Nocturne,Ahri,Jhin,Lux,Singed,Vi,Varus,Seraphine,Rell,Blue
712,KR_7567111348,2025-03-20 11:51:21.317000,19.22,Gwen,LeeSin,Galio,Kaisa,Rell,Ambessa,Karthus,Zed,Caitlyn,Lux,Blue
713,KR_7566969979,2025-03-20 09:48:36.112000,24.10,Sion,Viego,Aurora,MissFortune,Neeko,Gwen,LeeSin,Galio,Ashe,Karma,Blue
714,KR_7571656904,2025-03-23 16:36:40.164000,27.53,Jax,FiddleSticks,Azir,Lucian,Yuumi,Teemo,Skarner,Akshan,Kalista,Nautilus,Blue
