In [None]:
import cassiopeia as cass
from cassiopeia.core.league import ChallengerLeague, GrandmasterLeague, MasterLeague
from cassiopeia.datastores.riotapi.common import APIError
from dotenv import load_dotenv
import os
from itertools import islice
import json
import time
import pickle
import io
import gzip
load_dotenv() 


In [None]:
# Match data is expected in a "match_data" folder next to the working directory's parent.
curr_dir = os.getcwd()
data_dir = os.path.join(curr_dir, os.pardir, "match_data")

In [None]:
# Hand cassiopeia the API key read from the RIOT_API_KEY environment variable
# (None is passed through if the variable is not set).
cass.set_riot_api_key(os.environ.get("RIOT_API_KEY"))


In [None]:
# Workaround noted by the original author: the lazy loader is bugged, so the raw
# data is pulled straight out of the object's internal `_data` mapping instead.
def extract_raw_data(obj):
    """Return the first value stored in ``obj._data``.

    ``obj._data`` is treated as a mapping; the value under its first key
    (in iteration order) is returned. Raises ``StopIteration`` if the
    mapping is empty.
    """
    backing_store = obj._data
    first_key = next(iter(backing_store))
    return backing_store[first_key]

In [None]:
# Collects the puuid of every entry in a league (challenger, grandmaster, master)
def get_all_league_entries(league):
    """Return a list holding the ``puuid`` of each entry yielded by ``league``.

    Each entry is unwrapped with ``extract_raw_data`` before its ``puuid``
    attribute is read; the result keeps the league's iteration order.
    """
    return [extract_raw_data(entry).puuid for entry in league]

In [None]:
# Fetch the Challenger, Grandmaster and Master leagues for the NA ranked solo queue.
challenger_league = cass.get_challenger_league(queue=cass.Queue.ranked_solo_fives, region="NA")
grandmaster_league = cass.get_grandmaster_league(queue=cass.Queue.ranked_solo_fives, region="NA")
master_league = cass.get_master_league(queue=cass.Queue.ranked_solo_fives, region="NA")

# Test if rank 1 is accurate: resolve the first Challenger puuid to a display name.
# NOTE(review): assumes the first entry returned is the rank-1 player — confirm the
# league iterates in ladder order.
rank1 = get_all_league_entries(challenger_league)[0]

# The puuid came from the NA league, so the summoner must be looked up in NA as well
# (this previously queried "EUW").
summ = cass.get_summoner(puuid=rank1, region="NA")
summ.account.name_with_tagline

In [None]:
# takes around 2 hours to run
# for NA challenger matches
# all_chall_players_puuid =  get_all_league_entries(challenger_league)
# all_matches = []
# for puuid in all_chall_players_puuid:
#     all_matches.extend(cass.get_match_history(puuid=puuid, continent="AMERICAS"))
# with open("na_chall_matches.pkl", "wb") as f:
#     pickle.dump(all_matches, f)
# for NA grandmaster matches
# all_gm_players_puuid =  get_all_league_entries(grandmaster_league)
# all_gm_matches = []
# for puuid in all_gm_players_puuid:
#     all_gm_matches.extend(cass.get_match_history(puuid=puuid, continent="AMERICAS"))
# with open("na_gm_matches.pkl", "wb") as f:
#     pickle.dump(all_gm_matches, f)

In [None]:
# Load the previously collected match collections from disk.
# `with` guarantees each file handle is closed once its pickle has been read.
# NOTE: pickle.load can execute arbitrary code — only load files you produced yourself.
# NOTE(review): the commented-out collection cell writes "na_chall_matches.pkl" /
# "na_gm_matches.pkl", while these names lack the "na_" prefix — confirm which is current.
with open("chall_matches.pkl", "rb") as f:
    na_chall_matches = pickle.load(f)
with open("gm_matches.pkl", "rb") as f:
    na_gm_matches = pickle.load(f)

In [None]:
# Verify data was loaded
print(f"Total Matches Collected:"
      f"\nNA Challenger Matches: {len(na_chall_matches)}"
      f"\nNA Grandmaster Matches: {len(na_gm_matches)}")


In [None]:
# Pool the Challenger and Grandmaster match collections into one sequence.
# No de-duplication happens here; the batch loop further down filters repeated
# match ids through its `visited` set.
total_matches = na_chall_matches + na_gm_matches
print(f"Total Matches: {len(total_matches)}")

In [None]:
def chunks(iterable, size):
    """Yield consecutive lists of at most ``size`` items taken from ``iterable``.

    The final list may be shorter than ``size``; nothing is yielded for an
    empty input. The input is consumed lazily, one chunk at a time.
    """
    source = iter(iterable)

    def take_next():
        # Pull the next slice of up to `size` items off the shared iterator.
        return list(islice(source, size))

    # Two-argument iter(): keep calling take_next() until it returns the sentinel [].
    yield from iter(take_next, [])

visited = set()       # match ids already collected, used to drop duplicates
BATCH_SIZE = 2000     # number of matches handed to each batch
RETRY_DELAY = 300     # seconds to pause after an API or unexpected error

CHECKPOINT_FILE = "all_checkpoint.txt"

# Resume support: the checkpoint file holds the number of the last batch that was
# already processed. On a first run the file may not exist yet (or may be empty);
# fall back to "0" so that no batch is skipped instead of crashing.
try:
    with open(CHECKPOINT_FILE, "r") as f:
        last_batch = f.read().strip() or "0"
except FileNotFoundError:
    last_batch = "0"

# Walk the combined match collection in batches of BATCH_SIZE. Batches numbered at or
# below the checkpoint value are skipped; in the remaining batches every match from the
# ranked_solo_fives queue that is not a remake and has not been seen before is
# serialized to a dict and appended to `batch_data`.
# NOTE(review): `visited` is not repopulated for skipped batches, so after a resume
# duplicates of matches from earlier batches are not filtered — confirm this is acceptable.
for batch_num, batch in enumerate(chunks(total_matches, BATCH_SIZE), start=1):
    print(f"\nBatch {batch_num} ({len(batch)} matches):")
    if batch_num <= int(last_batch):
        print(f"Skipping batch {batch_num} as it has already been processed.")
        continue

    # NOTE(review): `batch_data` is not written anywhere within this span; presumably
    # it is persisted further down in the loop body — confirm.
    batch_data = []
    for match in batch:
        # Read every attribute needed up front; any of these reads can raise
        # (see the handlers below), in which case the match is skipped.
        try:
            q_type = str(match.queue)
            remake = match.is_remake
            match_id = match.id
            # Not used below; presumably read to force the participant data to load
            # inside this try block — confirm.
            participants = match.participants

        except KeyError:
            print("Skipping match: Unknown queue ID")
            continue
        except AttributeError:
            print("Skipping match: Missing queue info")
            continue
        except APIError as e:
            # Back off before moving on to the next match.
            print(f"Riot API error encountered: {e}. Skipping...")
            time.sleep(RETRY_DELAY)
            continue
        except Exception as e:
            print(f"Skipping match : Unexpected error getting queue ({e})")
            time.sleep(RETRY_DELAY)
            continue
        try:
            if q_type == "Queue.ranked_solo_fives" and not remake:
                if match_id not in visited:
                    dict_data = json.loads(match.to_json())
                    batch_data.append(dict_data)
                    # Use the id captured above rather than reading the property again.
                    visited.add(match_id)
        except Exception as e:
            # Report with the captured id: touching `match.id` again here could raise
            # inside the handler and abort the whole run.
            print(f"Error processing match {match_id}: {e}")
            print("Skipping this match...")
            time.sleep(RETRY_DELAY)
            continue

 
    # except Exception as e:
    #     print(f"Error in batch {batch_num}: {e}")
    #     print(f"Moving onto next batch after 2.5 minutes...")
    #     time.sleep(RETRY_DELAY)