In [None]:
import requests
import time
import json
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from functools import lru_cache

from concurrent.futures import ThreadPoolExecutor

In [None]:
def setupGdrive(location):
    """Mount Google Drive (Colab only) and return the project folder path.

    Args:
        location: Folder path relative to the root of "My Drive".

    Returns:
        The path of ``location`` underneath the Drive mount point.
    """
    import os.path
    from google.colab import drive

    mount_point = '/content/drive' # default for the drive
    drive.mount(mount_point)
    return os.path.join(mount_point, 'My Drive/' + location)

PROJECT_PATH = setupGdrive('LoL Data')

%cd {PROJECT_PATH}

# Set up dir structure
!mkdir players
!mkdir matches
!mkdir inputs
!mkdir labels
!mkdir summary_inputs

# Ranked queue identifier.
queue_type = 'RANKED_SOLO_5x5'

# Tiers that are split into divisions, and the divisions within each tier.
ranks = ['IRON', 'BRONZE', 'SILVER', 'GOLD', 'PLATINUM', 'EMERALD', 'DIAMOND']
divisions = ['IV', 'III', 'II', 'I']
# Tiers that have no divisions.
peak_ranks = ['MASTER', 'GRANDMASTER', 'CHALLENGER']

# Every rank label: "<TIER> <DIVISION>" for the divided tiers, followed by the
# bare peak tiers. A label's index in this list is its numeric id.
all_ranks = [f'{tier} {division}' for tier in ranks for division in divisions]
all_ranks += peak_ranks

def rank_to_id(rank):
    """Return the index of a rank label within ``all_ranks``.

    For peak tiers only the text before the first space is used, so a label
    such as 'MASTER I' resolves to 'MASTER'.

    Raises:
        ValueError: If the resulting label is not present in ``all_ranks``.
    """
    tier = rank.partition(' ')[0]
    label = tier if tier in peak_ranks else rank
    return all_ranks.index(label)

def id_to_rank(id):
    """Return the rank label for a numeric rank id (inverse of ``rank_to_id``).

    ``process_labels`` stores -1 for players whose rank is not in
    ``all_ranks``. Plain list indexing would silently turn that sentinel into
    the last entry of the list, so it is mapped to 'UNRANKED' explicitly.

    Args:
        id: Index into ``all_ranks``, or -1 for an unranked player.

    Raises:
        IndexError: If ``id`` is neither -1 nor a valid index of ``all_ranks``.
    """
    if id == -1:
        return 'UNRANKED'
    if not 0 <= id < len(all_ranks):
        raise IndexError(f'Invalid rank id: {id}')
    return all_ranks[id]

In [None]:
#####################
#  REQUEST HELPERS  #
#####################

API_KEY = os.environ['RIOT_API_KEY'] # API key read from the RIOT_API_KEY environment variable (KeyError if it is not set)

def get_header(key):
    """Build the HTTP headers sent with every API request.

    Args:
        key: Value placed in the ``X-Riot-Token`` header.

    Returns:
        A new dict mapping header names to values.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Charset": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://developer.riotgames.com",
    }
    headers["X-Riot-Token"] = key
    return headers

def get_with_retry(url,  max_backoff=32, timeout=10):
    """GET ``url`` and return the decoded JSON body, retrying transient failures.

    HTTP 429 and 503 responses and request timeouts are retried indefinitely.
    The wait before a retry is the server's ``Retry-After`` value when one is
    sent and numeric, otherwise an exponential backoff capped at
    ``max_backoff``.

    Args:
        url: Full request URL.
        max_backoff: Upper bound, in seconds, for the exponential backoff.
        timeout: Seconds to wait on a single request before it is abandoned
            and retried. Without a timeout a stalled connection blocks forever.

    Returns:
        The parsed JSON payload of the first 200 response.

    Raises:
        requests.exceptions.HTTPError: For any status other than 200/429/503.
    """
    backoff = 1

    while True:
        try:
            response = requests.get(url, headers=get_header(API_KEY), timeout=timeout)
        except requests.exceptions.Timeout:
            backoff = min(backoff * 2, max_backoff)
            time.sleep(backoff)
            continue

        status = response.status_code
        if status == 200:
            return response.json()

        if status == 429 or status == 503:
            if status == 503:
                print("Service Down")
            backoff = min(backoff * 2, max_backoff)  # Double the backoff, capped at max_backoff

            # Prefer the wait the server asks for; fall back to our own backoff
            # when the header is missing or not a plain number of seconds.
            try:
                delay = max(float(response.headers.get('Retry-After')), 0)
            except (TypeError, ValueError):
                delay = backoff
            time.sleep(delay)
            continue

        # Something else really wrong, raise.
        response.raise_for_status()
        # raise_for_status() stays silent for non-error codes (e.g. 204), which
        # previously sent this loop straight back to the server without any
        # pause. Fail loudly instead.
        raise requests.exceptions.HTTPError(
            f'Unexpected status {status} for {url}', response=response
        )

def get_players(rank, division, page):
    """Fetch one page of RANKED_SOLO_5x5 league entries for a tier and division.

    Returns:
        Whatever ``get_with_retry`` returns for the NA1 league-v4 entries URL.
    """
    endpoint = 'https://na1.api.riotgames.com/lol/league/v4/entries/RANKED_SOLO_5x5'
    return get_with_retry(f'{endpoint}/{rank}/{division}?page={page}')

def get_peak_players(rank):
    """Fetch the RANKED_SOLO_5x5 league for a peak tier.

    Args:
        rank: Tier name such as 'MASTER'; it is lower-cased to form the
            ``<tier>leagues`` path segment.

    Returns:
        Whatever ``get_with_retry`` returns for the league URL. The peak-rank
        cell in this notebook reads its ``'entries'`` key.
    """
    # The path previously started with a doubled slash ("...com//lol/..."),
    # which is not the documented route.
    url = f'https://na1.api.riotgames.com/lol/league/v4/{rank.lower()}leagues/by-queue/RANKED_SOLO_5x5'
    return get_with_retry(url)

@lru_cache(maxsize=15000)
def get_puuid(summoner_id):
    """Look up the ``puuid`` of a summoner id; results are memoised per id."""
    summoner = get_with_retry(
        f'https://na1.api.riotgames.com/lol/summoner/v4/summoners/{summoner_id}'
    )
    return summoner['puuid']

@lru_cache(maxsize=15000)
def get_rank(summoner_id):
    """Return a summoner's ranked solo-queue rank label.

    The by-summoner endpoint can return entries for more than one ranked
    queue (e.g. flex), so the solo-queue entry is selected by ``queueType``
    instead of blindly taking the first element.

    Args:
        summoner_id: Summoner id used in the request URL.

    Returns:
        The bare tier for peak tiers, '<TIER> <DIVISION>' otherwise, or
        'UNRANKED' when the lookup fails or no solo-queue entry exists.
        Results (including 'UNRANKED') are memoised per id.
    """
    url = f'https://na1.api.riotgames.com/lol/league/v4/entries/by-summoner/{summoner_id}'
    try:
        entries = get_with_retry(url)
        solo_entry = next(
            (entry for entry in entries if entry.get('queueType') == 'RANKED_SOLO_5x5'),
            None,
        )
    except Exception:
        # Best effort: a failed lookup is labelled like an unranked player.
        # (Exception rather than a bare except, so Ctrl-C still interrupts.)
        return 'UNRANKED'

    if solo_entry is None:
        return 'UNRANKED'
    if solo_entry['tier'] in peak_ranks:
        return solo_entry['tier']
    return solo_entry['tier'] + ' ' + solo_entry['rank']

def get_all_ranks(summoner_ids):
    """Resolve ``get_rank`` for many summoner ids on a thread pool.

    Returns:
        A list of results in the same order as ``summoner_ids``.
    """
    with ThreadPoolExecutor() as pool:
        ranks_in_order = [rank for rank in pool.map(get_rank, summoner_ids)]
    return ranks_in_order

def get_ranked_matches(puuid, count=3):
    """
    Fetch ids of a player's ranked solo matches since the start of S13.

    ``count`` is forwarded as the ``count`` query parameter (default 3).
    """
    query = '&'.join([
        'startTime=1673413200',  # S13 start timestamp
        'queue=420',  # ranked solo queue id
        'type=ranked',
        'start=0',
        f'count={count}',
    ])
    return get_with_retry(
        f'https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids?{query}'
    )

def get_match_summary(match_id):
    """Download the match-v5 payload for ``match_id`` (no timeline)."""
    return get_with_retry(
        f'https://americas.api.riotgames.com/lol/match/v5/matches/{match_id}'
    )

def get_match_data(match_id):
    """Download a match's timeline and summary and bundle them in one dict.

    Returns:
        ``{"timeline": ..., "summary": ...}``. The timeline is requested
        first, then the match itself.
    """
    match_url = f'https://americas.api.riotgames.com/lol/match/v5/matches/{match_id}'
    timeline = get_with_retry(match_url + '/timeline')
    summary = get_with_retry(match_url)
    return {"timeline": timeline, "summary": summary}

In [None]:
# Smoke test: request page 1 of IRON I, then show how many entries came back
# and what a single entry looks like.
r = get_players('IRON', 'I', 1)
print(len(r))
display(r[0])

In [None]:
# Print the current working directory (set by the %cd in the setup cell).
!pwd

In [None]:
players_per_divison = 5000
MIN_GAMES_PLAYED = 20  # players with fewer wins + losses than this are skipped

# Generate player lists for each division
for rank in ranks:
    for div in divisions:
        player_file = f'players/{rank}_{div}.txt'
        # Next page to request, persisted so an interrupted run can resume
        max_page_file = f'players/{rank}_{div}_max_page.txt'

        page = 1
        if os.path.exists(max_page_file):
            with open(max_page_file, 'r') as f:
                try:
                    page = int(f.read())
                except ValueError:
                    # Empty or unreadable checkpoint: start from the first page.
                    pass

        player_count = 0
        # Append mode creates the file when it does not exist yet.
        with open(player_file, 'a') as f:
            while player_count < players_per_divison:
                r = get_players(rank, div, page)
                if not r:
                    # Ran out of players (e.g. for Dia 1)
                    break

                for player_data in r:
                    if player_data['wins'] + player_data['losses'] < MIN_GAMES_PLAYED:
                        continue
                    player_count += 1
                    f.write(f'{player_data["summonerId"]}\n')

                page += 1

                # Checkpoint after every page. Previously the page number was
                # only saved once the whole division had finished, so a crash
                # mid-division restarted from the old page and appended the
                # same players again. Flush first so the players are on disk
                # before the checkpoint moves past them.
                f.flush()
                with open(max_page_file, 'w') as checkpoint:
                    checkpoint.write(str(page))

        print(f'Finished {player_file} with {player_count} players and {page} pages')

        with open(max_page_file, 'w') as f:
            f.write(str(page))



In [None]:
# Generate player lists for each PEAK RANK
PEAK_MIN_GAMES_PLAYED = 15  # players with fewer wins + losses than this are skipped

for rank in peak_ranks:
    player_file = f'players/{rank}.txt'

    # One request returns the whole league, so there is no page counter here.
    # (The old code printed and saved `page`, a variable that only exists if
    # the division cell happened to run first.)
    r = get_peak_players(rank)
    if not r:
        # Nothing for this tier: carry on with the next one instead of
        # abandoning the remaining tiers.
        continue

    player_count = 0
    # 'w' rather than 'a': the full list is rewritten on every run, so
    # re-running the cell does not duplicate every player.
    with open(player_file, 'w') as f:
        for player_data in r['entries']:
            if player_data['wins'] + player_data['losses'] < PEAK_MIN_GAMES_PLAYED:
                continue
            player_count += 1
            f.write(f'{player_data["summonerId"]}\n')

    print(f'Finished {player_file} with {player_count} players')



In [None]:
# Sample payloads for the exploratory cells below: the timeline of one match
# and the summary of another.
m = get_match_data('NA1_5139158682')['timeline']
# Only the summary is needed here, so request it directly instead of also
# downloading a timeline that is thrown away.
m_summary = get_match_summary('NA1_5174543118')

In [None]:
# Number of frames in the sample timeline.
len(m['info']['frames'])

In [None]:
# Inspect the frame at index 40 of the sample timeline.
m['info']['frames'][40]

In [None]:
# Timeline event types that become datapoints, mapped to their slot in the
# one-hot event-type vector.
EVENTS_ENUM = {
    event_name: slot
    for slot, event_name in enumerate((
        'CHAMPION_KILL',
        'BUILDING_KILL',
        'ELITE_MONSTER_KILL',
        'LEVEL_UP',
        'WARD_PLACED',
        'WARD_KILL',
        # 'SUMMARY',
    ))
}

# Elite monster types mapped to their slot in the one-hot monster vector.
# Soul Drag = 4th drag, Elder Drag is drag subclass, both need extra pre-processing
MONSTERS_ENUM = {
    monster_name: slot
    for slot, monster_name in enumerate((
        'HORDE',
        'RIFTHERALD',
        'BARON_NASHOR',
        'DRAGON',
        'SOUL_DRAGON',
        'ELDER_DRAGON',
    ))
}

class Datapoint:
    """A single timeline event encoded as a fixed-length feature vector.

    Participant ids in the event payload are 1-based (1-10). They are stored
    in 10-slot lists; slots 0-4 are treated as team 0 and slots 5-9 as team 1.

    Constructing a Datapoint raises when the event cannot be attributed to
    exactly one valid participant or its type is not handled; callers are
    expected to skip such events.
    """

    def __init__(self, event):
        """Encode ``event`` (a timeline event dict).

        Raises:
            KeyError: If the event type is not in ``EVENTS_ENUM``, the monster
                type is not in ``MONSTERS_ENUM``, or an expected key is absent.
            Exception: If ``killerId`` is 0, a participant id is outside 1-10,
                or the event type has no handling branch.
        """
        # Generally fixed
        self.timestamp = event['timestamp']
        self.event_type = event['type']
        self.event_type_vec = [0] * 6
        self.event_type_vec[EVENTS_ENUM[self.event_type]] = 1

        if 'position' in event:
            self.x_pos = event['position']['x']
            self.y_pos = event['position']['y']
        else:
            # -1 marks "no position in the payload".
            self.x_pos = -1
            self.y_pos = -1

        self.main_id = [0] * 10         # one-hot: participant the event is attributed to
        self.participant_id = [0] * 10  # multi-hot: assisting participants
        self.victim_id = [0] * 10       # one-hot: victim (champion kills only)

        self.bounty = 0
        self.monster_type = [0] * 6

        self.team_id = 0

        self._try_customize(event) # Sets main_id, part_id, victim_id, bounty, monster_type, team_id

        # Filled in later through _set_non_event_data().
        self.gold = 0
        self.cs = 0
        self.xp = 0

        assert sum(self.main_id) == 1, f'Invalid main_id: {self.main_id}'

    @staticmethod
    def _mark_participant(slots, participant_id):
        """Set the slot of a 1-based participant id, rejecting invalid ids.

        Without the range check an id of 0 would index slot -1 and be silently
        attributed to participant 10.

        Raises:
            ValueError: If ``participant_id`` is outside ``1..len(slots)``.
        """
        if not 1 <= participant_id <= len(slots):
            raise ValueError(f'Invalid participant id: {participant_id}')
        slots[participant_id - 1] = 1

    def _try_customize(self, event):
        """Fill the event-type specific fields from ``event``."""
        if 'killerId' in event and event['killerId'] == 0:
            raise Exception(f'Invalid killerId: {event}')

        if self.event_type in ['CHAMPION_KILL', 'BUILDING_KILL', 'ELITE_MONSTER_KILL']:
            self._mark_participant(self.main_id, event['killerId'])
            for p_id in event.get('assistingParticipantIds', []):
                self._mark_participant(self.participant_id, p_id)

            if self.event_type == 'CHAMPION_KILL':
                self._mark_participant(self.victim_id, event['victimId'])
                self.bounty = event['shutdownBounty']
            elif self.event_type == 'BUILDING_KILL':
                self.bounty = event['bounty']
            else:
                monster_type = event['monsterType']
                if monster_type == 'DRAGON':
                    # Elder and soul dragons get their own slot; every other
                    # dragon sub type stays under the generic DRAGON slot.
                    if event['monsterSubType'] in ['ELDER_DRAGON', 'SOUL_DRAGON']:
                        monster_type = event['monsterSubType']
                self.monster_type[MONSTERS_ENUM[monster_type]] = 1
                self.bounty = event['bounty']

        elif self.event_type == 'LEVEL_UP':
            self._mark_participant(self.main_id, event['participantId'])

        elif self.event_type == 'WARD_PLACED':
            self._mark_participant(self.main_id, event['creatorId'])

        elif self.event_type == 'WARD_KILL':
            self._mark_participant(self.main_id, event['killerId'])

        else:
            raise Exception(f'Invalid event type: {self.event_type}')

        if sum(self.main_id) != 1:
            raise Exception(f'Invalid main_id: {self.main_id}')

        self.team_id = 0 if self.main_id.index(1) < 5 else 1

    def _set_non_event_data(self, gold, cs, xp):
        """Attach the gold / cs / xp values that are not part of the event."""
        # REMEMBER TO DO THIS!!!
        self.gold = gold
        self.cs = cs
        self.xp = xp

    def serialize_to_np(self):
        """Return the datapoint as a 1-D numpy array of length 50.

        Layout: 8 scalars (timestamp, x, y, bounty, gold, cs, xp, team id),
        then event type (6), main id (10), assisting ids (10), victim id (10)
        and monster type (6).
        """
        serialized = [self.timestamp, self.x_pos, self.y_pos, self.bounty, self.gold, self.cs, self.xp, self.team_id] # len 8
        serialized += self.event_type_vec # len 6
        serialized += self.main_id # len 10
        serialized += self.participant_id # len 10
        serialized += self.victim_id # len 10
        serialized += self.monster_type # len 6

        serialized = np.array(serialized)
        assert serialized.shape == (50,), f'Invalid serialized shape: {np.shape(serialized)}'

        return serialized

def convert_level_to_xp(level):
    """Return the XP total tabulated for reaching ``level`` (1-18).

    Raises:
        KeyError: If ``level`` is not one of the tabulated levels.
    """
    # XP totals for levels 1..18, in order.
    xp_totals = (
        0, 280, 660, 1140, 1720, 2400,
        3180, 4060, 5040, 6120, 7300, 8580,
        9960, 11440, 13020, 14700, 16480, 18360,
    )
    xp_by_level = dict(enumerate(xp_totals, start=1))
    return xp_by_level[level]

def interpolate(prev, cur, time_diff, time_range):
    """Linearly interpolate between ``prev`` and ``cur``.

    The fraction applied is ``time_diff / (time_range + 1)``.
    NOTE(review): the +1 presumably guards against a zero ``time_range`` — confirm.
    """
    change = cur - prev
    span = time_range + 1
    return prev + change * time_diff / span

def create_datapoints_for_frame(prev_frame, frame):
    """Turn the events of one timeline frame into serialized datapoints.

    Gold and CS for each event are interpolated between ``prev_frame`` and
    ``frame``. XP is set from the level table on LEVEL_UP events and
    interpolated from the participant's latest known XP otherwise.

    Args:
        prev_frame: The frame preceding ``frame`` (needs ``timestamp`` and
            ``participantFrames``).
        frame: The frame whose ``events`` are converted. It is not modified.

    Returns:
        A list of arrays as produced by ``Datapoint.serialize_to_np``. Event
        types outside ``EVENTS_ENUM`` and events ``Datapoint`` rejects are
        skipped.
    """
    datapoints = []
    participant_ids = range(1, 11)

    def _stat(source_frame, pid, key):
        return source_frame['participantFrames'][str(pid)][key]

    def _cs(source_frame, pid):
        return _stat(source_frame, pid, 'minionsKilled') + _stat(source_frame, pid, 'jungleMinionsKilled')

    # Gold + CS purely interpolated based on minute-level frames
    prev_gold = {pid: _stat(prev_frame, pid, 'totalGold') for pid in participant_ids}
    cur_gold = {pid: _stat(frame, pid, 'totalGold') for pid in participant_ids}

    prev_cs = {pid: _cs(prev_frame, pid) for pid in participant_ids}
    cur_cs = {pid: _cs(frame, pid) for pid in participant_ids}

    # xp interpolated between each levelup
    prev_xp = {pid: _stat(prev_frame, pid, 'xp') for pid in participant_ids}
    prev_xp_timestamps = {pid: prev_frame['timestamp'] for pid in participant_ids}
    cur_xp = {pid: _stat(frame, pid, 'xp') for pid in participant_ids}

    events = frame['events']
    for i, event in enumerate(events):
        if event['type'] not in EVENTS_ENUM:
            continue

        # Handle special soul dragon case (not explicitly named by Riot API):
        # a dragon kill followed within the next three events by
        # DRAGON_SOUL_GIVEN is treated as the soul dragon.
        if event['type'] == 'ELITE_MONSTER_KILL' and event['monsterType'] == 'DRAGON':
            if any(nxt['type'] == 'DRAGON_SOUL_GIVEN' for nxt in events[i+1:i+4]):
                # Tag a copy so the caller's timeline dict is left untouched.
                event = {**event, 'monsterSubType': 'SOUL_DRAGON'}

        try:
            dp = Datapoint(event)
        except Exception:
            # Datapoint raises for events it cannot encode; skip those.
            # (Exception rather than a bare except, so Ctrl-C still interrupts.)
            continue

        main_id = dp.main_id.index(1) + 1

        # GOLD AND CS HANDLING
        gold_cs_timediff = dp.timestamp - prev_frame['timestamp']

        gold = interpolate(prev_gold[main_id], cur_gold[main_id], gold_cs_timediff, 60000)
        cs = interpolate(prev_cs[main_id], cur_cs[main_id], gold_cs_timediff, 60000) # one minute frames (approx.)

        # XP AND LEVEL-UP HANDLING
        if event['type'] == 'LEVEL_UP':
            # A level-up pins the participant's XP exactly; later events in
            # this frame interpolate from this point onwards.
            xp = convert_level_to_xp(event['level'])
            prev_xp[main_id] = xp
            prev_xp_timestamps[main_id] = dp.timestamp
        else:
            xp_timediff = dp.timestamp - prev_xp_timestamps[main_id]
            xp = interpolate(prev_xp[main_id], cur_xp[main_id], xp_timediff, frame['timestamp'] - prev_xp_timestamps[main_id])

        dp._set_non_event_data(gold, cs, xp)

        datapoints.append(dp.serialize_to_np())

    return datapoints


In [None]:
# Try the conversion on one pair of consecutive frames from the sample timeline.
create_datapoints_for_frame(m['info']['frames'][39], m['info']['frames'][40])

In [None]:
# Inspect the frame at index 3 of the sample timeline.
m['info']['frames'][3]

In [None]:
# List the participant fields of the sample summary whose name contains "ping".
[key for key in m_summary['info']['participants'][0].keys() if 'ping' in key.lower()]

In [None]:
# Participant fields copied out of the match summary, grouped by theme.
_SCORELINE_ATTRS = ['kills', 'deaths', 'assists']

_TEAM_POSITION_ATTRS = ['teamId', 'teamPosition']

_PING_ATTRS = [
    'allInPings', 'assistMePings', 'basicPings', 'commandPings',
    'dangerPings', 'enemyMissingPings', 'enemyVisionPings', 'getBackPings',
    'holdPings', 'needVisionPings', 'onMyWayPings', 'pushPings',
    'retreatPings', 'visionClearedPings',
]

_IMPORTANT_STAT_ATTRS = ['goldEarned', 'champLevel', 'champExperience', 'totalMinionsKilled']

_MISC_ATTRS = [
    'longestTimeSpentLiving', 'gameEndedInSurrender', 'visionScore',
    'visionWardsBoughtInGame', 'wardsKilled', 'wardsPlaced',
]

# Order matters: it becomes the column order of the summary DataFrame.
summary_attrs = (
    _SCORELINE_ATTRS
    + _TEAM_POSITION_ATTRS
    + _PING_ATTRS
    + _IMPORTANT_STAT_ATTRS
    + _MISC_ATTRS
)

def process_summary(match_summary):
    """Build a per-participant DataFrame of the ``summary_attrs`` fields.

    Returns:
        A DataFrame with one row per participant (index 'summoner1' ..
        'summoner10') and one column per attribute; an attribute missing from
        a participant's record is stored as -1.
    """
    def _column(attr):
        # Index participants 0..9 explicitly, as the summary is expected to
        # hold exactly ten of them.
        return [
            match_summary['info']['participants'][slot].get(attr, -1)
            for slot in range(10)
        ]

    feats = {attr: _column(attr) for attr in summary_attrs}
    row_labels = [f'summoner{slot}' for slot in range(1, 11)]
    return pd.DataFrame(feats, index=row_labels)

def process_labels(match_summary):
    """Build the label row for a match: its id plus one rank id per player.

    Returns:
        A dict with 'match_id' and 'summoner_1' .. 'summoner_10'. Each
        summoner value is the player's rank id, or -1 when the rank returned
        by ``get_all_ranks`` is not in ``all_ranks``.
    """
    labels = {"match_id": match_summary['metadata']['matchId']}

    participants = match_summary['info']['participants']
    player_ranks = get_all_ranks([participants[slot]['summonerId'] for slot in range(10)])

    for slot, player_rank in enumerate(player_ranks, start=1):
        known = player_rank in all_ranks
        labels[f'summoner_{slot}'] = rank_to_id(player_rank) if known else -1
    return labels

def process_match(match_json):
    """Convert a downloaded match into model inputs and labels.

    Args:
        match_json: Dict with 'summary' and 'timeline' keys, as returned by
            ``get_match_data``.

    Returns:
        ``(events_df, summary_df, labels)``, or ``(None, None, None)`` when
        the game lasted under 10 minutes or anything fails while processing.
    """
    # Resolved as early as possible so the error message below can always be
    # printed. The handler used to read `match_summary` directly, which raised
    # UnboundLocalError when the failure happened before it was assigned.
    match_id = 'unknown'
    try:
        match_summary = match_json['summary']
        match_id = match_summary.get('metadata', {}).get('matchId', match_id)

        if match_summary['info']['gameDuration'] < 600:
            # Less than 10 minutes probably a remake/AFK
            return None, None, None

        frames = match_json['timeline']['info']['frames']

        events = []
        # Walk consecutive frame pairs: (0, 1), (1, 2), ...
        for prev_frame, frame in zip(frames, frames[1:]):
            events += create_datapoints_for_frame(prev_frame, frame)

        events_df = pd.DataFrame(np.array(events))
        events_df.columns = ['timestamp'] + [f'feature_{i}' for i in range(1, events_df.shape[1])]

        return events_df, process_summary(match_summary), process_labels(match_summary)

    except Exception as exc:
        # Best effort: one bad match must not stop a long scraping run, but
        # say what went wrong. (Exception rather than a bare except, so Ctrl-C
        # still interrupts.)
        print(f'Error processing match {match_id}: {exc!r}')
        return None, None, None


In [None]:
%%time
# Time one end-to-end run: download a match and convert it with process_match.
processed_match = process_match(get_match_data('NA1_5139158682'))

In [None]:
# Third element of the process_match result: the labels dict.
processed_match[2]

In [None]:
matches_per_division = 3500

all_files = !ls -1 players/

cur_rank = 'IRON'

player_files = [f'{cur_rank}_I.txt']

label_file = f'labels/{cur_rank}.csv'

for f in player_files:
    match_count = 0

    with open(f'players/{f}', 'r') as f:
        players = f.read().splitlines()

    for i, player in enumerate(players):
        # per division

        if match_count % 250 == 0:
            print(f'Finished {match_count}/{matches_per_division}')

        puuid = get_puuid(player)
        matches = get_ranked_matches(puuid, count=3)
        if not matches:
            continue

        for m in matches:
            match_file = f'inputs/{m}.parquet'
            exp_match_file = f'inputs_exp/{m}.parquet'
            summary_file = f'summary_inputs/{m}.parquet'

            if os.path.exists(match_file) or os.path.exists(exp_match_file):
                # Already handled, maybe we failed?
                # Reconstruct labels since we don't save them until later
                # summary = get_match_data(m)['summary']
                # labels = process_labels(summary)
                # batched_labels.append(labels)

                match_count += 1
                continue

            events_df, summary_df, labels = process_match(get_match_data(m))
            if events_df is None:
                continue

            # Save as parquet
            events_df.to_parquet(match_file, index=False)
            summary_df.to_parquet(summary_file, index=False)

            # Labels as csv
            label_df = pd.DataFrame(labels, index=[0])
            label_df.to_csv(label_file, mode='a', index=False, header=not os.path.exists(label_file))

            match_count += 1

        if match_count >= matches_per_division:
            break


    print(f'Finished {f} with {match_count} matches')


In [None]:
stored_matches = sorted(os.listdir('inputs'))
# 5 Keys = 5 partitions
partition_num = 0
partition_size = len(stored_matches) // 5 + 1
print(partition_size)

# Derived from partition_num so changing the partition cannot silently append
# to another partition's file (the name was hard-coded to split_0).
label_file = f'labels/split_{partition_num}.csv'
batched_labels = []

partition = stored_matches[partition_num * partition_size:(partition_num + 1) * partition_size]

for i, match_file in enumerate(partition):
    match_id = match_file.split('.')[0]
    if i % 500 == 0:
        # Progress is relative to this partition, not the whole dataset.
        print(f'Finished {i}/{len(partition)}')

    summary = get_match_summary(match_id)
    labels = process_labels(summary)

    # Batch labels to write them together. Whatever is left in
    # `batched_labels` after the loop still has to be flushed separately.
    batched_labels.append(labels)
    if len(batched_labels) > 100:
        labels_df = pd.DataFrame(batched_labels)
        labels_df.to_csv(label_file, mode='a', index=False, header=not os.path.exists(label_file))
        batched_labels = []

In [None]:
# Flush the labels still waiting in `batched_labels` (the loop above only
# writes once more than 100 have accumulated).
if batched_labels:
    labels_df = pd.DataFrame(batched_labels)
    # Write the header only when the file does not exist yet.
    labels_df.to_csv(label_file, mode='a', index=False, header=not os.path.exists(label_file))

In [None]:
# Sanity check: labels for the first three rank ids.
[id_to_rank(i) for i in range(3)]

In [None]:
# Sanity check: numeric id of 'EMERALD IV'.
all_ranks.index('EMERALD IV')

In [None]:
# Check whether the BRONZE I player list has been generated.
os.path.exists('players/BRONZE_I.txt')

In [None]:
# Load one stored match summary for inspection.
df = pd.read_parquet('summary_inputs/NA1_5175370180.parquet', engine="pyarrow")

In [None]:
def load_parquet(input_file):
    """Read one parquet file from the ``summary_inputs`` folder.

    Args:
        input_file: File name relative to ``summary_inputs/``.
    """
    parquet_path = 'summary_inputs/' + input_file
    return pd.read_parquet(parquet_path, engine="pyarrow")

def get_combined_df(input_files):
    """Load several summary files on a thread pool and stack them.

    Returns:
        One DataFrame with the rows of every file, in input order, under a
        fresh 0..n-1 index.
    """
    with ThreadPoolExecutor(max_workers=10) as pool:
        lazy_frames = pool.map(load_parquet, input_files)
    # Leaving the `with` block waits for every load to finish; the results
    # are then drained from the iterator by concat.
    return pd.concat(lazy_frames, ignore_index=True)

# Ping counters that are summed into the 'totalPings' column.
ping_cols = [
    f'{ping_kind}Pings'
    for ping_kind in (
        'allIn', 'assistMe', 'basic', 'command', 'danger',
        'enemyMissing', 'enemyVision', 'getBack', 'hold', 'needVision',
        'onMyWay', 'push', 'retreat', 'visionCleared',
    )
]

# Numeric code for each 'teamPosition' value, starting at 1.
position_map = {
    position: code
    for code, position in enumerate(('TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY'), start=1)
}

# Columns whose dataset-wide min/max are collected. Order matters: it is the
# order of the values in dataset_min / dataset_max.
normalizing_columns = (
    ['kills', 'deaths', 'assists']
    + ['goldEarned', 'champExperience', 'totalMinionsKilled']
    + ['longestTimeSpentLiving', 'gameEndedInSurrender']
    + ['visionScore', 'visionWardsBoughtInGame', 'wardsKilled', 'wardsPlaced']
    + ['totalPings']  # derived column: row-wise sum of ping_cols
)

# Running extremes per normalizing column. Both start at the identity of
# their reduction. The max used to start at 0, which is wrong for a column
# whose values are all negative (e.g. the -1 placeholder process_summary
# stores for a missing attribute).
dataset_max = np.full(len(normalizing_columns), -np.inf)
dataset_min = np.full(len(normalizing_columns), np.inf)

files = os.listdir('summary_inputs')
count = 0

# Scan the summaries ten files at a time to keep memory bounded.
for i in range(0, len(files), 10):
    df = get_combined_df(files[i:i+10])

    df['teamPosition'] = df['teamPosition'].map(position_map)
    df['gameEndedInSurrender'] = df['gameEndedInSurrender'].astype(int)
    df['totalPings'] = df[ping_cols].sum(axis=1)

    max_vals = df[normalizing_columns].max().to_numpy()
    min_vals = df[normalizing_columns].min().to_numpy()

    dataset_max = np.maximum(dataset_max, max_vals)
    dataset_min = np.minimum(dataset_min, min_vals)

    if i % 500 == 0:
        # len(files) was computed once above; no need to list the directory
        # again for every progress line.
        print(f'Finished {i}/{len(files)}')
        print(list(dataset_max))
        print(list(dataset_min))

    count += 10


print(dataset_max)
print(dataset_min)
