In [2]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO
import pandas as pd
import numpy as np

In [830]:
S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"


def download_gzip_and_write_to_json(file_name, timeout=30):
    """Download ``<file_name>.json.gz`` from the S3 bucket and store it decompressed.

    Args:
        file_name: Path stem shared by the remote object and the local file.
            The remote key is ``<file_name>.json.gz`` and the local output is
            ``<file_name>.json``.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    target_path = f"{file_name}.json"
    # If file already exists locally do not re-download game
    if os.path.isfile(target_path):
        return

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz", timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download {file_name}")
        return

    # Decompress into a scratch file first. If the payload is corrupt, a
    # half-written "<file_name>.json" would otherwise satisfy the isfile()
    # check above on the next run and the broken file would never be replaced.
    partial_path = f"{target_path}.part"
    try:
        gzip_bytes = BytesIO(response.content)
        with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:
            with open(partial_path, 'wb') as output_file:
                shutil.copyfileobj(gzipped_file, output_file)
        # Publish the finished file only once it is complete.
        os.replace(partial_path, target_path)
        print(f"{file_name}.json written")
    except Exception as e:
        print("Error:", e)
        if os.path.exists(partial_path):
            os.remove(partial_path)


def download_esports_files():
    """Fetch every esports metadata file into the local ``esports-data`` folder.

    Each file is handed to ``download_gzip_and_write_to_json``, which skips
    files that already exist locally.
    """
    directory = "esports-data"
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(directory, exist_ok=True)

    esports_data_files = [
        "leagues",
        "tournaments",
        "players",
        "teams",
        "mapping_data",
        "tournaments_without_game_data",
        "unfiltered_players",
        "unfiltered_teams",
    ]
    for file_name in esports_data_files:
        download_gzip_and_write_to_json(f"{directory}/{file_name}")





# Trigger the download when this code is executed directly; the guard keeps it
# from running if the code is imported as a module.
if __name__ == "__main__":
    download_esports_files()



esports-data/tournaments_without_game_data.json written
esports-data/unfiltered_players.json written
esports-data/unfiltered_teams.json written


First issue: pandas' `read_json` truncated the IDs because the numbers were too long, so the files are parsed with the `json` module instead.

In [12]:
def json_to_df(path):
    """Load a JSON file and flatten it into a DataFrame.

    The file is parsed with the ``json`` module and flattened with
    ``pd.json_normalize``, so nested objects become dotted column names
    (e.g. ``teamMapping.100``).

    Args:
        path: Location of the JSON file.

    Returns:
        pandas.DataFrame built from the parsed content.
    """
    # JSON text is UTF-8; relying on the platform default encoding corrupts
    # non-ASCII names on systems whose default is something else.
    with open(path, 'r', encoding='utf-8') as j:
        contents = json.load(j)

    return pd.json_normalize(contents)

In [13]:
# Tournament schedules come in two files (with and without game data); they are
# stacked into one frame and ordered by start date.
tournaments = json_to_df('esports-data/tournaments.json')
tournaments2 = json_to_df('esports-data/tournaments_without_game_data.json')
tournaments = (
    pd.concat([tournaments, tournaments2], ignore_index=True)
    .sort_values(by=['startDate'])
)

# Players and teams are read from the *unfiltered* exports; repeated player ids
# are collapsed to their first occurrence.
players = (
    json_to_df('esports-data/unfiltered_players.json')
    .drop_duplicates(subset=['player_id'])
)
teams = json_to_df('esports-data/unfiltered_teams.json')

leagues = json_to_df('esports-data/leagues.json')
mapping_data = json_to_df('esports-data/mapping_data.json')

# Seed ELO per league; the CSV carries a leftover index column that is dropped.
new_leagues = pd.read_csv('new_leagues.csv').drop(columns=['Unnamed: 0'])

In [845]:
new_leagues

Unnamed: 0,id,slug,ELO
0,98767991299243165,lcs,1300
1,109511549831443335,north_american_challenger_league,1100
2,109518549825754242,lcs_challengers_qualifiers,1000
3,107898214974993351,college_championship,1000
4,98767991332355509,cblol-brazil,1100
5,98767991310872058,lck,1500
6,98767991355908944,lcl,1100
7,105709090213554609,lco,1100
8,98767991302996019,lec,1400
9,98767991349978712,ljl-japan,1100


In [828]:
mapping_data.head()

Unnamed: 0,esportsGameId,platformGameId,teamMapping.200,teamMapping.100,participantMapping.3,participantMapping.5,participantMapping.10,participantMapping.2,participantMapping.1,participantMapping.9,participantMapping.7,participantMapping.8,participantMapping.6,participantMapping.4
0,110378429158160389,ESPORTSTMNT01:3416295,105550026570060790,105550033967461806,107492116585043595,105388980252039870,110730240651882119,108366332471078988,108205130568869560,106267600924403194,106267599829820917,107492063150003806,105501816646382923,102483272156027229
1,110471139171602607,ESPORTSTMNT02:3228341,105550005698683818,109485335453835911,107560281476330464,107559633217576287,105397238669368241,103478281341350420,103478281338008082,109519061710277829,109519057410044019,103461966873003900,105397207118950800,101383793082615993
2,110535609417029691,ESPORTSTMNT04:2693199,108352305932141947,109696092218588987,105536902415377888,109696579630268337,107564428297402857,99566406317824602,109696535754261145,105548605376090785,105548731617719496,103963733877026827,109705412728201213,109696576870241602
3,110847390580148354,ESPORTSTMNT01:3413275,99566406332987990,107700204561086446,98767991808793901,109642948838393879,101383792831678607,99566406443271739,99566406483827119,99566406296347493,104668539132702977,105709404500072628,100160799381721105,107705431471339985
4,110413246204026226,ESPORTSTMNT01:3412587,105550001032913831,109485335453835911,107560281476330464,107559633217576287,103743593842085398,103478281341350420,103478281338008082,108395451349202875,110434822594728434,103980682928812378,107559597273805278,101383793082615993


In [863]:
# Show only tournaments whose start date falls in 2023. na=False treats rows with
# a missing startDate as non-matching instead of letting NaN break the boolean
# indexing below.
mask = tournaments['startDate'].str.startswith('2023', na=False)
tournaments[mask]

Unnamed: 0,id,leagueId,name,slug,sport,startDate,endDate,stages
352,109761195185432372,108001239847565215,SuperBrawl,tft_events_2023,lol,2023-01-01,2023-12-30,"[{'name': 'Regular Season', 'type': None, 'slu..."
376,109505898644288466,109505812702106261,Season 2023 Kickoff,season_2023_kickoff,lol,2023-01-08,2023-01-13,"[{'name': 'Regular Season', 'type': None, 'slu..."
25,109659890853124371,107581050201097472,Opening 2023,ddh_opening_2023,lol,2023-01-11,2023-04-15,"[{'name': 'Regular Season', 'type': None, 'slu..."
38,109672646548487788,107603541524308819,Opening 2023,golden_league_opening_2023,lol,2023-01-12,2023-03-30,"[{'name': 'Round 1', 'type': None, 'slug': 'ro..."
255,109669600527985422,98767991314006698,Spring 2023,lpl_spring_2023,lol,2023-01-12,2023-04-16,"[{'name': 'Regular Season', 'type': None, 'slu..."
94,109660182592761313,107581669166925444,Opening 2023,el_opening_2023,lol,2023-01-12,2023-03-28,"[{'name': 'Regular Season', 'type': None, 'slu..."
99,109539822190865259,105266088231437431,Spring 2023,ultraliga_spring_2023,lol,2023-01-14,2023-04-04,"[{'name': 'Regular Season', 'type': None, 'slu..."
13,109625523800645158,98767991335774713,Spring 2023,lck_challengers_spring_2023,lol,2023-01-14,2023-04-10,"[{'name': 'Regular Season', 'type': None, 'slu..."
41,109467209705050129,105266098308571975,Spring 2023,nlc_spring_2023,lol,2023-01-15,2023-03-31,"[{'name': 'Regular Season', 'type': None, 'slu..."
52,109467087406180264,105266074488398661,Spring 2023,superliga_spring_2023,lol,2023-01-15,2023-04-03,"[{'name': 'Regular Season', 'type': None, 'slu..."


In [10]:
# Empty rating table. start_elo() appends one row per player, with the columns
# id / handle / ELO, the first time that player is rated.
ELO_players = pd.DataFrame(columns = ['id', 'handle', 'ELO'])
ELO_players

Unnamed: 0,id,handle,ELO


In [21]:
# Spot check: read the seed ELO configured for one hard-coded league id.
# .item() needs exactly one matching row and raises ValueError otherwise.
ELO = new_leagues.loc[new_leagues['id'] == 107407335299756365]['ELO'].item()
ELO

1000

In [24]:
def start_elo(player_id, league_id, ELO_players, new_leagues):
    """Append a first rating row for ``player_id`` and return the extended table.

    The handle is read from the module-level ``players`` frame, and the starting
    rating is the ``ELO`` listed for ``league_id`` (converted with ``int``) in
    ``new_leagues``. Both lookups go through ``.item()``, so each must match
    exactly one row or a ``ValueError`` is raised.

    Returns:
        A new DataFrame: ``ELO_players`` plus one row with ``id``, ``handle``
        and ``ELO``, re-indexed from zero.
    """
    player_rows = players[players['player_id'] == player_id]
    handle = player_rows['handle'].item()

    league_rows = new_leagues[new_leagues['id'] == int(league_id)]
    ELO = league_rows['ELO'].item()

    first_rating = pd.DataFrame([{'id': player_id, 'handle': handle, 'ELO': ELO}])
    return pd.concat([ELO_players, first_rating], ignore_index=True)

In [16]:
def _side_rating(slots, game_rows, league_id, ELO_players, new_leagues, known_player_ids):
    """Total the ELO of one side's recognised players, seeding unrated ones.

    Returns ``(elo_total, player_ids, ELO_players)``. The table is handed back
    because ``start_elo`` produces a new frame whenever a player is added.
    """
    side_total = 0
    side_players = []
    for slot in slots:
        player_id = game_rows['participantMapping.{}'.format(slot)].item()
        # Participants with no row in the players table are left out entirely.
        if player_id not in known_player_ids:
            continue
        side_players.append(player_id)
        # Compare against the id column only; scanning every column could
        # mistake a handle or a rating value for a player id.
        if player_id not in ELO_players['id'].values:
            ELO_players = start_elo(player_id, league_id, ELO_players, new_leagues)
        side_total += ELO_players.loc[ELO_players['id'] == player_id, 'ELO'].item()
    return side_total, side_players, ELO_players


def get_elo(game_id,league_id, ELO_players,new_leagues):
    """Compute the average ELO of both sides of one game.

    Participants 1-5 of the game's ``mapping_data`` row are treated as the blue
    side and participants 6-10 as the red side. Players missing from the
    module-level ``players`` frame are ignored; players without a rating yet are
    seeded through ``start_elo`` using ``league_id``.

    Args:
        game_id: Value matched against ``mapping_data['esportsGameId']``.
        league_id: League used to seed players that have no rating yet.
        ELO_players: Rating table with at least ``id`` and ``ELO`` columns.
        new_leagues: League seed table forwarded to ``start_elo``.

    Returns:
        Tuple ``(blue_average, red_average, players_blue, players_red,
        ELO_players)``. A side with no recognised player averages to 0.

    Raises:
        ValueError: if ``mapping_data`` does not hold exactly one row for
            ``game_id``.
    """
    # Filter the mapping table once instead of once per participant.
    game_rows = mapping_data.loc[mapping_data['esportsGameId'] == game_id]
    if len(game_rows) != 1:
        raise ValueError(
            "expected exactly one mapping_data row for game {!r}, found {}".format(
                game_id, len(game_rows)
            )
        )

    # Only the player_id column decides whether a participant is known.
    known_player_ids = players['player_id'].values

    elo_blue_side, players_blue, ELO_players = _side_rating(
        range(1, 6), game_rows, league_id, ELO_players, new_leagues, known_player_ids
    )
    elo_red_side, players_red, ELO_players = _side_rating(
        range(6, 11), game_rows, league_id, ELO_players, new_leagues, known_player_ids
    )

    # max(..., 1) keeps an empty side from dividing by zero; its total is 0.
    return (
        elo_blue_side / max(len(players_blue), 1),
        elo_red_side / max(len(players_red), 1),
        players_blue,
        players_red,
        ELO_players,
    )

In [17]:
def elo_calc(r_a, r_b, k, result, name):
    """Return the rating deltas ``(change_a, change_b)`` for one game.

    Args:
        r_a: Rating of side A.
        r_b: Rating of side B.
        k: K-factor scaling the size of the adjustment.
        result: Outcome for side A; only the string ``'win'`` counts as a win,
            anything else is scored as a loss.
        name: Stage name; when it equals ``'knockouts'`` both sides receive an
            extra 10 points.

    Returns:
        Two integers. Each scaled difference is truncated toward zero by
        ``int`` before the bonus is added.
    """
    def expected_score(own, other):
        # Logistic expectation on a 400-point scale.
        return 1 / (1 + pow(10, ((other - own)/400)))

    expected_a = expected_score(r_a, r_b)
    expected_b = expected_score(r_b, r_a)
    score_a = 1 if result == 'win' else 0
    bonus = 10 * (name == 'knockouts')

    change_a = int(k * (score_a - expected_a)) + bonus
    change_b = int(k * (1 - score_a - expected_b)) + bonus
    return change_a, change_b

In [18]:
def elo_change(elo_change_blue, elo_change_red, ELO_players, players_blue, players_red, elo_blue, elo_red):
    """Add each side's rating delta to its players and return the table.

    ``ELO_players`` is modified in place and the same object is returned. Blue
    players are updated first, then red players. ``elo_blue`` and ``elo_red``
    are accepted but not used by the current logic.

    Raises:
        ValueError: from ``.item()`` when a listed player does not match
            exactly one row of ``ELO_players``.
    """
    sides = (
        (players_blue, elo_change_blue),
        (players_red, elo_change_red),
    )
    for roster, delta in sides:
        for player in roster:
            row_mask = ELO_players['id'] == player
            current = ELO_players.loc[row_mask]['ELO'].item()
            ELO_players.loc[row_mask, 'ELO'] = current + delta
    return ELO_players

In [8]:
def _iter_games(tournament):
    """Yield ``(stage, match, game)`` for every game nested inside a tournament row."""
    for stage in tournament["stages"]:
        for section in stage["sections"]:
            for match in section["matches"]:
                for game in match["games"]:
                    yield stage, match, game


def _blue_side_outcome(game):
    """Return the outcome string recorded for the blue side of ``game``."""
    teams = game['teams']
    # NOTE(review): nothing visible here guarantees that teams[0] is the blue
    # side. When a team record carries an explicit 'side' marker, trust it;
    # otherwise fall back to the first entry as before. Confirm against the schema.
    for team in teams:
        if isinstance(team, dict) and team.get('side') == 'blue':
            return team['result']['outcome']
    return teams[0]['result']['outcome']


def advance_elo_one_month(year_month, ELO_players, new_leagues):
    """Replay every completed game of the tournaments starting in ``year_month``.

    A tournament is selected when its ``startDate`` string begins with
    ``year_month``, so any date prefix works (``'2021-03'`` or a full day).
    The whole tournament is then processed in this call; individual game dates
    are not inspected. Tournaments whose league is not listed in
    ``new_leagues['id']`` are reported and skipped, as are games that are not
    in ``mapping_data['esportsGameId']`` or whose state is not ``'completed'``.

    Args:
        year_month: Prefix matched against each tournament's ``startDate``.
        ELO_players: Current rating table.
        new_leagues: League table with ``id`` and seed ``ELO`` columns.

    Returns:
        The rating table after all selected games have been applied.
    """
    # Build the lookup sets once, from the id columns only. Re-creating
    # `.values` for every game scanned the full tables (all columns) each time.
    rated_league_ids = set(new_leagues['id'].tolist())
    mapped_game_ids = set(mapping_data['esportsGameId'].tolist())

    for index, tournament in tournaments.iterrows():
        start_date = tournament.get("startDate", "")
        # A missing startDate shows up as NaN (a float), which has no startswith.
        if not isinstance(start_date, str) or not start_date.startswith(year_month):
            continue
        if int(tournament['leagueId']) not in rated_league_ids:
            print(tournament['slug'] ,' not in leagues')
            continue
        print(f"Processing {tournament['slug']}")

        for stage, match, game in _iter_games(tournament):
            if game['id'] not in mapped_game_ids or game["state"] != "completed":
                continue
            elo_blue_side, elo_red_side, players_blue, players_red, ELO_players = get_elo(
                game['id'], tournament['leagueId'], ELO_players, new_leagues
            )
            # K is 30, raised to 50 when the match strategy count exceeds 4
            # (presumably best-of-five series; confirm against the data).
            k = 30 + 20 * (match['strategy']['count'] > 4)
            result_blue_side = _blue_side_outcome(game)
            elo_change_blue, elo_change_red = elo_calc(
                elo_blue_side, elo_red_side, k, result_blue_side, stage['name']
            )
            ELO_players = elo_change(
                elo_change_blue, elo_change_red, ELO_players,
                players_blue, players_red, elo_blue_side, elo_red_side,
            )

    return ELO_players

In [15]:
%%time
ELO_players = advance_elo_one_month('2020-01', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-02', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-03', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-04', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-05', ELO_players, new_leagues)
ELO_players['ELO_06_2020'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)
ELO_players = advance_elo_one_month('2020-06', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-07', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-08', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-09', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-10', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-11', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2020-12', ELO_players, new_leagues)
ELO_players['ELO_12_2020'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)

Processing lpl_spring_2020
Processing lcs_spring_2020


NameError: name 'get_elo' is not defined

In [851]:
%%time
ELO_players = advance_elo_one_month('2021-01', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-02', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-03', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-04', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-05', ELO_players, new_leagues)
ELO_players['ELO_06_2021'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)
ELO_players = advance_elo_one_month('2021-06', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-07', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-08', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-09', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-10', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-11', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2021-12', ELO_players, new_leagues)
ELO_players['ELO_12_2021'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)

Processing lec_spring_2021
Processing lpl_spring_2021
Processing greek_legends_league_spring_2021
belgian_league_2021_split1  not in leagues
dutch_league_2021_split1  not in leagues
Processing pg_nationals_spring_2021
Processing lck_spring_2021
Processing lcs_lock_in_2021
baltic_masters_2021_spring  not in leagues
Processing cblol_split_1_2021
Processing superliga_spring_2021
Processing lck_challengers_spring_2021
Processing prime_league_spring_2021
Processing cblol_academy_split_1_2021
Processing nlc_2021_split1
Processing ultraliga_spring_2021
Processing lfl_2021_spring
lcs_academy_spring_2021  not in leagues
tal_winter_2021  not in leagues
Processing ljl_spring_2021
Processing tcl_winter_2021
Processing hitpoint_masters_spring_2021
Processing liga_portuguesa_spring_2021
Processing esports_balkan_league_spring_2021
Processing lla_opening_2021
Processing lcs_spring_2021
Processing lcl_spring_2021
Processing pcs_spring_2021
Processing lco_split_1_2021
Processing mss_2021
Processing eur

In [852]:
%%time
ELO_players = advance_elo_one_month('2022-01', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-02', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-03', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-04', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-05', ELO_players, new_leagues)
ELO_players['ELO_06_2022'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)
ELO_players = advance_elo_one_month('2022-06', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-07', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-08', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-09', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-10', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-11', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2022-12', ELO_players, new_leagues)
ELO_players['ELO_12_2022'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)

Processing ultraliga_spring_2022
Processing nlc_spring_2022
Processing prime_league_spring_2022
Processing lec_spring_2022
Processing lfl_2022_spring
Processing lck_challengers_spring_2022
Processing superliga_spring_2022
Processing lpl_spring_2022
Processing lck_spring_2022
Processing lcs_lock_in_2022
Processing elite_series_spring_2022
Processing esports_balkan_league_spring_2022
Processing greek_legends_league_spring_2022
Processing liga_portuguesa_spring_2022
Processing pg_nationals_spring_2022
Processing hitpoint_masters_spring_2022
honor_division_opening_2022  not in leagues
lcs_academy_spring_2022  not in leagues
tal_winter_2022  not in leagues
Processing tcl_winter_2022
Processing cblol_split_1_2022
Processing lco_split_1_2022
Processing cblol_academy_split_1_2022
honor_league_opening_2022  not in leagues
elements_league_opening_2022  not in leagues
volcano_league_opening_2022  not in leagues
golden_league_opening_2022  not in leagues
master_flow_league_opening_2022  not in lea

In [25]:
%%time
# 2023 replay. The January-April calls are commented out, so tournaments that
# start in those months are not applied by this cell.
#ELO_players = advance_elo_one_month('2023-01', ELO_players, new_leagues)
#ELO_players = advance_elo_one_month('2023-02', ELO_players, new_leagues)
#ELO_players = advance_elo_one_month('2023-03', ELO_players, new_leagues)
#ELO_players = advance_elo_one_month('2023-04', ELO_players, new_leagues)
# advance_elo_one_month matches its first argument as a prefix of startDate, so
# a full date restricts the run to tournaments starting on exactly that day.
# NOTE(review): tournaments starting on any other day in May are therefore
# skipped - confirm this is intended.
ELO_players = advance_elo_one_month('2023-05-02', ELO_players, new_leagues)
# Snapshot of the ratings after the 2023-05-02 tournaments only.
ELO_players['ELO_05_2023'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)
ELO_players = advance_elo_one_month('2023-05-28', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2023-06', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2023-07', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2023-08', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2023-09', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2023-10', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2023-11', ELO_players, new_leagues)
ELO_players = advance_elo_one_month('2023-12', ELO_players, new_leagues)
# End-of-year snapshot of the ratings.
ELO_players['ELO_12_2023'] = ELO_players['ELO']
#ELO_players['ELO'] = ELO_players['ELO'].apply(lambda x: x*0.9)

Processing msi_2023
Processing ultraliga_summer_2023
Processing superliga_summer_2023
Processing lpl_summer_2023
Processing lck_challengers_summer_2023
Processing liga_portuguesa_summer_2023
Processing hitpoint_masters_summer_2023
Processing gll_summer_2023
Processing nlc_summer_2023
Processing lla_closing_2023
Processing elite_series_summer_2023
Processing lco_split_2_2023
Processing ebl_summer_2023
Processing tcl_summer_2023
Processing lck_summer_2023
Processing pg_nationals_summer_2023
Processing nacl_summer_2023
Processing ljl_summer_2023
Processing cblol_2023_split_2
Processing cblol_academy_2023_split_2
Processing arabian_league_summer_2023
Processing nacl_qualifiers_1_summer_2023
Processing lec_summer_2023


ValueError: can only convert an array of size 1 to a Python scalar

In [14]:
ELO_players.sort_values(by =['ELO'], ascending = False, inplace =True)

In [16]:
pd.set_option('display.max_rows', None)

In [677]:
player_id = '103495716738607011'
print(players.loc[players['player_id'] == player_id]['handle'].item(), ' not in players')


Lot  not in players


In [17]:
ELO_players

Unnamed: 0,id,handle,ELO,ELO_05_2023,ELO_12_2023
40,102186485482484390,Doran,1835,1730.0,1835
41,98767975916458257,Peanut,1835,1730.0,1835
42,99871276342168416,Chovy,1835,1730.0,1835
44,105501709748188393,Delight,1835,1730.0,1835
43,107492068702410338,Peyz,1835,1730.0,1835
64,100205573984889078,MISSING,1830,1900.0,1830
60,101388912796120370,369,1830,1900.0,1830
61,101671284628761661,Kanavi,1830,1900.0,1830
63,98767975906852059,Ruler,1830,1900.0,1830
62,99566404803543690,knight,1821,1900.0,1821


In [887]:
teams.loc[teams['team_id'] == "99566404850008779"]

Unnamed: 0,team_id,name,acronym,slug
1398,99566404850008779,Suzhou LNG Esports,LNG,lng-esports


In [244]:
# Order the teams table by team id (sorted in place).
teams.sort_values(by =['team_id'], inplace =True)

In [890]:
players.loc[players['player_id'] == "110547957837987327"]

Unnamed: 0,player_id,handle,first_name,last_name,home_team_id
4491,110547957837987327,0N,WENJUN,LUO,99566404853854212


In [893]:
players.loc[players['handle'] == "ON"]

Unnamed: 0,player_id,handle,first_name,last_name,home_team_id
5156,105516578130811296,ON,Wen-Jun,Luo,


In [638]:

teams.loc[teams['team_id'] == "102235771678061291"]

Unnamed: 0,team_id,name,acronym,slug
592,102235771678061291,DenizBank İstanbul Wildcats,IW,fastpay-wildcats


In [180]:
mapping_data.loc[mapping_data['esportsGameId'] == "108998961199895793"]['participantMapping.3'].item()

'98767991747728851'

In [888]:
mapping_data.loc[mapping_data['esportsGameId'] == "110428848767941212"]

Unnamed: 0,esportsGameId,platformGameId,teamMapping.200,teamMapping.100,participantMapping.3,participantMapping.5,participantMapping.10,participantMapping.2,participantMapping.1,participantMapping.9,participantMapping.7,participantMapping.8,participantMapping.6,participantMapping.4
3935,110428848767941212,LPL_A:343741,99566404853854212,99566404850008779,98767975951139628,103817065229388803,110547957837987327,99871276342823057,108477153941144313,106368692529261498,105516152837078491,99566404798393925,103478281356330346,101388912814247335


In [294]:
players['ELO'] = np.nan

In [329]:
new_leagues

Unnamed: 0,id,slug,ELO
0,98767991299243165,lcs,1300
1,109511549831443335,north_american_challenger_league,1100
2,109518549825754242,lcs_challengers_qualifiers,1000
3,107898214974993351,college_championship,1000
4,98767991332355509,cblol-brazil,1100
5,98767991310872058,lck,1600
6,98767991355908944,lcl,1100
7,105709090213554609,lco,1100
8,98767991302996019,lec,1400
9,98767991349978712,ljl-japan,1100


In [469]:
# Seed one test player with the starting ELO of league 98767991302996019 (lec)
# unless the player already has a rating row. start_elo takes the league table
# as its fourth argument, and membership is tested against the id column only.
if '103536921518408674' not in ELO_players['id'].values:
    ELO_players = start_elo('103536921518408674', '98767991302996019', ELO_players, new_leagues)
playerss = ['103536921518408674','103536921518408674','103536921518408674','103536921518408674','103536921518408674']

In [459]:
ELO_players

Unnamed: 0,id,handle,ELO
0,103536921518408674,LIMIT,1450
