In [2]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [3]:
S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"


def download_gzip_and_write_to_json(file_name):
   # If file already exists locally do not re-download game
   if os.path.isfile(f"{file_name}.json"):
       return

   response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz")
   if response.status_code == 200:
       try:
           gzip_bytes = BytesIO(response.content)
           with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:
               with open(f"{file_name}.json", 'wb') as output_file:
                   shutil.copyfileobj(gzipped_file, output_file)
               print(f"{file_name}.json written")
       except Exception as e:
           print("Error:", e)
   else:
       print(f"Failed to download {file_name}")


def download_esports_files():
   directory = "esports-data"
   if not os.path.exists(directory):
       os.makedirs(directory)

   esports_data_files = ["leagues", "tournaments", "players", "teams", "mapping_data","tournaments_without_game_data","unfiltered_players","unfiltered_teams"]
   for file_name in esports_data_files:
       download_gzip_and_write_to_json(f"{directory}/{file_name}")





if __name__ == "__main__":
    download_esports_files()

In [5]:
def json_to_df(path):
    with open(path, 'r') as j:
        contents = json.loads(j.read())

    df = pd.json_normalize(contents)
    return df

In [6]:
tournaments = json_to_df('esports-data/tournaments.json')
tournaments2 = json_to_df('esports-data/tournaments_without_game_data.json')
players = json_to_df('esports-data/unfiltered_players.json')
#unfiltered_players = json_to_df('esports-data/unfiltered_players.json')
leagues = json_to_df('esports-data/leagues.json')
teams = json_to_df('esports-data/unfiltered_teams.json')
mapping_data = json_to_df('esports-data/mapping_data.json')
new_leagues = pd.read_csv('new_leagues.csv')
new_leagues = new_leagues.drop('Unnamed: 0', axis = 1)
tournaments = pd.concat([tournaments, tournaments2], ignore_index=True)
tournaments.sort_values(by =['startDate'], inplace =True)
players = players.drop_duplicates(subset=['player_id'])

In [9]:
mask = tournaments['startDate'].str.startswith('2023')
tournaments[mask]

Unnamed: 0,id,leagueId,name,slug,sport,startDate,endDate,stages
352,109761195185432372,108001239847565215,SuperBrawl,tft_events_2023,lol,2023-01-01,2023-12-30,"[{'name': 'Regular Season', 'type': None, 'slu..."
376,109505898644288466,109505812702106261,Season 2023 Kickoff,season_2023_kickoff,lol,2023-01-08,2023-01-13,"[{'name': 'Regular Season', 'type': None, 'slu..."
25,109659890853124371,107581050201097472,Opening 2023,ddh_opening_2023,lol,2023-01-11,2023-04-15,"[{'name': 'Regular Season', 'type': None, 'slu..."
38,109672646548487788,107603541524308819,Opening 2023,golden_league_opening_2023,lol,2023-01-12,2023-03-30,"[{'name': 'Round 1', 'type': None, 'slug': 'ro..."
255,109669600527985422,98767991314006698,Spring 2023,lpl_spring_2023,lol,2023-01-12,2023-04-16,"[{'name': 'Regular Season', 'type': None, 'slu..."
94,109660182592761313,107581669166925444,Opening 2023,el_opening_2023,lol,2023-01-12,2023-03-28,"[{'name': 'Regular Season', 'type': None, 'slu..."
99,109539822190865259,105266088231437431,Spring 2023,ultraliga_spring_2023,lol,2023-01-14,2023-04-04,"[{'name': 'Regular Season', 'type': None, 'slu..."
13,109625523800645158,98767991335774713,Spring 2023,lck_challengers_spring_2023,lol,2023-01-14,2023-04-10,"[{'name': 'Regular Season', 'type': None, 'slu..."
41,109467209705050129,105266098308571975,Spring 2023,nlc_spring_2023,lol,2023-01-15,2023-03-31,"[{'name': 'Regular Season', 'type': None, 'slu..."
52,109467087406180264,105266074488398661,Spring 2023,superliga_spring_2023,lol,2023-01-15,2023-04-03,"[{'name': 'Regular Season', 'type': None, 'slu..."


In [23]:
ELO_teams = pd.DataFrame(columns = ['id', 'name', 'ELO'])
ELO_teams

Unnamed: 0,id,name,ELO


In [10]:
def advance_elo(year_month, ELO_teams, new_leagues):
    for index, tournament in tournaments.iterrows():
        start_date = tournament.get("startDate", "")
        if start_date.startswith(year_month):
            if int(tournament['leagueId']) not in new_leagues.values:
                print(tournament['slug'] ,' not in leagues')
                continue
            print(f"Processing {tournament['slug']}")
            for stage in tournament["stages"]:
                for section in stage["sections"]:
                    for match in section["matches"]:
                        if match['teams'][0]['id'] not in teams.values or match['teams'][1]['id'] not in teams.values:
                            continue
                        elo_blue_side, elo_red_side, ELO_teams = get_elo(match['teams'], tournament['leagueId'], ELO_teams,new_leagues)
                        n_games = match['strategy']['count']
                        result_blue_side = match['teams'][0]['result']['gameWins']
                        blue_wins = result_blue_side / n_games
                        k = 30 + 70 * (n_games > 4)
                        elo_change_blue, elo_change_red = elo_calc(elo_blue_side, elo_red_side, k, blue_wins)
                        ELO_teams = elo_change(elo_change_blue, elo_change_red, ELO_teams, match['teams'])
    return ELO_teams

In [11]:
def get_elo(teams, league_id, ELO_teams, new_leagues):
    teams_elo = []
    for team in teams:
        team_id = team['id']
        if team_id not in ELO_teams.values:
            ELO_teams = start_elo(team_id, league_id, ELO_teams, new_leagues)
        teams_elo.append(ELO_teams.loc[ELO_teams['id'] == team_id]['ELO'].item())
    return teams_elo[0], teams_elo[1], ELO_teams

In [12]:
def start_elo(team_id, league_id, ELO_teams, new_leagues):
    elo = new_leagues.loc[new_leagues['id'] == int(league_id)]['ELO'].item()
    slug = teams.loc[teams['team_id'] == team_id]['slug'].item()
    elot = pd.DataFrame({'id': [team_id], 'name': [slug], 'ELO':[elo]})
    ELO_teams = pd.concat([ELO_teams, elot], ignore_index=True)
    return ELO_teams

In [13]:
def elo_calc(r_a, r_b, k, r):
    e_a = 1 / (1 + pow(10, ((r_b - r_a)/400)))
    e_b = 1 / (1 + pow(10, ((r_a - r_b)/400)))
    change_a = int(k * (r - e_a))
    change_b = int(k * (1 - r - e_b))
    return change_a, change_b

In [14]:
def elo_change(elo_change_blue, elo_change_red, ELO_teams, teams):
    team_blue_side = teams[0]['id']
    elo = ELO_teams.loc[ELO_teams['id'] == team_blue_side]['ELO'].item()
    new_elo = elo + elo_change_blue
    ELO_teams.loc[ELO_teams['id'] == team_blue_side, 'ELO'] = new_elo
    
    team_red_side = teams[1]['id']
    elo = ELO_teams.loc[ELO_teams['id'] == team_red_side]['ELO'].item()
    new_elo = elo + elo_change_red
    ELO_teams.loc[ELO_teams['id'] == team_red_side, 'ELO'] = new_elo
    return ELO_teams

In [13]:
%%time
for i in range(11):
    ELO_teams = advance_elo('2022-0{}'.format(i), ELO_teams, new_leagues)

ultraliga_spring_2022  not in leagues
nlc_spring_2022  not in leagues
prime_league_spring_2022  not in leagues
lec_spring_2022  not in leagues
lfl_2022_spring  not in leagues
lck_challengers_spring_2022  not in leagues
superliga_spring_2022  not in leagues
lpl_spring_2022  not in leagues
lck_spring_2022  not in leagues
lcs_lock_in_2022  not in leagues
elite_series_spring_2022  not in leagues
esports_balkan_league_spring_2022  not in leagues
greek_legends_league_spring_2022  not in leagues
liga_portuguesa_spring_2022  not in leagues
pg_nationals_spring_2022  not in leagues
hitpoint_masters_spring_2022  not in leagues
honor_division_opening_2022  not in leagues
lcs_academy_spring_2022  not in leagues
tal_winter_2022  not in leagues
tcl_winter_2022  not in leagues
cblol_split_1_2022  not in leagues
lco_split_1_2022  not in leagues
cblol_academy_split_1_2022  not in leagues
honor_league_opening_2022  not in leagues
elements_league_opening_2022  not in leagues
volcano_league_opening_2022  n

In [24]:
%%time
#ELO_teams = advance_elo('2023-01', ELO_teams, new_leagues)
#ELO_teams = advance_elo('2023-02', ELO_teams, new_leagues)
#ELO_teams = advance_elo('2023-04', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-05', ELO_teams, new_leagues)
#ELO_teams['ELO_05_2023'] = ELO_teams['ELO']
#ELO_teams['ELO'] = ELO_teams['ELO'].apply(lambda x: x*0.9)
#ELO_teams = advance_elo('2023-05-28', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-06', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-07', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-08', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-09', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-10', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-11', ELO_teams, new_leagues)
ELO_teams = advance_elo('2023-12', ELO_teams, new_leagues)
#ELO_teams['ELO_12_2023'] = ELO_teams['ELO']
#ELO_teams['ELO'] = ELO_teams['ELO'].apply(lambda x: x*0.9)f

Processing msi_2023
Processing lrn_closing_2023
Processing ultraliga_summer_2023
Processing superliga_summer_2023
Processing lpl_summer_2023
Processing lfl_summer_2023
Processing prime_league_summer_2023
Processing lrs_closing_2023
Processing lcs_summer_2023
Processing lck_challengers_summer_2023
Processing liga_portuguesa_summer_2023
Processing hitpoint_masters_summer_2023
Processing gll_summer_2023
Processing nlc_summer_2023
Processing lla_closing_2023
Processing elite_series_summer_2023
Processing lco_split_2_2023
Processing ebl_summer_2023
Processing tcl_summer_2023
Processing lck_summer_2023
Processing pg_nationals_summer_2023
Processing nacl_summer_2023
Processing ljl_summer_2023
Processing cblol_2023_split_2
Processing cblol_academy_2023_split_2
Processing arabian_league_summer_2023
Processing nacl_qualifiers_1_summer_2023
Processing lec_summer_2023
Processing ljl_academy_2023
Processing vcs_summer_2023
Processing pcs_summer_2023
Processing nacl_qualifiers_2_summer_2023
Processi

In [25]:
ELO_teams.sort_values(by =['ELO'], ascending = False, inplace =True)

In [28]:
ELO_teams

Unnamed: 0,id,name,ELO
12,99566404852189289,jd-gaming,1689
8,100205573495116443,geng,1654
0,99566404853854212,bilibili-gaming,1651
48,99566404850008779,lng-esports,1626
4,98767991926151025,g2-esports,1623
10,98767991853197861,t1,1608
181,99566404579461230,kt-rolster,1599
57,99566404853058754,weibo-gaming,1571
6,104367068120825486,psg-talon,1562
178,100725845018863243,dwg-kia,1552


In [8]:
pd.set_option('display.max_rows', None)

In [21]:
def get_tournament_rank(tournament_id):
    ELO_teams = pd.DataFrame(columns = ['id', 'name', 'ELO'])
    start_date = tournaments.loc[tournaments['id'] == tournament_id]['startDate'].item()
    start = datetime.strptime(start_date, '%Y-%m-%d')
    date = start - timedelta(days = 240)
    while date < start :
        ELO_teams = advance_elo(date.strftime('%Y-%m-%d'), ELO_teams, new_leagues)
        date = date + timedelta(days=1)
    tournament_teams = get_tournament_teams(tournament_id)
    tournament_teams = pd.merge(tournament_teams, ELO_teams, on="id", how="left")
    tournament_teams.sort_values(by =['ELO'], ascending = False, inplace =True)
    return tournament_teams

In [16]:
def get_tournament_teams(tournament_id):
    tournament_teams = pd.DataFrame(columns = ['id'])
    stages = tournaments.loc[tournaments['id'] == tournament_id, 'stages'].item()
    
    for stage in stages:
        for section in stage["sections"]:
            for match in section["matches"]:
                for team in match["teams"]:
                    team_id = team['id']
                    if team_id not in tournament_teams.values:
                        df = pd.DataFrame({'id': [team_id]})
                        tournament_teams = pd.concat([tournament_teams, df], ignore_index=True)
    
    return tournament_teams

In [20]:
%%time
df = get_tournament_rank('108998961191900167')

1
1
1
1
1
1
Processing lcs_spring_2022
1
1
1
1
1
Processing ljl_spring_2022
1
1
Processing lcl_spring_2022
Processing vcs_spring_2022
Processing pcs_spring_2022
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
lcs_proving_grounds_spring_2022  not in leagues
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Processing european_masters_spring_2022_play_ins
1
1
1
1
1
1
1
Processing tcl_summer_2022
1
1
1
Processing european_masters_spring_2022_main_event
1
1
1
1
1
1
1
1
1
1
Processing college_championship_2022
1
1
1
1
1
1
Processing lck_academy_01_2022
1
1
tft_event_summer  not in leagues
1
1
Processing lck_summer_2022
1
1
1
1
1
Processing lck_challengers_summer_2022
1
Processing msi_2022
1
1
1
1
master_flow_league_closing_2022  not in leagues
1
golden_league_closing_2022  not in leagues
1
1
honor_league_closing_2022  not in leagues
1
1
honor_division_closing_2022  not in leagues
1
1
1
1
elements_league_closing_2022  not in leagues
1
volcano_league_closing_2022  not in leagues
1
1
1
sta

In [18]:
df

Unnamed: 0,id,name,ELO
21,100205573495116443,geng,1665
7,98767991892579754,royal-never-give-up,1595
13,98767991853197861,t1,1590
15,99566404852189289,jd-gaming,1584
14,98767991882270868,edward-gaming,1581
19,99566404854685458,top-esports,1560
17,100725845018863243,dwg-kia,1554
18,101383793574360315,rogue,1531
6,99566404585387054,drx,1511
16,98767991926151025,g2-esports,1491


In [111]:
start_date = '2022-08-15'
date_string = '2022-06-01'
start = datetime.strptime(start_date, '%Y-%m-%d')
date = datetime.strptime(date_string, '%Y-%m-%d')
while date < start :
    print(date.strftime('%Y-%m-%d'))
    date = date + timedelta(days = 1)

2022-06-01
2022-06-02
2022-06-03
2022-06-04
2022-06-05
2022-06-06
2022-06-07
2022-06-08
2022-06-09
2022-06-10
2022-06-11
2022-06-12
2022-06-13
2022-06-14
2022-06-15
2022-06-16
2022-06-17
2022-06-18
2022-06-19
2022-06-20
2022-06-21
2022-06-22
2022-06-23
2022-06-24
2022-06-25
2022-06-26
2022-06-27
2022-06-28
2022-06-29
2022-06-30
2022-07-01
2022-07-02
2022-07-03
2022-07-04
2022-07-05
2022-07-06
2022-07-07
2022-07-08
2022-07-09
2022-07-10
2022-07-11
2022-07-12
2022-07-13
2022-07-14
2022-07-15
2022-07-16
2022-07-17
2022-07-18
2022-07-19
2022-07-20
2022-07-21
2022-07-22
2022-07-23
2022-07-24
2022-07-25
2022-07-26
2022-07-27
2022-07-28
2022-07-29
2022-07-30
2022-07-31
2022-08-01
2022-08-02
2022-08-03
2022-08-04
2022-08-05
2022-08-06
2022-08-07
2022-08-08
2022-08-09
2022-08-10
2022-08-11
2022-08-12
2022-08-13
2022-08-14


In [124]:
i = 0
#print(tournaments.loc[tournaments['id'] == '108998961191900167', 'stages'].item())
stages = tournaments.loc[tournaments['id'] == '108998961191900167', 'stages'].item()
for stage in stages:
    i += 1
    print(i)

1
2
3
4
