# Data Exploration

Loading in the data and learning more about what we have access to

In [2]:
import pandas as pd
from collections import defaultdict
import os
from sklearn import linear_model

In [3]:
# leagues_df = pd.read_json("esports-data/leagues.json")
# players_df = pd.read_json("esports-data/players.json")
# teams_df = pd.read_json("esports-data/teams.json")
# tournaments_df = pd.read_json("esports-data/tournaments.json")

In [4]:
single_game_df = pd.read_json("games/ESPORTSTMNT03_3195064.json")

# game_info = single_game_df[single_game_df["eventType"] == "game_info"]
# for x in game_info["participants"][0]:
#     print(x)

In [5]:
def get_team_players(game_info):
    participants = game_info["participants"]
    team = defaultdict(list)
    for participant in participants:
        for info in participant:
            team[info["summonerName"].split(" ")[0]].append(info)

    return team

In [6]:
# All event types in this game
set(single_game_df["eventType"])

{'building_destroyed',
 'champion_kill',
 'champion_kill_special',
 'champion_level_up',
 'epic_monster_kill',
 'epic_monster_spawn',
 'game_end',
 'game_info',
 'item_destroyed',
 'item_purchased',
 'item_sold',
 'item_undo',
 'objective_bounty_prestart',
 'queued_dragon_info',
 'queued_epic_monster_info',
 'skill_level_up',
 'stats_update',
 'summoner_spell_used',
 'turret_plate_destroyed',
 'turret_plate_gold_earned',
 'ward_killed',
 'ward_placed'}

In [7]:
def get_players_to_team(game_info):
    teams_to_players = get_team_players(game_info)
    players_to_team = {}
    for team_id,info in teams_to_players.items():
        for player in info:
            players_to_team[player["participantID"]] = player["teamID"]
    
    return players_to_team

In [8]:
# team_ids = defaultdict(int)
# for team in teams_to_players.keys():
#     team_ids[team] = 0

In [9]:
def teamID_to_teamLetters(game_info):
    teamID_to_teamLetters = {}
    for participant in game_info["participants"][0]:
        teamID = participant["teamID"]
        if teamID not in teamID_to_teamLetters:
            teamID_to_teamLetters[teamID] = participant["summonerName"].split(" ")[0] #Grab the first part of the name. ie: TSM Doublelift -> TSM

    return teamID_to_teamLetters

In [10]:
def get_game_id(single_game_df):
    game_info = single_game_df[single_game_df["eventType"] == "game_info"]
    return game_info["platformGameId"][0]

In [11]:
def get_num_kills(single_game_df, teamLetters):
    numKills = defaultdict(int)

    kill_info = single_game_df[single_game_df["eventType"] == "champion_kill"]

    for _, row in kill_info.iterrows():
        numKills[teamLetters[int(row["killerTeamID"])]] += 1

    return numKills

In [12]:
def get_num_wards(single_game_df, teamLetters, players_to_team):
    numWards = defaultdict(int)

    ward_info = single_game_df[single_game_df["eventType"] == "ward_placed"]

    for _, row in ward_info.iterrows():
        numWards[teamLetters[players_to_team[row["placer"]]]] += 1

    return numWards

In [13]:
def get_first_blood(single_game_df, teamLetters, players_to_team):
    firstBlood = {}
    for _, letters in teamLetters.items():
        firstBlood[letters] = 0

    firstBlood_df = single_game_df[single_game_df["killType"] == "firstBlood"]
    firstBlood_df = firstBlood_df[firstBlood_df.columns[~firstBlood_df.isnull().all()]]

    for _, row in firstBlood_df.iterrows():
        firstBlood[teamLetters[players_to_team[row["killer"]]]] += 1

    return firstBlood

In [14]:
def get_winning_team(single_game_df, teamLetters):
    winningTeam = {}
    for _, letters in teamLetters.items():
        winningTeam[letters] = 0

    game_end_info = single_game_df[single_game_df["eventType"] == "game_end"]
    game_end_info = game_end_info[game_end_info.columns[~game_end_info.isnull().all()]]

    for _, row in game_end_info.iterrows():
        winningTeam[teamLetters[int(row["winningTeam"])]] += 1

    return winningTeam

In [15]:
def get_losing_team(single_game_df, teamLetters):
    teamResults = get_winning_team(single_game_df, teamLetters)
    for letters, win in teamResults.items():
        teamResults[letters] = 0 if win else 1

    return teamResults    

In [16]:
def transform_to_linear_model_df(single_game_df):
    """
    Takes a single game dataframe, and converts it to the following format:
            numKills    |   numWards    |   firstBlood  |   ...
    team1:              |               |               |
    --------------------|---------------|---------------|----------
    team2:              |               |               |
    """
    teamLetters = teamID_to_teamLetters(single_game_df[single_game_df["eventType"] == "game_info"])
    players_to_team = get_players_to_team(single_game_df[single_game_df["eventType"] == "game_info"])
    model_dict = {
        'game': get_game_id(single_game_df),
        'numKills': get_num_kills(single_game_df, teamLetters),
        'numWards': get_num_wards(single_game_df, teamLetters, players_to_team),
        'firstBlood': get_first_blood(single_game_df, teamLetters, players_to_team),
        'wins': get_winning_team(single_game_df, teamLetters),
        'losses': get_losing_team(single_game_df, teamLetters),
    }
    # print(model_dict)
    # return pd.DataFrame.from_dict(model_dict)
    return model_dict

In [17]:
transform_to_linear_model_df(single_game_df)

{'game': 'ESPORTSTMNT03:3195064',
 'numKills': defaultdict(int, {'TA': 15, 'TRV': 27}),
 'numWards': defaultdict(int, {'TA': 51, 'TRV': 41}),
 'firstBlood': {'TA': 1, 'TRV': 0},
 'wins': {'TA': 0, 'TRV': 1},
 'losses': {'TA': 1, 'TRV': 0}}

In [39]:
games_folder = os.listdir("games")
games = games_folder[:40]

dfs = defaultdict(int)

for game in games:
    single_game_df = pd.read_json(f"games/{game}")
    linear_df = transform_to_linear_model_df(single_game_df=single_game_df)
    for feature,team_to_float in linear_df.items():
        # print(team_to_float)
        if feature != "game":
            for team, val in team_to_float.items():
                if feature in dfs:
                    if team in dfs[feature]:
                        dfs[feature][team] += val
                    else:
                        dfs[feature][team] = val
                else:
                    dfs[feature] = {}

print(dfs)
linear = pd.DataFrame.from_dict(dfs)
linear

KeyError: 300

In [24]:
features = ["numKills", "numWards", "firstBlood"]
predict = ["wins"]

In [30]:
linear['winRatio'] = linear['wins']/(linear['wins'] + linear['losses'])
linear

Unnamed: 0,numKills,numWards,firstBlood,wins,losses,winRatio
CCG,25,76,1,1,1,0.5
TRV,66,213,2,2,1,0.666667
TA,120,789,3,4,4,0.5
TET,52,207,2,1,2,0.333333
COLD,21,230,1,1,0,1.0
LIT,99,620,1,2,3,0.4
CFY,135,774,5,4,3,0.571429
GHG,83,343,3,2,3,0.4
WU,119,602,4,3,3,0.5
MIR,152,737,2,6,1,0.857143


In [31]:
def fit_model(df):
    """
    return coeffs, intercept
    """
    regr = linear_model.LinearRegression()
    regr.fit(df[features], df["winRatio"])

    feat_to_coef = {}
    for feat, coef in zip(features, regr.coef_):
        feat_to_coef[feat] = coef

    return regr.intercept_, feat_to_coef


In [33]:
intercept_, coeff = fit_model(linear)
intercept_, coeff

(0.41896732052870467,
 {'numKills': 0.003739864903306314,
  'numWards': -0.000614942627725131,
  'firstBlood': 0.028496174813236085})

In [38]:
single_game = pd.read_json("games/ESPORTSTMNT03_3200614.json")
test = transform_to_linear_model_df(single_game)

(test['numKills']['TET'] * coeff['numKills']) + (test['numWards']['TET'] * coeff['numWards']) + (test['firstBlood']['TET'] * coeff['firstBlood'])

0.01915918413918927

In [37]:
test

{'game': 'ESPORTSTMNT03:3200614',
 'numKills': defaultdict(int, {'TET': 10, 'TA': 18}),
 'numWards': defaultdict(int, {'TA': 89, 'TET': 76}),
 'firstBlood': {'TET': 1, 'TA': 0},
 'wins': {'TET': 0, 'TA': 1},
 'losses': {'TET': 1, 'TA': 0}}