In [None]:
import pandas as pd
import numpy as np

Basic cleaning of the data

In [254]:
def load_and_clean_data(path="data.csv", sep=';'):
    """Load the match data file and normalise it for the analysis below.

    Arguments:
        path: location of the delimited data file. Defaults to "data.csv"
            in the current working directory.
        sep: field delimiter used in the file. Defaults to ';'.

    Returns:
        A new dataframe in which the `team` and `league` columns are
        lower-cased and the values "1"/1 become 1.0, "0"/0 become 0.0,
        'Blue' becomes -1 and 'Red' becomes 1.0.
    """
    df = pd.read_csv(path, sep=sep)

    # Lower-case the names so lookups do not depend on the casing in the file.
    df['team'] = df['team'].str.lower()
    df['league'] = df['league'].str.lower()

    # NOTE: the mapping is applied to every column of the frame, not only to
    # the flag/side columns.
    return df.replace({ "1": 1.0, "0": 0.0, 1: 1.0, 0: 0.0, 'Blue': -1, 'Red': 1.0 })


Returns all unique games played by a team.

Arguments:
    df: the dataframe containing the data.
    team: the team you are interested in, written in lower case (team names are lower-cased when the data is loaded). E.g. "echo fox"

In [255]:
def team_games(df, team):
    """Select one row per game for the given team.

    Prints the team name, then keeps the rows whose `team` column equals
    `team` and whose `player` column equals 'Team', dropping repeated
    `gameid` values (the first occurrence is kept).

    Arguments:
        df: the dataframe containing the data.
        team: the team name to match against the `team` column.

    Returns:
        The filtered dataframe, with its original index labels.
    """
    print(team)

    is_requested_team = df['team'] == team
    is_team_row = df['player'] == 'Team'

    matches = df[is_requested_team & is_team_row]
    return matches.drop_duplicates(subset=['gameid'])


Returns opponents for a given team.

Arguments:
    df: the dataframe containing the data.
    team: the team you are interested in.
    gameids: an array of gameids the team played in.


In [256]:
def opponent_games(df, team, gameids):
    """Find the other team in each of the listed games.

    Arguments:
        df: the dataframe containing the data.
        team: the team whose opponents are wanted.
        gameids: the gameids to look at.

    Returns:
        A dataframe with the columns 'gameid' and 'opponent', holding at
        most one row per gameid.
    """
    in_requested_games = df.gameid.isin(gameids)
    played_by_other_team = df.team != team

    # Keep a single row per game: the first remaining row names the opponent.
    rivals = df[in_requested_games & played_by_other_team].drop_duplicates(subset=['gameid'])

    return rivals[['gameid', 'team']].rename(columns={'team': 'opponent'})

In [257]:
df = load_and_clean_data()
team = team_games(df, 'cloud9')
opponents = opponent_games(df, 'cloud9', team.gameid)

# Columns shown in the per-game summary.
summary_columns = [
    'week', 'game', 'team', 'opponent',
    'side', 'result', 'teamkills', 'teamdeaths',
]

# Attach each game's opponent to the team's own row via the shared gameid.
result = team.merge(opponents, on="gameid")[summary_columns]

# Gives us a nice summary of the results to date
print(result)


cloud9
     week  game    team             opponent  side  result  teamkills  \
0      PI   1.0  cloud9       kabum e-sports  -1.0     1.0       13.0   
1      PI   1.0  cloud9   detonation focusme   1.0     1.0       18.0   
2      PI   3.0  cloud9       kabum e-sports   1.0     1.0       18.0   
3      PI   3.0  cloud9   detonation focusme  -1.0     1.0       14.0   
4   PI-KO   1.0  cloud9       gambit esports  -1.0     1.0       13.0   
5   PI-KO   2.0  cloud9       gambit esports   1.0     0.0        6.0   
6   PI-KO   3.0  cloud9       gambit esports  -1.0     1.0       18.0   
7   PI-KO   4.0  cloud9       gambit esports   1.0     0.0        8.0   
8   PI-KO   5.0  cloud9       gambit esports  -1.0     1.0       17.0   
9       G   1.0  cloud9  royal never give up   1.0     0.0        3.0   
10      G   2.0  cloud9             vitality   1.0     1.0       17.0   
11      G   3.0  cloud9                gen.g  -1.0     0.0        2.0   
12      G   5.0  cloud9                gen.g

Stats about First Blood/Turret/Dragon/Baron

In [258]:
fields = ['fb', 'ft', 'fd', 'fbaron']
games = team_games(df, 'cloud9')

# For every flag column, report: sum of the column / number of rows, and the ratio.
for f in fields:
    column = games[f]
    taken = column.sum()
    total = len(column)
    ratio = taken / total
    print(f + ':\t', taken, "/", total, ratio)

cloud9
fb:	 16.0 / 19 0.8421052631578947
ft:	 9.0 / 19 0.47368421052631576
fd:	 11.0 / 19 0.5789473684210527
fbaron:	 12.0 / 19 0.631578947368421


Using corr to find correlation
Let’s see if there is a relationship between first dragon, turret, etc, and actually winning the game.

In [259]:
fields_to_correlate = 'side fb fd ft fbaron result'.split()

def do_correlation(team):
    """Print the correlation matrix of `fields_to_correlate` for one team.

    Reads the module-level `df` and `fields_to_correlate`; the matrix is
    rounded to two decimals and printed, nothing is returned.

    Arguments:
        team: the team name to match against the `team` column of `df`.
    """
    games = team_games(df, team)

    # Select the columns of interest first and build a new frame rather than
    # replacing in place: `games` is derived from `df` by filtering, and
    # in-place edits on such a frame are discouraged by pandas (they can
    # emit copy/chained-assignment warnings depending on the version).
    flags = games[fields_to_correlate].replace(' ', np.nan)  # blank cells -> missing

    corr = flags.corr()
    print(corr.round(2))

do_correlation('cloud9')


cloud9
        side    fb    fd    ft  fbaron  result
side    1.00 -0.22  0.08 -0.38   -0.40   -0.46
fb     -0.22  1.00  0.22 -0.17    0.13    0.40
fd      0.08  0.22  1.00  0.17   -0.08    0.46
ft     -0.38 -0.17  0.17  1.00    0.24    0.33
fbaron -0.40  0.13 -0.08  0.24    1.00    0.47
result -0.46  0.40  0.46  0.33    0.47    1.00
