## Importing libraries

In [108]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt

## Data Extraction

In [68]:
# Group stage data found by identifying network calls made on lolesports.com
worlds_2015_group = requests.get('https://api.lolesports.com/api/v2/tournamentPlayerStats?groupName=groups&tournamentId=91be3d78-874a-44e0-943f-073d4c9d7bf6')
worlds_2016_group = requests.get('https://api.lolesports.com/api/v2/tournamentPlayerStats?groupName=groups&tournamentId=3c5fa267-237e-4b16-8e86-20378a47bf1c')
worlds_2017_group = requests.get('https://api.lolesports.com/api/v2/tournamentPlayerStats?groupName=groups&tournamentId=a246d0f8-2b5c-4431-af4c-b872c8dee023')

# Knockout stage data also found on lolesports.com
worlds_2015_ko = requests.get('https://api.lolesports.com/api/v2/tournamentPlayerStats?groupName=elim&tournamentId=91be3d78-874a-44e0-943f-073d4c9d7bf6')
worlds_2016_ko = requests.get('https://api.lolesports.com/api/v2/tournamentPlayerStats?groupName=elim&tournamentId=3c5fa267-237e-4b16-8e86-20378a47bf1c')
worlds_2017_ko = requests.get('https://api.lolesports.com/api/v2/tournamentPlayerStats?groupName=elim&tournamentId=a246d0f8-2b5c-4431-af4c-b872c8dee023')

## Data Wrangling

In [69]:
# Data wrangled to prepare for pandas
worlds_2015_group = worlds_2015_group.json()
worlds_2016_group = worlds_2016_group.json()
worlds_2017_group = worlds_2017_group.json()
worlds_2015_ko = worlds_2015_ko.json()
worlds_2016_ko = worlds_2016_ko.json()
worlds_2017_ko = worlds_2017_ko.json()

In [94]:
# Converting JSON files into pandas dataframes for both group and knockout stages
df_15_group = pd.DataFrame(worlds_2015_group['stats'])
df_16_group = pd.DataFrame(worlds_2016_group['stats'])
df_17_group = pd.DataFrame(worlds_2017_group['stats'])
df_15_ko = pd.DataFrame(worlds_2015_ko['stats'])
df_16_ko = pd.DataFrame(worlds_2016_ko['stats'])
df_17_ko = pd.DataFrame(worlds_2017_ko['stats'])

print "Players in worlds group stage 2015: ", len(df_15_group)
print "Players in worlds group stage 2016: ", len(df_16_group)
print "Players in worlds group stage 2017: ", len(df_17_group)
print "Players in worlds knockout stage 2015: ", len(df_15_ko)
print "Players in worlds knockout stage 2016: ", len(df_16_ko)
print "Players in worlds knockout stage 2017: ", len(df_17_ko)
print "Sum of all players: ", (len(df_15_group)+len(df_16_group)+len(df_17_group)+len(df_15_ko)+len(df_16_ko)+len(df_17_ko))

Players in worlds group stage 2015:  84
Players in worlds group stage 2016:  85
Players in worlds group stage 2017:  85
Players in worlds knockout stage 2015:  41
Players in worlds knockout stage 2016:  41
Players in worlds knockout stage 2017:  41
Sum of all players:  377


In [86]:
# Adding a new column identifying the tournament for each row
df_15_group['tournamentId'], df_16_group['tournamentId'], df_17_group['tournamentId'] = [
    'worlds_2015_group', 
    'worlds_2016_group', 
    'worlds_2017_group']

df_15_ko['tournamentId'], df_16_ko['tournamentId'], df_17_ko['tournamentId'] = [
    'worlds_2015, ko',
    'worlds_2016_ko',
    'worlds_2017_ko'
]
# Checking whether each dataframe is correcltly labelled in its tournamentId field
print "Worlds group stage 2015: ", df_15_group['tournamentId'].unique()
print "Worlds group stage 2016: ", df_16_group['tournamentId'].unique()
print "Worlds group stage 2017: ", df_17_group['tournamentId'].unique()
print "Worlds knockout stage 2015: ", df_15_ko['tournamentId'].unique()
print "Worlds knockout stage 2016: ", df_16_ko['tournamentId'].unique()
print "Worlds knockout stage 2017: ", df_17_ko['tournamentId'].unique()

Worlds group stage 2015:  ['worlds_2015_group']
Worlds group stage 2016:  ['worlds_2016_group']
Worlds group stage 2017:  ['worlds_2017_group']
Worlds knockout stage 2015:  ['worlds_2015, ko']
Worlds knockout stage 2016:  ['worlds_2016_ko']
Worlds knockout stage 2017:  ['worlds_2017_ko']


In [90]:
# Concatenating all 6 dataframes into one single dataframe
df = pd.concat([df_15_group, df_16_group, df_17_group, df_15_ko, df_16_ko, df_17_ko], axis=0)

# Checking that the values for each tournament exists in the new dataframe
print "Range of tournamentIds found: ", df['tournamentId'].unique()

Range of tournamentIds found:  ['worlds_2015_group' 'worlds_2016_group' 'worlds_2017_group'
 'worlds_2015, ko' 'worlds_2016_ko' 'worlds_2017_ko']


## Exploratory Data Analysis

In [110]:
print "{} rows of which {} are unique player names.".format(len(df), len(df['playerSlug'].unique()))

377 rows of which 173 are unique player names.


In [101]:
print "Rows: {} \nColumns: {}".format(df.shape[0], df.shape[1])
df.head()

Rows: 377 
Columns: 16


Unnamed: 0,assists,cs,csPerMin,deaths,gamesPlayed,id,kda,killParticipation,kills,minutesPlayed,name,playerSlug,position,team,teamSlug,tournamentId
0,18,1014,5.917137,20,6,7,1.5,0.666667,12,171,WarL0cK,warl0ck,toplane,BKT,bangkok-titans,worlds_2015_group
1,20,1217,7.101731,26,6,10,1.153846,0.666667,10,171,Lloyd,lloyd,adcarry,BKT,bangkok-titans,worlds_2015_group
2,24,547,3.191986,25,6,11,1.28,0.711111,8,171,007x,007x,jungle,BKT,bangkok-titans,worlds_2015_group
3,32,2312,9.158854,11,7,20,6.363636,0.673077,38,252,An,an,adcarry,AHQ,ahq-e-sports-club,worlds_2015_group
4,39,2039,8.07738,13,7,34,4.769231,0.596154,23,252,Westdoor,westdoor,midlane,AHQ,ahq-e-sports-club,worlds_2015_group
