In [1]:
import requests
import pandas as pd
import numpy as np
import datetime
import time as t
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
sns.set_style("whitegrid", {'axes.grid' : False})
import os
import math
import time as t

In [7]:
# For any URL, return the JSON
def return_json(URL, session):
    while True:
        response = session.get(URL)
        try:
            # Check for 404 error and quit if received
            if response.json()['status']['status_code'] == 404:
                return "error - status code 404"
            # Check for 429 (too many requests made), sleep if received
            elif response.json()['status']['status_code'] == 429:
                t.sleep(10)
                continue
            else:
                return "error - unknown reason"
        except:
            break
    return response.json()

# Provide the match-id & region, receive the json of match timeline (1 minute interval of match data)
def get_matchTimeline(matchId, region, key, session):
    URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/timelines/by-match/' + str(
        matchId) + '/?api_key=' + key
    json = return_json(URL, session)
    return json


# Provide the match-id & region, receive the match information (game length, participants etc..)
def get_gameInfo(matchId, region, key, session):
    URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/matches/' + str(matchId) + '/?api_key=' + key
    json = return_json(URL, session)
    return json

# Decide how much data to gather in each elo
def set_volume(tier):
    tier_list = {
        'DIAMOND': 20,
        'GOLD': 1,
        'SILVER': 1
    }
    size = tier_list[tier]
    return size

# Loop to get summoner IDs from given regions / tiers
def get_summoners(fullRegionList, tierList, key, session):
    summonerIds, summonerRegions, summonerTier = [], [], []
    for y in fullRegionList:
        for z in range(len(tierList)):
            size = set_volume(tierList[z][0])
            for x in range(size):
                page = x + 1
                URL_ids = ('https://' + y + '.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/' +
                           tierList[z][0] + '/' + tierList[z][1] + '/?page=' + str(page) + '&api_key=' + key)
                json = return_json(URL_ids, session)
                for x in range(0, len(json)):
                    summonerIds.append(json[x]['summonerId'])
                    summonerRegions.append(y)
                    summonerTier.append(tierList[z][0])
    return summonerIds, summonerRegions, summonerTier


# Convert a list of names to IDs
def name_to_id(selectedIds, selectedRegions, selectedTiers,  key, session):
    accountIds, accountRegions, accountTiers = [], [], []
    for i in range(len(selectedIds)):
        URL = 'https://' + selectedRegions[i] + '.api.riotgames.com/lol/summoner/v4/summoners/' + selectedIds[
            i] + '/?api_key=' + key
        json = return_json(URL, session)
        account_id = json['accountId']
        accountIds.append(account_id)
        accountRegions.append(selectedRegions[i])
        accountTiers.append(selectedTiers[i])
    return accountIds, accountRegions, accountTiers


# Python code to remove duplicate elements
def remove_duplicates(list1, list2, list3):
    final_list1 = []
    final_list2 = []
    final_list3 = []
    for i in range(len(list1)):
        if list1[i] not in final_list1:
            final_list1.append(list1[i])
            final_list2.append(list2[i])
            final_list3.append(list3[i])
    return final_list1, final_list2, final_list3


def time_conv(yyyy, mm, dd):
    sd = datetime.date(yyyy, mm, dd) - datetime.timedelta(7)
    time = t.mktime(sd.timetuple())
    time = str(int(time)) + "000"
    return time
    
def get_matchIds(accountIds, accountRegions, accountTiers, key, session):
    matchIds, matchTiers, matchRegions = [], [], []
    start_time = time_conv(2020, 10, 19)
    for i in range(len(accountIds)):
        URL = 'https://' + accountRegions[i] + '.api.riotgames.com/lol/match/v4/matchlists/by-account/' + accountIds[
            i] + '/?beginTime=' + start_time + '&queue=420' + '&api_key=' + key
        try:
            match_json = return_json(URL, session) 
            for match in match_json['matches'][:10]:
                matchIds.append(match['gameId'])
                matchRegions.append(accountRegions[i])
                matchTiers.append(accountTiers[i])
        except:
            pass
    return matchIds, matchRegions, matchTiers

def main(fullRegionList, tierList, key, session):
    summonerIds, summonerRegions, summonerTiers = get_summoners(fullRegionList, tierList, key, session)
    print(str(len(summonerIds)) + " summoners found. Transforming to account IDs.")
    accountIds, accountRegions, accountTiers = name_to_id(summonerIds, summonerRegions, summonerTiers,  key, session)
    print(str(len(accountIds)) + " account IDs successfully transformed. Getting match IDs.")
    matchIds, matchRegions, matchTiers = get_matchIds(accountIds, accountRegions, accountTiers, key, session)
    print(str(len(matchIds)) + " game IDs found, converting to data.")
    return matchIds, matchRegions, matchTiers

In [8]:
# For all lanes, check whether the jungler is within a given range at a given time
def current_lane(pos, dist):
    for lane in lane_locs:
        if lane_distance(pos, lane) < dist:
            return lane
    return False
        
# Find the distance between jungler at a given time, and a given lane
def lane_distance(pos, lane):
    loc_X = pos['x']
    loc_Y = pos['y']
    current_loc = [loc_X, loc_Y]
    shortest_dist = 99999
    for i in range(2):
        lane_loc = lane_locs[lane][i]
        dist = distance(current_loc, lane_loc)
        if dist < shortest_dist:
            shortest_dist = dist
    return shortest_dist

# Calculate the distance between point 1 (x1, y1) and point 2 (x2, y2)
def distance(p1, p2):
    return math.sqrt(((p1[0] - p2[0]) ** 2) + ((p1[1] - p2[1]) ** 2))

# Create dictionary of lane co-ordinates, which can be used to check for ganks
lane_locs = {"top": [[2250, 12750],
                      [1500, 12000],
                      [3500, 13500]],
             "middle": [[7500, 7500],
                     [8500, 8500],
                     [6500, 6750]],
             "adc": [[12750, 2250],
                     [13500, 3500],
                     [11500, 1500],
                     [10500, 800],
                     [14100, 4500]]}


def determine_lane(lane_cs, jungle_cs, coord_2, coord_3):
    if jungle_cs > 5:
        lane = "jungle"
    elif lane_cs < 12:
        lane = "support"
    elif lane_cs >= 13:
        lane = position_lane_check(coord_2, coord_3, 1800)
    else:
        lane = False
    return lane


def position_lane_check(coord_2, coord_3, dist):
    lane_check_one = current_lane(coord_2, dist)
    lane_check_two = current_lane(coord_3, dist)

    if lane_check_one == lane_check_two and lane_check_one != False:
        lane = lane_check_one
    elif lane_check_one == False and lane_check_two != False:
        lane = lane_check_two
    elif lane_check_one != False and lane_check_two == False:
        lane = lane_check_one
    else:
        lane = False
    return lane


def create_id_lane_dict(match_timeline):
    id_lane_dict = {}
    for i in range(1, 11):
        part_id = match_timeline['frames'][0]['participantFrames'][str(i)]['participantId']
        lane_cs = match_timeline['frames'][4]['participantFrames'][str(i)]['minionsKilled']
        jungle_cs = match_timeline['frames'][4]['participantFrames'][str(i)]['jungleMinionsKilled']
        coord_2 = match_timeline['frames'][3]['participantFrames'][str(i)]['position']
        coord_3 = match_timeline['frames'][4]['participantFrames'][str(i)]['position']
        lane = determine_lane(lane_cs, jungle_cs, coord_2, coord_3)
        id_lane_dict[part_id] = lane
    return id_lane_dict


def parse_event_info(match_timeline):
    killer, victim, assists, timestamp, pos = [], [], [], [], []
    all_kills = []
    drake_kills_list = []
    for i in range(len(match_timeline['frames'])):
        for event in match_timeline['frames'][i]['events']:
            if event['type'] == 'CHAMPION_KILL':
                kill_info = [event['killerId']]
                kill_info.append(event['victimId'])
                kill_info.append(event['assistingParticipantIds'])
                kill_info.append(event['timestamp'])
                kill_info.append(event['position'])
                all_kills.append(kill_info)
            if event['type'] == 'ELITE_MONSTER_KILL' and event['monsterType'] == 'DRAGON':
                drake_kills_list.append(event['killerId'])
    kill_df = pd.DataFrame(all_kills, columns = ['Killer', 'Victim', 'Assists', 'Timestamp','Position'])
    solo_kills_df = kill_df[kill_df['Assists'].map(lambda d: len(d)) == 0].groupby('Killer').count()
    
    early_kills_df = kill_df[kill_df['Timestamp'] < 60000 * 10]
    lanes = []
    for i in range(len(early_kills_df)):
        kill_lane = current_lane(early_kills_df.loc[i]['Position'], 1750)
        lanes.append(kill_lane)
    early_kills_df['Lanes'] = lanes
    
    return solo_kills_df, early_kills_df, drake_kills_list


def get_solo_kills(solo_kills_df, part_id):
    if part_id in solo_kills_df.index:
        solo_kills = solo_kills_df.loc[part_id].Victim
    else:
        solo_kills = 0
    return solo_kills


def get_early_ganks(early_kills_df, part_id, lane):
    early_ganks = early_kills_df[(early_kills_df['Lanes'] != False) &
                   (early_kills_df['Killer'] == part_id) &
                  (early_kills_df['Lanes'] != lane)].count()['Victim']
    return early_ganks
    

def blue_win_check(game_info):
    if game_info['teams'][0]['win'] == 'Win':
        return 1
    else:
        return 0
    
def get_team_data(features, matchId, matchRegion, matchTier, key, session):  
    game_info = get_gameInfo(matchId, matchRegion, key, session)
    match_timeline = get_matchTimeline(matchId, matchRegion, key, session)
    solo_kills_df, early_kills_df, drake_kills_list = parse_event_info(match_timeline)
    id_lane_dict = create_id_lane_dict(match_timeline)
    game_duration = game_info['gameDuration']
    all_team_stats = []
    blue_result = blue_win_check(game_info)
    red_result = 1 - blue_result
    for player in game_info['participants']:
        part_id = player['participantId']
        lane = id_lane_dict[part_id]
        player_stats = []
        for stat in features:
            player_stats.append(player['stats'][stat])
        first10_xp = game_info['participants'][0]['timeline']['xpPerMinDeltas']['0-10']
        first10_gold = game_info['participants'][0]['timeline']['goldPerMinDeltas']['0-10']
        solo_kills = get_solo_kills(solo_kills_df, part_id) 
        early_ganks = get_early_ganks(early_kills_df, part_id, lane)
        drake_kills = drake_kills_list.count(part_id)
        player_stats.append(first10_xp)
        player_stats.append(first10_gold)
        player_stats.append(solo_kills)
        player_stats.append(early_ganks)
        player_stats.append(drake_kills)
        if part_id <= 5:
            player_stats.insert(0, blue_result)
        else:
            player_stats.insert(0, red_result)
        player_stats.insert(0, matchTier)
        player_stats.insert(0, lane)
        player_stats.insert(0, player['championId'])

        all_team_stats.append(player_stats)
    return all_team_stats


def gameIds_to_data(matchIds, matchRegions, matchTiers, key, session):

    features = ['kills', 'deaths', 'assists', 'largestKillingSpree', 'largestMultiKill', 'killingSprees', 
                'longestTimeSpentLiving',  'totalDamageDealt', 'magicDamageDealt', 'physicalDamageDealt', 
                'trueDamageDealt', 'largestCriticalStrike', 'totalDamageDealtToChampions', 
                'magicDamageDealtToChampions', 'trueDamageDealtToChampions', 'totalHeal', 'damageSelfMitigated',
                'damageDealtToObjectives', 'damageDealtToTurrets', 'timeCCingOthers', 'totalDamageTaken', 
                'goldEarned', 'turretKills', 'inhibitorKills', 'totalMinionsKilled', 'totalTimeCrowdControlDealt',
               'neutralMinionsKilled', 'neutralMinionsKilledTeamJungle', 'neutralMinionsKilledEnemyJungle',
                'firstBloodKill', 'firstBloodAssist', 'firstTowerKill', 'firstTowerAssist']
    
    all_stats = []
    for i in range(len(matchIds)):
        if i % 1000 == 0:
            print(i)
        try:
            all_stats.extend(get_team_data(features, matchIds[i], matchRegions[i], matchTiers[i], key, session))
        except:
            pass
    col = ['Champ ID', 'Lane', 'Tier', 'Result'] + features + ['first10_xpm', 'first10_gpm', 'soloKills', 'earlyGanks', 'drakesKilled']
    df = pd.DataFrame(all_stats, columns = col) 
    bool_cols = ['firstBloodKill', 'firstBloodAssist', 'firstTowerKill', 'firstTowerAssist']
    for col in bool_cols:
        df[col] = df[col].astype(int)
    return df

def full(fullRegionList, tierList, key, session):
    start = t.time()
    matchIds, matchRegions, matchTiers = main(fullRegionList, tierList, key, session)
    end = t.time()
    run_time = end - start
    print("Match ID finding run time:", run_time)
    start = t.time()
    df = gameIds_to_data(matchIds, matchRegions, matchTiers, key, session)
    end = t.time()
    run_time = end - start
    print("ID to DF run time:", run_time)
    return df

In [9]:
fullRegionList = ['euw1']
tierList = [['DIAMOND', 'III'], ['DIAMOND', 'II'], ['DIAMOND', 'I']]
key = 'YOUR_KEY (see Riot API site for details)'
session = requests.Session()

df = full(fullRegionList, tierList, key, session)

12300 summoners found. Transforming to account IDs.
12300 account IDs successfully transformed. Getting match IDs.
68415 game IDs found, converting to data.
Match ID finding run time: 4306.0135061740875
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result = method(y)


1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
ID to DF run time: 52127.05663204193


In [12]:
df.to_csv('final_data2.csv')

# 12,300 summoners
# 65 seconds per batch, 60 batches = 1 hour 5 minutes
# ~~ 92,852 games
# 92,852 games * 0.685 secs per game = ~~63,418 seconds
# 17 hours 36 minutes + 1 hour 5 minutes = ~~18 hours 41 minutes
# 21:08 start time
# 15:49 end time

# gameIds were found by 22:17
# 22:17 to 16:27 = 18 hours 10 minutes = 65,400 seconnds
# 65,400 seconds / 95,000 batch  = 0.688 secs per game

# 18,000 batches * 0.688 = 12,384 / 60 / 60 =  3 hours 26 minutes
# 09:03 + 3 hours 26 minutes = 12:44 end time
# 12,000 batches * 0.688 = 8,256 / 60 / 60 = 2 hours 17 minutes
# 10:32 + 2 hours 17 minutes = 12:49 end time

# 11,000 batches * 0.688 = 2 hours 6 minutes
# 13:06 + 2 hours 6 minutes = 15:12 ????

# 4,500 batches * 0.688 = 51 minutes

