In [None]:
# Install tqdm, which the cells below import to show progress bars while parsing files.
!pip install tqdm



In [None]:
import json
import os
import pandas as pd
from glob import glob
from tqdm import tqdm
import random
import shutil
import zipfile

# Project Outline
This project showcases how I extract important laning data from Riot API timeline JSON files using annotated Python code. Beforehand, I collected a dataset of timeline JSON files from the Riot API. That collection step is straightforward to automate up to the request limit of a Riot API key; however, I cannot show it here because the code includes my key.

In [None]:
# Unpack the zipped timeline dataset into a working directory.
zip_file_path = '/content/EUW_challenger_timeline_filtered.zip'
extract_to_path = '/content/extracted_timeline'

# Make sure the destination exists; an already-present directory is not an error.
os.makedirs(extract_to_path, exist_ok=True)

# The archive is closed automatically when the `with` block exits.
archive = zipfile.ZipFile(zip_file_path, 'r')
with archive:
    archive.extractall(path=extract_to_path)

print(f'Unzipped {zip_file_path} to {extract_to_path}')


Unzipped /content/EUW_challenger_timeline_filtered.zip to /content/extracted_timeline


In [None]:
#@title sampler

# Paths
source_folder = '/content/extracted_timeline'
sample_folder = '/content/timeline_sample'    # folder for sample files

# Sampling configuration: how many files to copy, and the seed that makes the
# selection repeatable across kernel restarts.
SAMPLE_SIZE = 10
SAMPLE_SEED = 42

# Create sample folder if it does not exist
os.makedirs(sample_folder, exist_ok=True)

# List all JSON files in source folder. os.listdir returns names in arbitrary
# order, so sort them: a seeded draw is only reproducible over a stable list.
all_files = sorted(f for f in os.listdir(source_folder) if f.endswith('.json'))

# Pick the sample with a private, seeded generator so re-running this cell
# selects the same files instead of piling new ones into the sample folder.
# random.sample raises ValueError when asked for more items than exist, so
# clamp the request to the number of available files.
sample_files = random.Random(SAMPLE_SEED).sample(
    all_files, min(SAMPLE_SIZE, len(all_files))
)

# Copy sample files to sample folder
for filename in sample_files:
    src = os.path.join(source_folder, filename)
    dst = os.path.join(sample_folder, filename)
    shutil.copy(src, dst)

print(f'Sampled and copied {len(sample_files)} files to {sample_folder}')


Sampled and copied 10 files to /content/timeline_sample


In [None]:
#@title parser

def _frame_nearest(frames, target_ms):
    """Return the frame whose 'timestamp' is closest to ``target_ms`` (milliseconds)."""
    return min(frames, key=lambda frame: abs(frame['timestamp'] - target_ms))


def _tally_events(frames, cutoff_ms):
    """Count kills, objectives and ward events that happened up to ``cutoff_ms``.

    Args:
        frames: list of timeline frame dicts; each has a 'timestamp' and an
            optional 'events' list.
        cutoff_ms: inclusive upper bound, in milliseconds, on event time.

    Returns:
        Tuple ``(kda, objectives, wards_placed, wards_destroyed)``:
        ``kda`` maps participant id strings '1'..'10' to kills/deaths/assists
        counts; ``objectives`` maps each objective name to ``{100: n, 200: n}``;
        the two ward dicts map team id (100/200) to a count, keyed by the team
        of the player who placed / killed the ward.
    """
    kda = {str(pid): {'kills': 0, 'deaths': 0, 'assists': 0} for pid in range(1, 11)}
    objectives = {
        'dragonKills': {100: 0, 200: 0},
        'grubKills': {100: 0, 200: 0},
        'towerKills': {100: 0, 200: 0},
        'towerPlatesDestroyed': {100: 0, 200: 0}
    }
    wards_placed = {100: 0, 200: 0}
    wards_destroyed = {100: 0, 200: 0}

    # Participants 1-5 are treated as team 100 and 6-10 as team 200.
    participant_to_team = {pid: 100 if pid <= 5 else 200 for pid in range(1, 11)}
    # Building events carry the team that LOST the structure; credit the other
    # side. An unknown or missing team id maps to None and is not credited.
    opposing_team = {100: 200, 200: 100}

    for frame in frames:
        frame_ts = frame['timestamp']
        for event in frame.get('events', []):
            # Filter on the event's own timestamp rather than the frame's: a
            # frame stamped slightly after the cutoff can still hold events
            # that happened before it. Fall back to the frame timestamp when
            # an event carries none.
            if event.get('timestamp', frame_ts) > cutoff_ms:
                continue

            event_type = event.get('type')

            if event_type == 'CHAMPION_KILL':
                # Ids are stringified to match the kda keys; ids outside
                # '1'..'10' (or a missing id, which becomes 'None') are skipped.
                killer_id = str(event.get('killerId'))
                victim_id = str(event.get('victimId'))
                if killer_id in kda:
                    kda[killer_id]['kills'] += 1
                if victim_id in kda:
                    kda[victim_id]['deaths'] += 1
                for assist in event.get('assistingParticipantIds', []):
                    assist_id = str(assist)
                    if assist_id in kda:
                        kda[assist_id]['assists'] += 1

            elif event_type == 'ELITE_MONSTER_KILL':
                monster = event.get('monsterType')
                team_id = event.get('killerTeamId')
                # 'HORDE' monsters are counted under grubKills.
                if monster == 'DRAGON' and team_id in objectives['dragonKills']:
                    objectives['dragonKills'][team_id] += 1
                elif monster == 'HORDE' and team_id in objectives['grubKills']:
                    objectives['grubKills'][team_id] += 1

            elif event_type == 'BUILDING_KILL' and event.get('buildingType') == 'TOWER_BUILDING':
                killer_team = opposing_team.get(event.get('teamId'))
                if killer_team is not None:
                    objectives['towerKills'][killer_team] += 1

            elif event_type == 'TURRET_PLATE_DESTROYED':
                killer_team = opposing_team.get(event.get('teamId'))
                if killer_team is not None:
                    objectives['towerPlatesDestroyed'][killer_team] += 1

            elif event_type == 'WARD_PLACED':
                team_id = participant_to_team.get(event.get('creatorId'))
                if team_id is not None:
                    wards_placed[team_id] += 1

            elif event_type == 'WARD_KILL':
                team_id = participant_to_team.get(event.get('killerId'))
                if team_id is not None:
                    wards_destroyed[team_id] += 1

    return kda, objectives, wards_placed, wards_destroyed


def parse(json_path):
    """Extract per-team laning features from one timeline JSON file.

    Reads ``timeline['info']['frames']`` and builds one feature row per team:
    per-role gold/xp/damage from the frame nearest 20 minutes, per-role
    kills/deaths/assists from events up to 14 minutes, per-role gold/xp/cs
    differences from the frame nearest 14 minutes, objective-count
    differences, and ward-based vision scores.

    The "nearest" frame is chosen by timestamp distance, so for a game shorter
    than 14 or 20 minutes the last available frame is used and the ``_14`` /
    ``_20`` columns then describe that earlier moment.

    Roles are assigned purely by participant id (1-5 and 6-10 in the order
    top, jungle, mid, adc, support); this assumes the timeline lists players
    in that order -- TODO confirm against match metadata.

    Args:
        json_path: path to a timeline JSON file.

    Returns:
        ``[team1, team2]``: two dicts with identical keys. ``'team'`` is 1 or
        2, ``'match_id'`` is the file name without its ``.json`` extension, and
        every ``*_diff_14`` value of team 2 is the negation of team 1's.

    Raises:
        ValueError: if the timeline contains no frames.
        KeyError: if expected keys are missing from the timeline, a frame, or
            a participant frame.
    """
    roles = ['top', 'jungle', 'mid', 'adc', 'support']
    team1_players = {'top': 1, 'jungle': 2, 'mid': 3, 'adc': 4, 'support': 5}
    team2_players = {'top': 6, 'jungle': 7, 'mid': 8, 'adc': 9, 'support': 10}

    # Snapshot times in milliseconds, the unit of the timeline timestamps.
    early_ms = 14 * 60 * 1000
    late_ms = 20 * 60 * 1000

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        timeline = json.load(f)

    frames = timeline['info']['frames']
    if not frames:
        raise ValueError(f'No frames found in timeline file: {json_path}')

    # Find frames closest to 14 and 20 minutes
    participants_14 = _frame_nearest(frames, early_ms)['participantFrames']
    participants_20 = _frame_nearest(frames, late_ms)['participantFrames']

    # Aggregate from events up to 14 minutes
    kda, objectives, wards_placed, wards_destroyed = _tally_events(frames, early_ms)

    def extract_team_data(participants, team_id, player_map):
        """Build one team's row: 20-minute stats plus 14-minute KDA per role."""
        data = {'team': team_id}
        for role in roles:
            pid = str(player_map[role])
            pdata = participants[pid]
            data[f'{role}_gold_20'] = pdata['totalGold']
            data[f'{role}_exp_20'] = pdata['xp']
            # Damage is optional in the frame; default to 0 when absent.
            data[f'{role}_damage_20'] = pdata.get('damageStats', {}).get('totalDamageDone', 0)
            data[f'{role}_kills_14'] = kda[pid]['kills']
            data[f'{role}_deaths_14'] = kda[pid]['deaths']
            data[f'{role}_assists_14'] = kda[pid]['assists']
        return data

    team1 = extract_team_data(participants_20, 1, team1_players)
    team2 = extract_team_data(participants_20, 2, team2_players)

    # Compute gold, exp and cs differences at 14 minutes (team 1 minus team 2
    # for the same role; team 2 gets the negated value).
    for role in roles:
        p1 = participants_14[str(team1_players[role])]
        p2 = participants_14[str(team2_players[role])]
        gold_diff_14 = p1['totalGold'] - p2['totalGold']
        exp_diff_14 = p1['xp'] - p2['xp']
        # cs counts lane minions and jungle monsters together.
        cs1 = p1['jungleMinionsKilled'] + p1['minionsKilled']
        cs2 = p2['jungleMinionsKilled'] + p2['minionsKilled']
        cs_diff_14 = cs1 - cs2
        team1[f'{role}_gold_diff_14'] = gold_diff_14
        team2[f'{role}_gold_diff_14'] = -gold_diff_14
        team1[f'{role}_exp_diff_14'] = exp_diff_14
        team2[f'{role}_exp_diff_14'] = -exp_diff_14
        team1[f'{role}_cs_diff_14'] = cs_diff_14
        team2[f'{role}_cs_diff_14'] = -cs_diff_14

    # Add objective differences (team 1 = 100, team 2 = 200)
    for obj_name, counts in objectives.items():
        diff = counts[100] - counts[200]
        team1[f'{obj_name}_diff_14'] = diff
        team2[f'{obj_name}_diff_14'] = -diff

    # Add vision scores: wards a team placed minus wards that same team killed.
    # NOTE(review): wards_destroyed is keyed by the KILLER's team, so this
    # subtracts a team's own ward clears from its score. If the intent was
    # "wards placed minus wards lost", the subtrahend should be the other
    # team's count -- confirm before relying on these columns.
    vision_100 = wards_placed[100] - wards_destroyed[100]
    vision_200 = wards_placed[200] - wards_destroyed[200]
    team1['team_ally_vision_score_14'] = vision_100
    team2['team_ally_vision_score_14'] = vision_200
    team1['team_enemy_vision_score_14'] = vision_200
    team2['team_enemy_vision_score_14'] = vision_100

    # Add match ID: the file name with only a trailing '.json' removed.
    match_id = os.path.basename(json_path)
    if match_id.endswith('.json'):
        match_id = match_id[:-len('.json')]
    team1['match_id'] = match_id
    team2['match_id'] = match_id

    return [team1, team2]
def parse_many_files(json_files):
    """Parse every timeline file in ``json_files`` and return all rows in one list.

    Each path is handed to ``parse``; the rows it returns are appended in
    input order. A tqdm progress bar labelled "Files parsed" tracks the loop.
    """
    collected_rows = []
    progress = tqdm(json_files, desc="Files parsed", unit="file")
    for json_path in progress:
        collected_rows += parse(json_path)
    return collected_rows

In [None]:
#@title sample csv builder

sample_folder = '/content/timeline_sample'
csv_path = '/content/sample.csv'

# Select the JSON files up front so the progress bar total equals the number
# of files actually parsed, and sort them because os.listdir returns names in
# arbitrary order (sorting keeps the CSV row order stable between runs).
json_names = sorted(name for name in os.listdir(sample_folder) if name.endswith('.json'))

all_rows = []

# Wrap the file list with tqdm
for filename in tqdm(json_names, desc="Parsing files", unit="file"):
    file_path = os.path.join(sample_folder, filename)
    all_rows.extend(parse(file_path))

# Build the frame once from the collected row dicts, then persist it.
df = pd.DataFrame(all_rows)
df.to_csv(csv_path, index=False)


Parsing files: 100%|██████████| 10/10 [00:00<00:00, 23.37file/s]


In [None]:
#@title sample csv tester

# Read the file from the absolute path it is written to, so this cell does not
# depend on the current working directory being /content.
df = pd.read_csv('/content/sample.csv')
print(df.head())
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# print() would add a stray "None" line to the output.
df.info()


   team  top_gold_20  top_exp_20  top_damage_20  top_kills_14  top_deaths_14  \
0     1         6284        8285          71358             4              1   
1     2         5360        6526          52243             2              3   
2     1         6284        8139          54499             1              2   
3     2         8571       12205          93886             2              2   
4     1         9428       11403         116704             4              2   

   top_assists_14  jungle_gold_20  jungle_exp_20  jungle_damage_20  ...  \
0               1            6672           6282             99624  ...   
1               1            4534           4767             75038  ...   
2               2            9751           8735            148819  ...   
3               0            6113           7557            132802  ...   
4               2            7899           8169            128836  ...   

   support_gold_diff_14  support_exp_diff_14  support_cs_diff_14  \


In [None]:
#@title tests
# Draw one random row and show it transposed (one feature per line). Print the
# row that was drawn instead of sampling a second, different one.
random_row = df.sample(n=1)
print(random_row.T)


                                                     2
team                                                 1
top_gold_20                                       6284
top_exp_20                                        8139
top_damage_20                                    54499
top_kills_14                                         1
top_deaths_14                                        2
top_assists_14                                       2
jungle_gold_20                                    9751
jungle_exp_20                                     8735
jungle_damage_20                                148819
jungle_kills_14                                      6
jungle_deaths_14                                     1
jungle_assists_14                                    3
mid_gold_20                                       9192
mid_exp_20                                        9366
mid_damage_20                                    90653
mid_kills_14                                         5
mid_deaths

In [None]:
#@title example of parsing a folder
from tqdm.notebook import tqdm

# Despite its name, sample_folder here points at the full extracted dataset.
sample_folder = '/content/extracted_timeline'
csv_path = '/content/main.csv'

# Select the JSON files up front so the progress bar total equals the number
# of files actually parsed, and sort them because os.listdir returns names in
# arbitrary order (sorting keeps the CSV row order stable between runs).
json_names = sorted(name for name in os.listdir(sample_folder) if name.endswith('.json'))

all_rows = []

for filename in tqdm(json_names, desc="Parsing files", unit="file"):
    file_path = os.path.join(sample_folder, filename)
    all_rows.extend(parse(file_path))

# Build the frame once from the collected row dicts, then persist it.
df = pd.DataFrame(all_rows)
df.to_csv(csv_path, index=False)


Parsing files:   0%|          | 0/12673 [00:00<?, ?file/s]

In [None]:
#@title main csv tester
import pandas as pd

# Read the file from the absolute path it is written to, so this cell does not
# depend on the current working directory being /content.
df = pd.read_csv('/content/main.csv')
print(df.head())
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# print() would add a stray "None" line to the output.
df.info()
print(df.sample(1).T)

   team  top_gold_20  top_exp_20  top_damage_20  top_kills_14  top_deaths_14  \
0     1         9494       10467          78993             5              2   
1     2         9281       10389         105757             1              5   
2     1         8181        9073          98643             1              4   
3     2         8970        8389          78895             4              2   
4     1         6815        9977          90605             1              0   

   top_assists_14  jungle_gold_20  jungle_exp_20  jungle_damage_20  ...  \
0               3            8408           9236            153933  ...   
1               2           10521          10651            146905  ...   
2               1            9363          11185            185723  ...   
3               0            7291           7887            117322  ...   
4               0            9227          10891            171585  ...   

   support_gold_diff_14  support_exp_diff_14  support_cs_diff_14  \
