# TLoL Agent - Analyse Metadata

## Load Data

In [1]:
import pandas as pd
# Per-game metadata table; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="metadata_per_game.csv")

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,file,game_idx,patch,size_mb,duration_sec,duration_min,deaths,spells,attacks,damage,items,n_champs,champs,hero_net_ids,n_heroes
0,batch_009.jsonl.gz,0,12_22,73.01,1020.3,17.0,0,2621,2639,24973,177,10,amumu|ezreal|fizz|hecarim|jhin|qiyana|riven|se...,1073741854:sett|1073741855:hecarim|1073741856:...,10
1,batch_009.jsonl.gz,1,12_22,127.45,1680.5,28.01,0,5606,3168,38278,254,10,ahri|blitzcrank|fiora|kaisa|rumble|seraphine|t...,1073741854:fiora|1073741855:rumble|1073741856:...,10
2,batch_009.jsonl.gz,2,12_22,97.31,1350.4,22.51,0,3515,3277,29873,210,10,aatrox|brand|ekko|gnar|khazix|ksante|nilah|qiy...,1073741854:gnar|1073741855:khazix|1073741856:q...,10
3,batch_009.jsonl.gz,3,12_22,64.67,840.2,14.0,0,3093,1980,20517,161,10,cassiopeia|heimerdinger|ksante|lissandra|missf...,1073741854:riven|1073741855:nidalee|1073741856...,10
4,batch_009.jsonl.gz,4,12_22,89.25,1200.3,20.01,0,3078,3239,26866,179,10,caitlyn|diana|drmundo|ekko|ezreal|janna|jax|ks...,1073741854:drmundo|1073741855:jax|1073741856:d...,10


In [4]:
# Long format: 'champs' holds pipe-separated champion names, so splitting and
# exploding yields one row per (game, champion) pair.
champs_exploded = df.assign(champ=df['champs'].str.split('|')).explode('champ')

# Number of games each champion appears in, most frequent first.
champ_counts = champs_exploded['champ'].value_counts()

# Per-champion averages over the games the champion appeared in.
# games_played uses 'size' (rows per group) rather than 'count', because
# 'count' skips rows where size_mb is NaN and would undercount games.
champ_stats = champs_exploded.groupby('champ').agg(
    duration_min=('duration_min', 'mean'),
    damage=('damage', 'mean'),
    deaths=('deaths', 'mean'),
    games_played=('size_mb', 'size'),
)

# Wide format: one 0/1 indicator column per champion, appended to the
# original per-game columns (useful for correlation analysis).
champs_onehot = df['champs'].str.get_dummies(sep='|')
df_encoded = pd.concat([df, champs_onehot], axis=1)

# Distinct champion names. Built from the exploded column with NaN dropped so
# that a missing 'champs' value does not raise (str.join fails on NaN) and an
# empty frame reports 0 champions instead of a single empty-string entry.
all_champs = set(champs_exploded['champ'].dropna())
print(f"Unique champions: {len(all_champs)}")

Unique champions: 162


In [6]:
# Show how many games each champion appears in (value_counts sorts most frequent first).
champ_counts

champ
kaisa      6065
caitlyn    6058
ezreal     5837
sylas      5148
jhin       4287
           ... 
kennen      444
quinn       431
ivern       421
corki       381
skarner     280
Name: count, Length: 162, dtype: int64

## Champ Pick Rate

In [10]:
# One row per (game, champion); same expression as in the earlier cell,
# repeated here so this cell only needs `df` to run.
champs_exploded = df.assign(champ=df['champs'].str.split('|')).explode('champ')

total_games = len(df)
print(f"Total games: {total_games}\n")

# NOTE: this rebinds `champ_stats`, which the earlier cell used for the
# per-champion averages table; from here on it holds pick-rate data instead.
# Appearances are counted once and the percentage is derived from that column,
# rather than calling value_counts() a second time.
champ_stats = (
    champs_exploded['champ']
    .value_counts()
    .to_frame(name='games')
    .assign(pick_rate_pct=lambda counts: (counts['games'] / total_games * 100).round(2))
)

print(champ_stats.head(20))

Total games: 27913

             games  pick_rate_pct
champ                            
kaisa         6065          21.73
caitlyn       6058          21.70
ezreal        5837          20.91
sylas         5148          18.44
jhin          4287          15.36
zed           3980          14.26
nami          3836          13.74
lux           3543          12.69
mordekaiser   3532          12.65
twitch        3505          12.56
ashe          3473          12.44
aatrox        3371          12.08
graves        3324          11.91
varus         3289          11.78
ekko          3286          11.77
lucian        3178          11.39
yone          3122          11.18
fiora         3068          10.99
yasuo         2949          10.56
akali         2831          10.14
