# 06 - Data Explorer
Explorateur pour matches/teams/heros avec options basiques.


In [1]:
import os
import sys
from pathlib import Path
ROOT = Path(os.getcwd()).resolve()
if (ROOT / 'src').exists():
    sys.path.append(str(ROOT))
elif (ROOT.parent / 'src').exists():
    sys.path.append(str(ROOT.parent))
else:
    sys.path.append(str(ROOT))

raw_candidates = [
    ROOT / 'data/raw/data.json',
    ROOT.parent / 'data/raw/data.json',
]
raw_path = next((p for p in raw_candidates if p.exists()), None)
if raw_path is None:
    raise FileNotFoundError(f'data/raw/data.json introuvable. Cherché: {raw_candidates}')

processed_dir = ROOT / 'data/processed'
processed_dir.mkdir(parents=True, exist_ok=True)

import json
import polars as pl
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from src.dota_data import (
    read_processed_tables,
    write_parquet_tables,
    match_header,
    objectives_timeline,
    teamfights_for_match,
    build_team_dictionary,
    build_player_dictionary,
    build_hero_counts,
    load_hero_dictionary,
)


FileNotFoundError: data/raw/data.json introuvable. Cherché: [PosixPath('/home/ju/Documents/Dev/Dota-Datas/notebooks/data/raw/data.json'), PosixPath('/home/ju/Documents/Dev/Dota-Datas/data/raw/data.json')]

In [2]:
sns.set_theme(style='whitegrid')

if not (processed_dir / 'matches.parquet').exists():
    write_parquet_tables(raw_path, processed_dir)

raw_json = json.loads(raw_path.read_text())
raw_map = {m['json']['match_id']: m['json'] for m in raw_json}
tables = read_processed_tables(processed_dir)
matches = tables['matches']
players = tables['players']
objectives = tables['objectives']
teamfights = tables['teamfights']

teams_dict = build_team_dictionary(matches)
players_dict = build_player_dictionary(players)
hero_counts = build_hero_counts(players)
try:
    heroes_dict = load_hero_dictionary()
except FileNotFoundError:
    heroes_dict = None

matches.shape, players.shape, objectives.shape, teamfights.shape


((50, 51), (500, 149), (1485, 11), (2020, 19))

In [3]:
# Baselines pour paris: winrate, first blood côté radiant, first tower
winrate = matches.select(pl.col('radiant_win').mean().alias('radiant_winrate'))
fb_rate = players.group_by('is_radiant').agg(pl.col('firstblood_claimed').sum().alias('firstblood_claims'))
tower_events = objectives.filter(pl.col('type').str.contains('building_kill', case=False, literal=False))
first_tower = tower_events.sort('time').select(['match_id','time','team']).head(1)
display(winrate)
display(fb_rate)
display(first_tower)


TypeError: ExprStringNameSpace.contains() got an unexpected keyword argument 'case'

In [None]:
# Selection d'un match pour inspection
match_id = int(matches.select('match_id').to_series()[0])  # ajuster ici
match = raw_map[match_id]
print('Match', match_id, 'radiant_win:', match.get('radiant_win'))
print('Duration min:', round(match.get('duration',0)/60,2))
print('First blood time:', match.get('first_blood_time'))


In [None]:
# Timelines objectifs pour le match sélectionné
objs = objectives_timeline(objectives, match_id).to_pandas()
towers = objs[objs['type'].str.contains('building', case=False, na=False)]
rosh = objs[objs['type'].str.contains('roshan', case=False, na=False)]
plt.figure(figsize=(10,3))
sns.scatterplot(data=towers, x='time', y='type', hue='team', s=60)
plt.title('Towers/buildings events')
plt.show()
plt.figure(figsize=(10,3))
sns.scatterplot(data=rosh, x='time', y='type', hue='team', s=100)
plt.title('Roshan events')
plt.show()


In [None]:
# Team dictionary aperçu
teams_dict.head(10)


In [None]:
# Player dictionary aperçu
players_dict.head(10)


In [None]:
# Heroes counts (sans noms si dictionnaire absent)
hero_counts.head(10)
