# 02 - Filtrer >= 2025 et inspecter une game détaillée
Charge les matches bruts, filtre après le 1er janvier 2025, puis récupère le détail d'une game (objectives/series) pour debugger la représentation des séries/maps.

In [1]:
from pathlib import Path
import sys

def _find_root():
    candidates = [Path.cwd()] + list(Path.cwd().parents[:4])
    for cand in candidates:
        if (cand / "src").exists():
            return cand
    return Path.cwd()

ROOT = _find_root()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))
print(f"Using project root: {ROOT}")


Using project root: /home/ju/Documents/Dev/Dota-Datas


In [2]:
from datetime import datetime, timezone
from pathlib import Path
import json
import pandas as pd
from dotenv import load_dotenv

from src.dota_data.api import (
    load_api_key,
    build_session,
    load_team_list,
    fetch_team_matches,
    filter_matches_since,
    unique_match_ids,
    fetch_match_detail,
    annotate_matches_with_team,
    write_json,
)

load_dotenv()
api_key = load_api_key(load_env_file=False)
session = build_session(api_key)
teams = load_team_list(ROOT / "data/teams_to_look.csv")

raw_path = Path("data/interim/team_matches_raw.json")
if raw_path.exists():
    team_matches = json.loads(raw_path.read_text())
else:
    team_matches = []
    for team in teams:
        team_matches.append({"team": team, "matches": fetch_team_matches(team["TeamID"], session=session)})
        raw_path.parent.mkdir(parents=True, exist_ok=True)
        write_json(team_matches, raw_path)
print(f"Matches chargés depuis {raw_path} : {len(team_matches)} entrées")


Matches chargés depuis data/interim/team_matches_raw.json : 173 entrées


In [3]:
flat_rows = []
for entry in team_matches:
    flat_rows.extend(annotate_matches_with_team(entry["matches"], entry["team"]["TeamID"], entry["team"]["TeamName"]))
matches_df = pd.DataFrame(flat_rows)
if not matches_df.empty:
    matches_df["start_dt"] = pd.to_datetime(matches_df["start_time"], unit="s")
matches_df.head()


Unnamed: 0,match_id,radiant_win,radiant_score,dire_score,radiant,duration,start_time,leagueid,league_name,cluster,opposing_team_id,opposing_team_name,opposing_team_logo,_source_team_id,_source_team_name,start_dt
0,8572758153,True,42,22,False,2328,1763897576,18920,PGL Wallachia 2025 Season 6,272,9338413,MOUZ,https://cdn.steamusercontent.com/ugc/149367842...,2163,Team Liquid,2025-11-23 11:32:56
1,8572646777,False,23,29,False,2909,1763892992,18920,PGL Wallachia 2025 Season 6,271,9338413,MOUZ,https://cdn.steamusercontent.com/ugc/149367842...,2163,Team Liquid,2025-11-23 10:16:32
2,8572546517,False,17,44,True,2326,1763888685,18920,PGL Wallachia 2025 Season 6,272,9338413,MOUZ,https://cdn.steamusercontent.com/ugc/149367842...,2163,Team Liquid,2025-11-23 09:04:45
3,8571364449,False,21,28,True,2488,1763826709,18920,PGL Wallachia 2025 Season 6,271,7119388,Team Spirit,https://cdn.steamusercontent.com/ugc/183917912...,2163,Team Liquid,2025-11-22 15:51:49
4,8571247569,False,11,19,True,1753,1763823160,18920,PGL Wallachia 2025 Season 6,274,7119388,Team Spirit,https://cdn.steamusercontent.com/ugc/183917912...,2163,Team Liquid,2025-11-22 14:52:40


In [4]:
cutoff_ts = int(datetime(2025, 1, 1, tzinfo=timezone.utc).timestamp())
recent_rows = filter_matches_since(flat_rows, cutoff_ts)
recent_ids = unique_match_ids(recent_rows)
print(f"Matches >= 2025-01-01: {len(recent_rows)} (ids uniques: {len(recent_ids)})")
pd.DataFrame(recent_rows)[["match_id", "start_time", "_source_team_name"]].head()


Matches >= 2025-01-01: 14416 (ids uniques: 8052)


Unnamed: 0,match_id,start_time,_source_team_name
0,8572758153,1763897576,Team Liquid
1,8572646777,1763892992,Team Liquid
2,8572546517,1763888685,Team Liquid
3,8571364449,1763826709,Team Liquid
4,8571247569,1763823160,Team Liquid


In [5]:
# Duplicats de match_id (même game renvoyée par plusieurs équipes)

dup_df = matches_df[matches_df.duplicated('match_id', keep=False)] if not matches_df.empty else pd.DataFrame()
dup_counts = dup_df['match_id'].value_counts() if not dup_df.empty else pd.Series(dtype=int)
sample_dup_ids = dup_counts.head(3).index.tolist()
print(f"match_id dupliqués: {len(dup_counts)} (affiche les 3 premiers)")
if sample_dup_ids:
    display(dup_df[dup_df['match_id'].isin(sample_dup_ids)].sort_values(['match_id', '_source_team_name', 'start_time']))
else:
    print('Pas de duplicats trouvés')


match_id dupliqués: 12500 (affiche les 3 premiers)


Unnamed: 0,match_id,radiant_win,radiant_score,dire_score,radiant,duration,start_time,leagueid,league_name,cluster,opposing_team_id,opposing_team_name,opposing_team_logo,_source_team_id,_source_team_name,start_dt
9576,8572546517,False,17,44,False,2326,1763888685,18920,PGL Wallachia 2025 Season 6,272,2163,Team Liquid,https://steamcdn-a.akamaihd.net/apps/dota2/ima...,9338413,MOUZ,2025-11-23 09:04:45
2,8572546517,False,17,44,True,2326,1763888685,18920,PGL Wallachia 2025 Season 6,272,9338413,MOUZ,https://cdn.steamusercontent.com/ugc/149367842...,2163,Team Liquid,2025-11-23 09:04:45
9575,8572646777,False,23,29,True,2909,1763892992,18920,PGL Wallachia 2025 Season 6,271,2163,Team Liquid,https://steamcdn-a.akamaihd.net/apps/dota2/ima...,9338413,MOUZ,2025-11-23 10:16:32
1,8572646777,False,23,29,False,2909,1763892992,18920,PGL Wallachia 2025 Season 6,271,9338413,MOUZ,https://cdn.steamusercontent.com/ugc/149367842...,2163,Team Liquid,2025-11-23 10:16:32
9574,8572758153,True,42,22,True,2328,1763897576,18920,PGL Wallachia 2025 Season 6,272,2163,Team Liquid,https://steamcdn-a.akamaihd.net/apps/dota2/ima...,9338413,MOUZ,2025-11-23 11:32:56
0,8572758153,True,42,22,False,2328,1763897576,18920,PGL Wallachia 2025 Season 6,272,9338413,MOUZ,https://cdn.steamusercontent.com/ugc/149367842...,2163,Team Liquid,2025-11-23 11:32:56


In [6]:
sample_id = recent_ids[0] if recent_ids else None
print(f"Sample match id: {sample_id}")
sample_match = fetch_match_detail(sample_id, session=session) if sample_id else {}
if sample_match:
    write_json(sample_match, Path("data/interim/sample_match_detail.json"))
    print("Sample match sauvegardée dans data/interim/sample_match_detail.json")
sample_match


Sample match id: 8572758153
Sample match sauvegardée dans data/interim/sample_match_detail.json


{'version': 22,
 'match_id': 8572758153,
 'draft_timings': [{'order': 1,
   'pick': False,
   'active_team': 3,
   'hero_id': 89,
   'player_slot': None,
   'extra_time': 130,
   'total_time_taken': 1},
  {'order': 2,
   'pick': False,
   'active_team': 3,
   'hero_id': 59,
   'player_slot': None,
   'extra_time': 130,
   'total_time_taken': 12},
  {'order': 3,
   'pick': False,
   'active_team': 3,
   'hero_id': 136,
   'player_slot': None,
   'extra_time': 130,
   'total_time_taken': 14},
  {'order': 4,
   'pick': False,
   'active_team': 2,
   'hero_id': 77,
   'player_slot': None,
   'extra_time': 130,
   'total_time_taken': 1},
  {'order': 5,
   'pick': False,
   'active_team': 3,
   'hero_id': 96,
   'player_slot': None,
   'extra_time': 130,
   'total_time_taken': 19},
  {'order': 6,
   'pick': False,
   'active_team': 3,
   'hero_id': 49,
   'player_slot': None,
   'extra_time': 130,
   'total_time_taken': 20},
  {'order': 7,
   'pick': False,
   'active_team': 2,
   'hero_id':

In [7]:
# Focus sur les champs de série / map et objectifs Roshan/Aegis
if sample_match:
    series_keys = ["match_id", "series_id", "series_type", "match_seq_num", "start_time", "leagueid", "radiant_team_id", "dire_team_id", "radiant_name", "dire_name"]
    print({k: sample_match.get(k) for k in series_keys})
    obj_df = pd.DataFrame(sample_match.get("objectives") or [])
    if not obj_df.empty:
        rosh_types = ["CHAT_MESSAGE_ROSHAN_KILL", "CHAT_MESSAGE_AEGIS", "building_kill"]
        if "type" in obj_df.columns:
            display(obj_df[obj_df["type"].isin(rosh_types)][[c for c in ["time", "type", "team", "key", "slot", "player_slot"] if c in obj_df.columns]].head(20))
        display(obj_df)
    else:
        print("Pas d'objectives dans le sample")


{'match_id': 8572758153, 'series_id': 1039062, 'series_type': 1, 'match_seq_num': 7199921962, 'start_time': 1763897576, 'leagueid': 18920, 'radiant_team_id': 9338413, 'dire_team_id': 2163, 'radiant_name': 'MOUZ', 'dire_name': 'Team Liquid'}


Unnamed: 0,time,type,team,key,slot,player_slot
3,602,building_kill,,npc_dota_badguys_tower1_mid,,
4,667,building_kill,,npc_dota_badguys_tower1_bot,2.0,2.0
5,877,building_kill,,npc_dota_badguys_tower1_top,4.0,4.0
8,1301,building_kill,,npc_dota_badguys_tower2_top,,
9,1321,building_kill,,npc_dota_goodguys_tower1_bot,,
10,1441,CHAT_MESSAGE_ROSHAN_KILL,2.0,,,
11,1441,CHAT_MESSAGE_AEGIS,,,0.0,0.0
12,1511,building_kill,,npc_dota_badguys_tower2_bot,2.0,2.0
13,1570,building_kill,,npc_dota_badguys_tower2_mid,2.0,2.0
14,1724,building_kill,,npc_dota_badguys_tower3_bot,0.0,0.0


Unnamed: 0,time,type,value,killer,team,slot,key,player_slot,unit
0,67,CHAT_MESSAGE_COURIER_LOST,25.0,131.0,2.0,,,,
1,121,CHAT_MESSAGE_FIRSTBLOOD,,,,0.0,7,0.0,
2,230,CHAT_MESSAGE_COURIER_LOST,35.0,-1.0,3.0,,,,
3,602,building_kill,,,,,npc_dota_badguys_tower1_mid,,npc_dota_creep_goodguys_melee
4,667,building_kill,,,,2.0,npc_dota_badguys_tower1_bot,2.0,npc_dota_hero_abaddon
5,877,building_kill,,,,4.0,npc_dota_badguys_tower1_top,4.0,npc_dota_hero_jakiro
6,1221,CHAT_MESSAGE_MINIBOSS_KILL,,,3.0,5.0,,128.0,
7,1249,CHAT_MESSAGE_COURIER_LOST,70.0,130.0,2.0,,,,
8,1301,building_kill,,,,,npc_dota_badguys_tower2_top,,npc_dota_goodguys_siege
9,1321,building_kill,,,,,npc_dota_goodguys_tower1_bot,,npc_dota_badguys_siege


In [8]:
# Visualiser les différentes games d'une même série (autour du sample)

series_matches = []
if sample_match:
    target_series = sample_match.get('series_id')
    target_league = sample_match.get('leagueid')
    team_set = {sample_match.get('radiant_team_id'), sample_match.get('dire_team_id')}
    candidates = matches_df.copy() if not matches_df.empty else pd.DataFrame()
    if 'opposing_team_id' in candidates.columns:
        candidates = candidates[(candidates['_source_team_id'].isin(team_set)) & (candidates['opposing_team_id'].isin(team_set))]
    if 'leagueid' in candidates.columns and target_league is not None:
        candidates = candidates[candidates['leagueid'] == target_league]
    candidate_ids = list(dict.fromkeys(candidates['match_id'].tolist())) if not candidates.empty else []
    print(f"Candidats même affiche/league: {len(candidate_ids)} (limite 10 pour fetch)")
    for mid in candidate_ids[:10]:
        try:
            m = fetch_match_detail(mid, session=session)
        except Exception as exc:  # noqa: BLE001
            print(f"Fetch KO pour {mid}: {exc}")
            continue
        if target_series is None or m.get('series_id') == target_series:
            series_matches.append({
                'match_id': mid,
                'series_id': m.get('series_id'),
                'series_type': m.get('series_type'),
                'start_time': m.get('start_time'),
                'radiant': m.get('radiant_name'),
                'dire': m.get('dire_name'),
                'radiant_win': m.get('radiant_win'),
                'leagueid': m.get('leagueid'),
                'match_seq_num': m.get('match_seq_num'),
            })
    if series_matches:
        df_series = pd.DataFrame(series_matches)
        df_series['start_dt'] = pd.to_datetime(df_series['start_time'], unit='s')
        df_series = df_series.sort_values(['start_time', 'match_seq_num'])
        df_series['map_num'] = df_series.groupby('series_id').cumcount() + 1
        df_series['radiant_score_series'] = df_series.groupby('series_id')['radiant_win'].cumsum()
        df_series['dire_score_series'] = df_series.groupby('series_id')['radiant_win'].transform(lambda s: s.expanding().count()) - df_series['radiant_score_series']
        display(df_series)
    else:
        print('Aucune autre game trouvée pour cette série (ou pas de series_id sur le sample).')
else:
    print('Pas de sample_match chargé.')


Candidats même affiche/league: 3 (limite 10 pour fetch)


Unnamed: 0,match_id,series_id,series_type,start_time,radiant,dire,radiant_win,leagueid,match_seq_num,start_dt,map_num,radiant_score_series,dire_score_series
2,8572546517,1039062,1,1763888685,Team Liquid,MOUZ,False,18920,7199752491,2025-11-23 09:04:45,1,0,1.0
1,8572646777,1039062,1,1763892992,MOUZ,Team Liquid,False,18920,7199837055,2025-11-23 10:16:32,2,0,2.0
0,8572758153,1039062,1,1763897576,MOUZ,Team Liquid,True,18920,7199921962,2025-11-23 11:32:56,3,1,2.0
