# Player

# Cellule 1 :

In [3]:
# ============================================
# NOTEBOOK 6 : ETL DIMENSIONS + FAITS
# ============================================

import pandas as pd
from sqlalchemy import create_engine, text
from datetime import datetime
import time

# Setup
conn_string = "postgresql://game_user:game_password@postgres:5432/game_dw"
engine = create_engine(conn_string)
print("‚úÖ Connexion OK")


# ============================================
# 1. ALIMENTATION DIMENSION DATE
# ============================================
print("\nüìÖ Alimentation dim_date...")

# R√©cup√©rer toutes les dates uniques des matchs
query_dates = text("""
SELECT DISTINCT 
    game_creation_ts,
    game_duration
FROM match
WHERE game_creation_ts IS NOT NULL
""")

df_dates = pd.read_sql(query_dates, engine)
print(f"  {len(df_dates)} timestamps uniques trouv√©s")

# Convertir timestamps en dates et ins√©rer
dates_data = []
for _, row in df_dates.iterrows():
    ts = row['game_creation_ts']
    duration = row['game_duration'] or 0
    
    # Convertir millisecondes en datetime
    dt = pd.to_datetime(ts, unit='ms')
    
    # D√©terminer p√©riode de la journ√©e
    hour = dt.hour
    if 6 <= hour < 12:
        period = 'morning'
    elif 12 <= hour < 18:
        period = 'afternoon'
    elif 18 <= hour < 22:
        period = 'evening'
    else:
        period = 'night'
    
    dates_data.append({
        'full_date': dt,
        'year': dt.year,
        'quarter': dt.quarter,
        'month': dt.month,
        'month_name': dt.strftime('%B'),
        'week': dt.isocalendar()[1],
        'day': dt.day,
        'day_of_week': dt.weekday(),
        'day_name': dt.strftime('%A'),
        'is_weekend': dt.weekday() >= 5,
        'hour': hour,
        'period_of_day': period
    })

# Ins√©rer par batch
if dates_data:
    df_insert = pd.DataFrame(dates_data)
    with engine.connect() as conn:
        df_insert.to_sql('dim_date', conn, if_exists='append', index=False)
        conn.commit()
    print(f"  ‚úÖ {len(dates_data)} dates ins√©r√©es")


# ============================================
# 2. ALIMENTATION DIMENSION JOUEURS
# ============================================
print("\nüë§ Alimentation dim_player...")

query_players = text("""
INSERT INTO dim_player (player_puuid, first_seen_date, total_games)
SELECT 
    p.player_puuid,
    TO_TIMESTAMP(MIN(m.game_creation_ts) / 1000) as first_seen_date,
    COUNT(*) as total_games
FROM player p
JOIN participation part ON p.player_puuid = part.player_puuid
JOIN match m ON part.match_id = m.match_id
GROUP BY p.player_puuid
ON CONFLICT (player_puuid) DO UPDATE SET
    total_games = EXCLUDED.total_games;
""")

with engine.connect() as conn:
    conn.execute(query_players)
    conn.commit()

# ============================================
# 3. ALIMENTATION DIMENSION CHAMPIONS
# ============================================
print("\n‚öîÔ∏è Alimentation dim_champion...")

# Classification simple des champions (√† enrichir manuellement si besoin)
champion_classes = {
    'Akali': 'Assassin', 'Zed': 'Assassin', 'Fizz': 'Assassin',
    'Alistar': 'Tank', 'Malphite': 'Tank', 'Leona': 'Tank',
    'XinZhao': 'Fighter', 'Darius': 'Fighter', 'Garen': 'Fighter',
    'Jinx': 'Marksman', 'Caitlyn': 'Marksman', 'Ashe': 'Marksman',
    'Lux': 'Mage', 'Ahri': 'Mage', 'Syndra': 'Mage',
    'Soraka': 'Support', 'Janna': 'Support', 'Lulu': 'Support'
}

# R√©cup√©rer champions existants
query_champs = text("SELECT champion_id, champion_name FROM champion")
df_champs = pd.read_sql(query_champs, engine)

champs_data = []
for _, row in df_champs.iterrows():
    name = row['champion_name']
    champs_data.append({
        'champion_id': row['champion_id'],
        'champion_name': name,
        'champion_class': champion_classes.get(name, 'Unknown'),
        'difficulty_level': 'medium',  # Par d√©faut
        'release_year': 2020  # Valeur par d√©faut
    })

if champs_data:
    df_insert = pd.DataFrame(champs_data)
    with engine.connect() as conn:
        df_insert.to_sql('dim_champion', conn, if_exists='append', index=False)
        conn.commit()
    print(f"  ‚úÖ {len(champs_data)} champions ins√©r√©s")


# ============================================
# 4. ALIMENTATION DIMENSION MAPS
# ============================================
print("\nüó∫Ô∏è Alimentation dim_map...")

query_maps = text("""
INSERT INTO dim_map (map_id, map_name, map_type, lane_config)
SELECT DISTINCT
    map_id,
    CASE map_id
        WHEN 11 THEN 'Summoners Rift'
        WHEN 12 THEN 'Howling Abyss'
        WHEN 21 THEN 'Nexus Blitz'
        ELSE 'Unknown Map'
    END as map_name,
    CASE 
        WHEN map_id = 11 THEN 'Classic 5v5'
        WHEN map_id = 12 THEN 'ARAM'
        ELSE 'Special'
    END as map_type,
    CASE 
        WHEN map_id = 11 THEN '5v5'
        WHEN map_id = 12 THEN '5v5'
        ELSE 'Other'
    END as lane_config
FROM match
WHERE map_id IS NOT NULL
ON CONFLICT (map_id) DO NOTHING;
""")

with engine.connect() as conn:
    conn.execute(query_maps)
    conn.commit()

with engine.connect() as conn:
    result = conn.execute(text("SELECT COUNT(*) FROM dim_map"))
    print(f"  ‚úÖ {result.scalar()} maps ins√©r√©es")


# ============================================
# 5. ALIMENTATION TABLE DE FAITS
# ============================================
print("\nüìä Alimentation fact_performance...")

# Requ√™te complexe : jointure de toutes les tables pour r√©cup√©rer les SK
query_facts = text("""
INSERT INTO fact_performance (
    date_id, player_sk, champion_sk, map_sk, match_id,
    kills, deaths, assists, gold_earned, champ_level,
    game_duration, win, kda_ratio, gold_per_minute
)
SELECT 
    d.date_id,
    dp.player_sk,
    dc.champion_sk,
    dm.map_sk,
    part.match_id,
    part.kills,
    part.deaths,
    part.assists,
    part.gold_earned,
    part.champ_level,
    m.game_duration,
    part.win,
    CASE 
        WHEN part.deaths = 0 THEN (part.kills + part.assists)::DECIMAL
        ELSE (part.kills + part.assists)::DECIMAL / part.deaths
    END as kda_ratio,
    CASE 
        WHEN m.game_duration > 0 THEN (part.gold_earned::DECIMAL / (m.game_duration::DECIMAL / 60))
        ELSE 0
    END as gold_per_minute
FROM participation part
JOIN match m ON part.match_id = m.match_id
JOIN dim_date d ON d.full_date = TO_TIMESTAMP(m.game_creation_ts/1000)
JOIN dim_player dp ON dp.player_puuid = part.player_puuid
JOIN dim_champion dc ON dc.champion_id = part.champion_id
JOIN dim_map dm ON dm.map_id = m.map_id
ON CONFLICT DO NOTHING;
""")

start = time.time()
with engine.connect() as conn:
    result = conn.execute(query_facts)
    conn.commit()
    print(f"  ‚úÖ Faits ins√©r√©s en {time.time()-start:.2f}s")


# ============================================
# 6. V√âRIFICATION FINALE
# ============================================
print("\n" + "="*50)
print("üìä R√âSULTAT FINAL DU MOD√àLE DIMENSIONNEL")
print("="*50)

verifications = {
    "dim_date": "SELECT COUNT(*) FROM dim_date",
    "dim_player": "SELECT COUNT(*) FROM dim_player",
    "dim_champion": "SELECT COUNT(*) FROM dim_champion", 
    "dim_map": "SELECT COUNT(*) FROM dim_map",
    "fact_performance": "SELECT COUNT(*) FROM fact_performance"
}

for table, query in verifications.items():
    with engine.connect() as conn:
        result = conn.execute(text(query))
        count = result.scalar()
        print(f"  {table:20s}: {count:>10,} lignes")

# Exemple de requ√™te analytique
print("\nüéØ Exemple d'analyse : Taux de victoire par classe de champion")
query_analyse = text("""
SELECT 
    dc.champion_class,
    COUNT(*) as total_games,
    SUM(CASE WHEN fp.win THEN 1 ELSE 0 END) as wins,
    ROUND(100.0 * SUM(CASE WHEN fp.win THEN 1 ELSE 0 END) / COUNT(*), 2) as winrate_pct
FROM fact_performance fp
JOIN dim_champion dc ON fp.champion_sk = dc.champion_sk
GROUP BY dc.champion_class
ORDER BY winrate_pct DESC;
""")

df_analyse = pd.read_sql(query_analyse, engine)
print(df_analyse)

print("\n‚úÖ ETL DIMENSIONNEL TERMIN√â")

‚úÖ Connexion OK

üìÖ Alimentation dim_date...
  12711 timestamps uniques trouv√©s
  ‚úÖ 12711 dates ins√©r√©es

üë§ Alimentation dim_player...

‚öîÔ∏è Alimentation dim_champion...
  ‚úÖ 178 champions ins√©r√©s

üó∫Ô∏è Alimentation dim_map...
  ‚úÖ 6 maps ins√©r√©es

üìä Alimentation fact_performance...
  ‚úÖ Faits ins√©r√©s en 0.26s

üìä R√âSULTAT FINAL DU MOD√àLE DIMENSIONNEL
  dim_date            :     25,422 lignes
  dim_player          :     91,418 lignes
  dim_champion        :        178 lignes
  dim_map             :          6 lignes
  fact_performance    :        382 lignes

üéØ Exemple d'analyse : Taux de victoire par classe de champion
  champion_class  total_games  wins  winrate_pct
0       Marksman           22    18        81.82
1        Support           10     6        60.00
2        Unknown          310   162        52.26
3        Fighter           12     6        50.00
4       Assassin            4     2        50.00
5           Mage           10     2        2