# Chargement des Données - World Cup ETL

**Auteur** : Short Kings Team  
**Date** : 17/12/2025

## Objectif
Charger les fichiers CSV des équipes et des matchs dans les tables `teams` et `matches` de la base PostgreSQL.

In [3]:
import psycopg2
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Percent-encoding helper for the credentials embedded in the database URL.
from urllib.parse import quote

# Load the environment variables from the .env file.
load_dotenv()

# Read the connection settings from the environment; only the port has a default.
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT', '5432')
DB_NAME = os.getenv('DB_NAME')

# Sanity check: show what was loaded. The password itself is never printed,
# only whether it is present.
print(f"Host: {DB_HOST}")
print(f"Database: {DB_NAME}")
print(f"User: {DB_USER}")
print(f"Password: {'chargé' if DB_PASSWORD else 'manquant'}")

# Fail fast on missing settings: otherwise the literal string "None" would be
# interpolated into the URL and the failure would only surface later as an
# obscure connection error.
_required_settings = {
    'DB_USER': DB_USER,
    'DB_PASSWORD': DB_PASSWORD,
    'DB_HOST': DB_HOST,
    'DB_NAME': DB_NAME,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        f"Variables d'environnement manquantes : {', '.join(_missing_settings)}"
    )

# Percent-encode the credentials so that characters such as '@', ':', '/' or
# '%' in the user name or password cannot corrupt the URL structure.
# quote(..., safe='') is used rather than quote_plus so that spaces become
# %20, which decodes correctly whichever unquote variant the URL parser uses.
DATABASE_URL = (
    f"postgresql://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
engine = create_engine(DATABASE_URL)

Host: dpg-d50mcnnfte5s73cqqbag-a.frankfurt-postgres.render.com
Database: worldcup_db_bpk8
User: worldcup_db_bpk8_user
Password: chargé


In [None]:
from sqlalchemy import text

# Schema change on `matches`, executed in order on a single connection:
#   1. drop the foreign-key constraint on stadium_id if it exists;
#   2. change the stadium_id column type to VARCHAR(100).
stadium_id_migration = (
    """
        ALTER TABLE matches 
        DROP CONSTRAINT IF EXISTS matches_stadium_id_fkey
    """,
    """
        ALTER TABLE matches 
        ALTER COLUMN stadium_id TYPE VARCHAR(100)
    """,
)

with engine.connect() as connection:
    for statement in stadium_id_migration:
        connection.execute(text(statement))

    # Both statements are committed together once they have all succeeded.
    connection.commit()

print("Colonne stadium_id changée en VARCHAR(100) ✓")

In [None]:
# One row per table (teams, stadiums, matches) with its current row count,
# combined into a single result set with UNION ALL.
query = """
SELECT 
    'teams' as table_name, COUNT(*) as nb_rows FROM teams
UNION ALL
SELECT 
    'stadiums', COUNT(*) FROM stadiums
UNION ALL
SELECT 
    'matches', COUNT(*) FROM matches;
"""
# Last expression of the cell: the resulting DataFrame is displayed.
pd.read_sql(sql=query, con=engine)

In [None]:
from sqlalchemy import text

# Empty the tables before reloading them. `matches` is cleared before `teams`
# (presumably because matches references teams -- confirm against the schema).
tables_to_purge = ("matches", "teams")

with engine.connect() as connection:
    for table_name in tables_to_purge:
        connection.execute(text(f"DELETE FROM {table_name}"))
    # Commit both deletions together.
    connection.commit()

print("Tables vidées ✓")

In [None]:
# Read the teams CSV into a DataFrame and report how many rows will be loaded.
df_teams = pd.read_csv(filepath_or_buffer="teams_traitees (2).csv")

team_count = len(df_teams)
print(f"Teams à insérer: {team_count}")

# Last expression of the cell: preview of the first rows.
df_teams.head()

In [None]:
# Append the teams rows to the existing `teams` table; the DataFrame index is
# not written as a column.
df_teams.to_sql(name='teams', con=engine, index=False, if_exists='append')

inserted_teams = len(df_teams)
print(f"✓ {inserted_teams} teams insérées")

In [None]:
# Read the matches CSV; its first column is used as the DataFrame index.
df_matches = pd.read_csv("ONAFINILESAMIS.csv", index_col=0)

# Rename the CSV column to `stadium_id`, the column name used in the
# `matches` table.
df_matches = df_matches.rename(columns={"id_stadium": "stadium_id"})

# Parse the dates. errors='coerce' turns unparseable values into NaT, so count
# the values that were present but could not be parsed and report them instead
# of losing them silently.
raw_dates = df_matches['date']
df_matches['date'] = pd.to_datetime(raw_dates, errors='coerce')
unparsed_dates = int((raw_dates.notna() & df_matches['date'].isna()).sum())
if unparsed_dates:
    print(f"⚠ {unparsed_dates} dates invalides converties en NaT")

# Turn empty stadium ids into NULLs. The dict form is used on purpose:
# with a scalar `to_replace`, pandas < 1.4 ignored an explicit `value=None`
# and forward-filled instead, which would copy the previous row's stadium
# into the empty cells.
df_matches['stadium_id'] = df_matches['stadium_id'].replace({'': None})

print(f"Matches à insérer: {len(df_matches)}")
# Last expression of the cell: preview of the first rows.
df_matches.head()

In [None]:
# Append the matches rows to the existing `matches` table.
# method='multi' sends several rows per INSERT statement and chunksize=500
# caps the number of rows written per batch; the index is not written.
matches_insert_options = {
    'if_exists': 'append',
    'index': False,
    'method': 'multi',
    'chunksize': 500,
}
df_matches.to_sql('matches', engine, **matches_insert_options)

inserted_matches = len(df_matches)
print(f"✓ {inserted_matches} matches insérés")

In [None]:
# Post-load check: row counts of `teams` and `matches` in one result set.
query = """
SELECT 'teams' as table_name, COUNT(*) as nb_rows FROM teams
UNION ALL
SELECT 'matches', COUNT(*) FROM matches;
"""
# Last expression of the cell: the resulting DataFrame is displayed.
pd.read_sql(sql=query, con=engine)