# Chargement des Données - World Cup ETL

**Auteur** : Short Kings Team  
**Date** : 17/12/2025

## Objectif
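Charger les données traitées (équipes, puis matchs) dans la base PostgreSQL, après avoir ajusté le schéma de la table `matches` et vidé les tables cibles.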

In [1]:
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables from the local .env file
load_dotenv()

# Read the connection settings from the environment
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT', '5432')
DB_NAME = os.getenv('DB_NAME')

# Fail fast: an unset variable would otherwise be formatted as the literal
# text "None" inside the URL and only surface later as an obscure
# connection error.
_missing_vars = [
    var_name
    for var_name, var_value in (
        ('DB_USER', DB_USER),
        ('DB_HOST', DB_HOST),
        ('DB_NAME', DB_NAME),
    )
    if not var_value
]
if _missing_vars:
    raise RuntimeError(
        f"Variables d'environnement manquantes : {', '.join(_missing_vars)}"
    )

# Percent-encode the credentials so that characters such as '@', ':' or '/'
# in the user name or password cannot break the URL structure.
# A missing password is sent as an empty string rather than the text "None".
_url_user = quote(DB_USER, safe='')
_url_password = quote(DB_PASSWORD or '', safe='')
DATABASE_URL = f"postgresql://{_url_user}:{_url_password}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
engine = create_engine(DATABASE_URL)

# Sanity check: only reports whether a password was found, never its value.
# NOTE(review): host / database / user end up in the saved cell output;
# clear outputs before sharing the notebook if those are considered sensitive.
print(f"Host: {DB_HOST}")
print(f"Database: {DB_NAME}")
print(f"User: {DB_USER}")
print(f"Password: {'chargé' if DB_PASSWORD else 'manquant'}")

Host: dpg-d50mcnnfte5s73cqqbag-a.frankfurt-postgres.render.com
Database: worldcup_db_bpk8
User: worldcup_db_bpk8_user
Password: chargé


In [2]:
from sqlalchemy import text

# Schema adjustment on `matches`, applied in this order:
#   1. drop the constraint named matches_stadium_id_fkey (if it exists),
#   2. change the stadium_id column type to VARCHAR(100).
schema_statements = (
    text("""
        ALTER TABLE matches 
        DROP CONSTRAINT IF EXISTS matches_stadium_id_fkey
    """),
    text("""
        ALTER TABLE matches 
        ALTER COLUMN stadium_id TYPE VARCHAR(100)
    """),
)

with engine.connect() as conn:
    for statement in schema_statements:
        conn.execute(statement)
    # Both statements are committed together at the end.
    conn.commit()

print("Colonne stadium_id changée en VARCHAR(100) ✓")

Colonne stadium_id changée en VARCHAR(100) ✓


In [3]:
# Row count of each table before loading, one result row per table.
query = """
SELECT 
    'teams' as table_name, COUNT(*) as nb_rows FROM teams
UNION ALL
SELECT 
    'stadiums', COUNT(*) FROM stadiums
UNION ALL
SELECT 
    'matches', COUNT(*) FROM matches;
"""
pd.read_sql(sql=query, con=engine)

Unnamed: 0,table_name,nb_rows
0,teams,0
1,stadiums,0
2,matches,0


In [4]:
from sqlalchemy import text

# Empty the target tables so the load below starts from a clean state.
# `matches` is deleted before `teams`, as in the original order
# (presumably because matches rows reference teams — TODO confirm against the schema).
tables_to_purge = ("matches", "teams")

with engine.connect() as conn:
    for table_name in tables_to_purge:
        conn.execute(text(f"DELETE FROM {table_name}"))
    conn.commit()

print("Tables vidées ✓")

Tables vidées ✓


In [5]:
# Read the processed teams file into a DataFrame and preview it.
teams_csv_path = "../data/processed/teams_traitees.csv"
df_teams = pd.read_csv(teams_csv_path)

nb_teams_to_insert = len(df_teams)
print(f"Teams à insérer: {nb_teams_to_insert}")
df_teams.head()

Teams à insérer: 226


Unnamed: 0,id_team,nom_standard,confederation,aliases
0,1,Afghanistan,AFC,"[""Afghanistan (افغانستان)""]"
1,2,Albania,UEFA,"[""Albania (Shqipëri)""]"
2,3,Algeria,CAF,"[""Algeria (الجزائر)""]"
3,5,American Samoa,OFC,[]
4,6,Andorra,UEFA,[]


In [6]:
# Append the teams to the existing `teams` table; the DataFrame index is not written.
nb_teams_inserted = len(df_teams)
df_teams.to_sql(name='teams', con=engine, if_exists='append', index=False)
print(f"✓ {nb_teams_inserted} teams insérées")

✓ 226 teams insérées


In [7]:
# Read the processed matches; the first CSV column becomes the DataFrame index.
df_matches = pd.read_csv("../data/processed/matches.csv", index_col=0)

# Rename the CSV column to `stadium_id`, the column name used in the `matches` table.
df_matches = df_matches.rename(columns={"id_stadium": "stadium_id"})

# Parse the dates. Unparsable values become NaT; report how many non-null
# source values were lost so the coercion is never silent.
parsed_dates = pd.to_datetime(df_matches['date'], errors='coerce')
nb_dates_invalides = int((df_matches['date'].notna() & parsed_dates.isna()).sum())
if nb_dates_invalides:
    print(f"⚠ {nb_dates_invalides} dates non parsables converties en NaT")
df_matches['date'] = parsed_dates

# Turn empty strings into missing values. The dict form is used on purpose:
# the scalar form `replace('', None)` is version-dependent (older pandas
# ignored the explicit None and forward-filled from the previous row instead).
df_matches['stadium_id'] = df_matches['stadium_id'].replace({'': None})

print(f"Matches à insérer: {len(df_matches)}")
df_matches.head()

Matches à insérer: 7427


Unnamed: 0_level_0,home_team_id,away_team_id,home_result,away_result,result,extra_time,penalties,replay,date,round,city,stadium_id,edition
id_match,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,84,148,4,1,home_team,False,False,False,NaT,Group Stage,Montevideo,,1930
2,246,22,3,0,home_team,False,False,False,NaT,Group Stage,Montevideo,,1930
3,261,31,2,1,home_team,False,False,False,NaT,Group Stage,Montevideo,,1930
4,188,178,3,1,home_team,False,False,False,NaT,Group Stage,Montevideo,,1930
5,11,84,1,0,home_team,False,False,False,NaT,Group Stage,Montevideo,,1930


In [8]:
# Insert options for the matches load:
#   - append to the existing table rather than replacing it,
#   - write in batches of 500 rows, using multi-row INSERT statements.
# NOTE(review): index=False means the DataFrame index (the first CSV column,
# loaded with index_col=0) is NOT written; presumably the table generates its
# own match key — TODO confirm against the schema.
matches_insert_options = {
    "if_exists": "append",
    "index": False,
    "method": "multi",
    "chunksize": 500,
}
df_matches.to_sql("matches", engine, **matches_insert_options)

nb_matches_inserted = len(df_matches)
print(f"✓ {nb_matches_inserted} matches insérés")

✓ 7427 matches insérés


In [9]:
# Row counts after the load, to compare with the number of rows inserted above.
query = """
SELECT 'teams' as table_name, COUNT(*) as nb_rows FROM teams
UNION ALL
SELECT 'matches', COUNT(*) FROM matches;
"""
pd.read_sql(sql=query, con=engine)

Unnamed: 0,table_name,nb_rows
0,teams,226
1,matches,7427
