In [4]:
# Chargement JSON avec gestion Decimal
import pandas as pd
import json
import os
import ijson
from decimal import Decimal
from sqlalchemy import create_engine, text

# Custom JSON encoder that knows how to serialize Decimal values
class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that emits ``Decimal`` values as plain floats.

    Any other type the standard encoder cannot handle is delegated to
    ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*."""
        if not isinstance(obj, Decimal):
            # Not ours to handle: let the base class raise TypeError.
            return super().default(obj)
        return float(obj)

# Setup
# The connection URL and the input path can be overridden through the
# environment (GAME_DW_URL / MATCH_JSON_PATH) so that credentials and
# machine-specific paths do not have to be edited in the notebook. When the
# variables are unset, the original development defaults are used.
conn_string = os.environ.get(
    "GAME_DW_URL",
    "postgresql://game_user:game_password@postgres:5432/game_dw",
)
engine = create_engine(conn_string)
json_path = os.environ.get(
    "MATCH_JSON_PATH",
    "/home/jovyan/work/data/raw/match_v5.json",
)

print("Setup OK")

# Create the landing table.
# Any existing raw_matches table is dropped (CASCADE), then recreated with a
# JSONB payload column, a load timestamp, and a GIN index on the payload.
create_raw_matches_ddl = text("""
        DROP TABLE IF EXISTS raw_matches CASCADE;
        CREATE TABLE raw_matches (
            id SERIAL PRIMARY KEY,
            data JSONB NOT NULL,
            loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
        CREATE INDEX idx_raw_matches_data ON raw_matches USING GIN (data);
    """)

with engine.connect() as ddl_conn:
    ddl_conn.execute(create_raw_matches_ddl)
    # Commit explicitly; the confirmation is printed only after it succeeds.
    ddl_conn.commit()
    print("Table créée")

# Load the matches into raw_matches in fixed-size batches.
def _flush_batch(rows):
    """Insert one batch of serialized matches into ``raw_matches``.

    Args:
        rows: list of ``{'data': <JSON string>}`` dicts, one per match.

    Returns:
        The number of rows handed to the database.

    The insert runs inside ``engine.begin()``, so the batch is committed when
    the block exits normally and rolled back if ``to_sql`` raises. This makes
    the commit explicit instead of relying on pandas to manage it.
    """
    frame = pd.DataFrame(rows)
    with engine.begin() as conn:
        frame.to_sql('raw_matches', conn, if_exists='append', index=False, method='multi')
    return len(rows)


print("Chargement...")
batch = []
batch_size = 500
total = 0

# ijson parses bytes, so the file is opened in binary mode; a text-mode handle
# would be re-encoded internally and triggers a deprecation warning.
with open(json_path, 'rb') as f:
    # Stream the elements of the top-level JSON array one at a time so the
    # whole file never has to be held in memory.
    for match in ijson.items(f, 'item'):
        # DecimalEncoder turns the Decimal values produced by ijson into floats.
        batch.append({'data': json.dumps(match, cls=DecimalEncoder)})

        if len(batch) >= batch_size:
            total += _flush_batch(batch)
            # Progress line every 5000 rows (a multiple of batch_size).
            if total % 5000 == 0:
                print(f"  → {total}")
            batch = []

# Write whatever is left over after the last full batch.
if batch:
    total += _flush_batch(batch)

print(f"Terminé : {total} matchs")

# Verification: count the rows actually present in raw_matches so the figure
# can be compared with the number of matches reported by the load step.
count_query = text("SELECT COUNT(*) FROM raw_matches")
with engine.connect() as check_conn:
    rows_in_db = check_conn.execute(count_query).scalar()
    print(f"En base : {rows_in_db} matchs")

Setup OK
Table créée
Chargement...
  → 5000
  → 10000
Terminé : 12719 matchs
En base : 12719 matchs
