# Load Data to Database

This notebook loads football match data and team metadata from the extracted JSON files into a PostgreSQL database.

The data is organized by leagues:
- **Data_BundesLiga**: German Bundesliga
- **Data_La_Liga**: Spanish La Liga  
- **Data_Ligue1**: French Ligue 1
- **Data_SerieA**: Italian Serie A

## Database Structure

- **team_meta**: Stores unique team information (team_id, team_name)
- **matches_registered**: Stores match details with all game information
- **team_match_stats**: Stores detailed team statistics for each match

In [1]:
# Import required libraries
import json
import os
import psycopg2
from psycopg2.extras import execute_values
import glob
from datetime import datetime
import pandas as pd
import numpy as np


import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm


In [14]:
# Mapeo de nombres de equipos para normalizaci√≥n
# Esto asegura que equipos con nombres similares se traten como el mismo
TEAM_NAME_MAPPING = {
    "Nott'ham Forest": "Nottingham Forest",
    "Nottingham Forest": "Nottingham Forest",
    "Nottham Forest": "Nottingham Forest",
    "Newcastle Utd": "Newcastle United",
    "Newcastle United": "Newcastle United",
    "Manchester Utd": "Manchester United",
    "Manchester United": "Manchester United",
    "Manchester City": "Manchester City",
    "West Ham": "West Ham United",
    "West Ham United": "West Ham United",
    "Wolves": "Wolverhampton",
    "Wolverhampton": "Wolverhampton",
    "Brighton": "Brighton & Hove Albion",
    "Brighton & Hove Albion": "Brighton & Hove Albion",
    "Leeds United": "Leeds United",
    "Leicester City": "Leicester City",
    "West Brom": "West Bromwich Albion",
    "West Bromwich Albion": "West Bromwich Albion",
}

def normalize_team_name(team_name):
    """Normalizar nombre de equipo usando el mapeo"""
    if team_name is None:
        return None
    return TEAM_NAME_MAPPING.get(team_name, team_name)


In [52]:
# Database Configuration
class DatabaseConfig:
    HOST = "localhost"
    PORT = 5432
    DATABASE = "2. PremierLeague"  # Add your database name here
    USER = "admin"
    PASSWORD = "GadumUNITEC123"

# SQL Queries
INSERT_TEAM_QUERY = """
INSERT INTO team_meta (team_id, team_name) VALUES (%s, %s) 
ON CONFLICT (team_id) DO UPDATE SET
    team_name = EXCLUDED.team_name;
"""

INSERT_MATCH_QUERY = """
INSERT INTO matches_registered
(season_id, match_id, matchday, home_team, home_team_id, home_team_score, away_team, away_team_id, away_team_score, date_game)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 
ON CONFLICT (match_id) DO UPDATE SET
    season_id = EXCLUDED.season_id,
    matchday = EXCLUDED.matchday,
    home_team = EXCLUDED.home_team,
    home_team_id = EXCLUDED.home_team_id,
    home_team_score = EXCLUDED.home_team_score,
    away_team = EXCLUDED.away_team,
    away_team_id = EXCLUDED.away_team_id,
    away_team_score = EXCLUDED.away_team_score,
    date_game = EXCLUDED.date_game;
"""

# SQL Query for team match statistics - CORREGIDO SIN team_id
INSERT_TEAM_MATCH_STATS_QUERY = """
INSERT INTO team_match_stats (
    match_id, season_id, team_name, home_away,
    ttl_gls, ttl_ast, ttl_xg, ttl_xag, ttl_pk_made, ttl_pk_att, 
    ttl_yellow_cards, ttl_red_cards, ttl_gls_ag, ttl_sot_ag, ttl_saves, 
    clean_sheets, ttl_pk_att_ag, ttl_pk_saved, ttl_sh, ttl_sot, pct_sot, 
    ttl_avg_sh, ttl_gls_per_sot, ttl_gls_xg_diff, ttl_pass_cmp, ttl_pass_att, 
    pct_pass_cmp, ttl_pass_prog, ttl_key_passes, ttl_pass_opp_box, 
    ttl_cross_opp_box, ttl_pass_live, ttl_pass_dead, ttl_pass_fk, 
    ttl_through_balls, ttl_switches, ttl_crosses, ttl_pass_offside, 
    ttl_pass_blocked, ttl_throw_ins, ttl_ck, ttl_tkl, ttl_tkl_won, 
    ttl_tkl_drb, ttl_tkl_drb_att, pct_tkl_drb_suc, ttl_blocks, 
    ttl_sh_blocked, ttl_int, ttl_clearances, ttl_def_error, avg_poss, 
    ttl_touches, ttl_take_on_att, ttl_take_on_suc, ttl_carries, 
    ttl_carries_miscontrolled, ttl_carries_dispossessed, ttl_pass_rcvd, 
    ttl_pass_prog_rcvd, ttl_fls_ag, ttl_fls_for, ttl_offside, ttl_og, 
    ttl_ball_recov, ttl_air_dual_won, ttl_air_dual_lost, players_count
    ) VALUES (
    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
    %s, %s, %s, %s, %s, %s, %s, %s, %s
    ) ON CONFLICT (match_id, team_name) DO UPDATE SET
    home_away = EXCLUDED.home_away,
    ttl_gls = EXCLUDED.ttl_gls,
    ttl_ast = EXCLUDED.ttl_ast,
    ttl_xg = EXCLUDED.ttl_xg,
    ttl_xag = EXCLUDED.ttl_xag,
    ttl_pk_made = EXCLUDED.ttl_pk_made,
    ttl_pk_att = EXCLUDED.ttl_pk_att,
    ttl_yellow_cards = EXCLUDED.ttl_yellow_cards,
    ttl_red_cards = EXCLUDED.ttl_red_cards,
    ttl_gls_ag = EXCLUDED.ttl_gls_ag,
    ttl_sot_ag = EXCLUDED.ttl_sot_ag,
    ttl_saves = EXCLUDED.ttl_saves,
    clean_sheets = EXCLUDED.clean_sheets,
    ttl_pk_att_ag = EXCLUDED.ttl_pk_att_ag,
    ttl_pk_saved = EXCLUDED.ttl_pk_saved,
    ttl_sh = EXCLUDED.ttl_sh,
    ttl_sot = EXCLUDED.ttl_sot,
    pct_sot = EXCLUDED.pct_sot,
    ttl_avg_sh = EXCLUDED.ttl_avg_sh,
    ttl_gls_per_sot = EXCLUDED.ttl_gls_per_sot,
    ttl_gls_xg_diff = EXCLUDED.ttl_gls_xg_diff,
    ttl_pass_cmp = EXCLUDED.ttl_pass_cmp,
    ttl_pass_att = EXCLUDED.ttl_pass_att,
    pct_pass_cmp = EXCLUDED.pct_pass_cmp,
    ttl_pass_prog = EXCLUDED.ttl_pass_prog,
    ttl_key_passes = EXCLUDED.ttl_key_passes,
    ttl_pass_opp_box = EXCLUDED.ttl_pass_opp_box,
    ttl_cross_opp_box = EXCLUDED.ttl_cross_opp_box,
    ttl_pass_live = EXCLUDED.ttl_pass_live,
    ttl_pass_dead = EXCLUDED.ttl_pass_dead,
    ttl_pass_fk = EXCLUDED.ttl_pass_fk,
    ttl_through_balls = EXCLUDED.ttl_through_balls,
    ttl_switches = EXCLUDED.ttl_switches,
    ttl_crosses = EXCLUDED.ttl_crosses,
    ttl_pass_offside = EXCLUDED.ttl_pass_offside,
    ttl_pass_blocked = EXCLUDED.ttl_pass_blocked,
    ttl_throw_ins = EXCLUDED.ttl_throw_ins,
    ttl_ck = EXCLUDED.ttl_ck,
    ttl_tkl = EXCLUDED.ttl_tkl,
    ttl_tkl_won = EXCLUDED.ttl_tkl_won,
    ttl_tkl_drb = EXCLUDED.ttl_tkl_drb,
    ttl_tkl_drb_att = EXCLUDED.ttl_tkl_drb_att,
    pct_tkl_drb_suc = EXCLUDED.pct_tkl_drb_suc,
    ttl_blocks = EXCLUDED.ttl_blocks,
    ttl_sh_blocked = EXCLUDED.ttl_sh_blocked,
    ttl_int = EXCLUDED.ttl_int,
    ttl_clearances = EXCLUDED.ttl_clearances,
    ttl_def_error = EXCLUDED.ttl_def_error,
    avg_poss = EXCLUDED.avg_poss,
    ttl_touches = EXCLUDED.ttl_touches,
    ttl_take_on_att = EXCLUDED.ttl_take_on_att,
    ttl_take_on_suc = EXCLUDED.ttl_take_on_suc,
    ttl_carries = EXCLUDED.ttl_carries,
    ttl_carries_miscontrolled = EXCLUDED.ttl_carries_miscontrolled,
    ttl_carries_dispossessed = EXCLUDED.ttl_carries_dispossessed,
    ttl_pass_rcvd = EXCLUDED.ttl_pass_rcvd,
    ttl_pass_prog_rcvd = EXCLUDED.ttl_pass_prog_rcvd,
    ttl_fls_ag = EXCLUDED.ttl_fls_ag,
    ttl_fls_for = EXCLUDED.ttl_fls_for,
    ttl_offside = EXCLUDED.ttl_offside,
    ttl_og = EXCLUDED.ttl_og,
    ttl_ball_recov = EXCLUDED.ttl_ball_recov,
    ttl_air_dual_won = EXCLUDED.ttl_air_dual_won,
    ttl_air_dual_lost = EXCLUDED.ttl_air_dual_lost,
    players_count = EXCLUDED.players_count;
"""

In [53]:
# Conexi√≥n a la base de datos PostgreSQL
def connect_to_database():
    try:
        conn = psycopg2.connect(
            host=DatabaseConfig.HOST,
            port=DatabaseConfig.PORT,
            dbname=DatabaseConfig.DATABASE,
            user=DatabaseConfig.USER,
            password=DatabaseConfig.PASSWORD
        )
        return conn
    except Exception as e:
        print(f"Error connecting to database: {e}")
        return None

In [27]:
# Create Database Tables
def create_tables():
    """Create the required database tables if they don't exist"""
    conn = connect_to_database()
    if not conn:
        print("No se pudo conectar a la base de datos")
        return
    try:
        cursor = conn.cursor()
        CREATE_TEAM_META_TABLE = """
        CREATE TABLE IF NOT EXISTS team_meta (
            team_id VARCHAR(50) PRIMARY KEY,
            team_name VARCHAR(100) NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
        """
        CREATE_MATCHES_TABLE = """
        CREATE TABLE IF NOT EXISTS matches_registered (
            match_id VARCHAR(50) PRIMARY KEY,
            season_id VARCHAR(20) NOT NULL,
            matchday INTEGER NOT NULL,
            home_team VARCHAR(100) NOT NULL,
            home_team_id VARCHAR(50) NOT NULL,
            home_team_score INTEGER NOT NULL,
            away_team VARCHAR(100) NOT NULL,
            away_team_id VARCHAR(50) NOT NULL,
            away_team_score INTEGER NOT NULL,
            date_game TIMESTAMP NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (home_team_id) REFERENCES team_meta(team_id),
            FOREIGN KEY (away_team_id) REFERENCES team_meta(team_id)
        );
        """
        CREATE_TEAM_MATCH_STATS_TABLE = """
        CREATE TABLE IF NOT EXISTS team_match_stats (
            match_id VARCHAR(50) NOT NULL,
            season_id VARCHAR(20) NOT NULL,
            team_name VARCHAR(100) NOT NULL,
            home_away VARCHAR(10) NOT NULL,
            ttl_gls NUMERIC,
            ttl_ast NUMERIC,
            ttl_xg NUMERIC,
            ttl_xag NUMERIC,
            ttl_pk_made NUMERIC,
            ttl_pk_att NUMERIC,
            ttl_yellow_cards NUMERIC,
            ttl_red_cards NUMERIC,
            ttl_gls_ag NUMERIC,
            ttl_sot_ag NUMERIC,
            ttl_saves NUMERIC,
            clean_sheets NUMERIC,
            ttl_pk_att_ag NUMERIC,
            ttl_pk_saved NUMERIC,
            ttl_sh NUMERIC,
            ttl_sot NUMERIC,
            pct_sot NUMERIC,
            ttl_avg_sh NUMERIC,
            ttl_gls_per_sot NUMERIC,
            ttl_gls_xg_diff NUMERIC,
            ttl_pass_cmp NUMERIC,
            ttl_pass_att NUMERIC,
            pct_pass_cmp NUMERIC,
            ttl_pass_prog NUMERIC,
            ttl_key_passes NUMERIC,
            ttl_pass_opp_box NUMERIC,
            ttl_cross_opp_box NUMERIC,
            ttl_pass_live NUMERIC,
            ttl_pass_dead NUMERIC,
            ttl_pass_fk NUMERIC,
            ttl_through_balls NUMERIC,
            ttl_switches NUMERIC,
            ttl_crosses NUMERIC,
            ttl_pass_offside NUMERIC,
            ttl_pass_blocked NUMERIC,
            ttl_throw_ins NUMERIC,
            ttl_ck NUMERIC,
            ttl_tkl NUMERIC,
            ttl_tkl_won NUMERIC,
            ttl_tkl_drb NUMERIC,
            ttl_tkl_drb_att NUMERIC,
            pct_tkl_drb_suc NUMERIC,
            ttl_blocks NUMERIC,
            ttl_sh_blocked NUMERIC,
            ttl_int NUMERIC,
            ttl_clearances NUMERIC,
            ttl_def_error NUMERIC,
            avg_poss NUMERIC,
            ttl_touches NUMERIC,
            ttl_take_on_att NUMERIC,
            ttl_take_on_suc NUMERIC,
            ttl_carries NUMERIC,
            ttl_carries_miscontrolled NUMERIC,
            ttl_carries_dispossessed NUMERIC,
            ttl_pass_rcvd NUMERIC,
            ttl_pass_prog_rcvd NUMERIC,
            ttl_fls_ag NUMERIC,
            ttl_fls_for NUMERIC,
            ttl_offside NUMERIC,
            ttl_og NUMERIC,
            ttl_ball_recov NUMERIC,
            ttl_air_dual_won NUMERIC,
            ttl_air_dual_lost NUMERIC,
            players_count INTEGER,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            UNIQUE (match_id, team_name),
            FOREIGN KEY (match_id) REFERENCES matches_registered(match_id)
        );
        """
        cursor.execute(CREATE_TEAM_META_TABLE)
        cursor.execute(CREATE_MATCHES_TABLE)
        cursor.execute(CREATE_TEAM_MATCH_STATS_TABLE)
        conn.commit()
        print("Tablas creadas correctamente.")
    except Exception as e:
        print(f"Error creando tablas: {e}")
    finally:
        cursor.close()
        conn.close()


In [28]:
# Ejecutar la creaci√≥n de tablas
create_tables()

Tablas creadas correctamente.


In [54]:
def clean_value(v):
    try:
        # Convierte a float o int si es posible
        val = float(v)
        return int(val)  # PostgreSQL suele usar integer
    except (TypeError, ValueError):
        return 0


# Funci√≥n para limpiar team_id
def clean_team_id(v):
    return v if v is not None else 0

def calculate_missing(stats):
    """Calcula m√©tricas faltantes si se pueden derivar."""
    
    # % tiros a puerta
    if stats.get("pct_sot") is None:
        sh = stats.get("ttl_sh")
        sot = stats.get("ttl_sot")
        if sh and sh > 0 and sot is not None:
            stats["pct_sot"] = (sot / sh) * 100
    
    # % pases completados
    if stats.get("pct_pass_cmp") is None:
        cmp_ = stats.get("ttl_pass_cmp")
        att_ = stats.get("ttl_pass_att")
        if cmp_ and att_ and att_ > 0:
            stats["pct_pass_cmp"] = (cmp_ / att_) * 100

    return stats



def insert_json_match(json_data, cursor):
    """
    Inserta team_meta, matches_registered y team_match_stats de un JSON.
    """

    # ------------------------------------------------------
    # INSERT 1: team_meta (home y away)
    # ------------------------------------------------------
    for t in json_data["teams"]:
        cursor.execute("""
            INSERT INTO team_meta (team_id, team_name)
            VALUES (%s, %s)
            ON CONFLICT (team_id) DO UPDATE
            SET team_name = EXCLUDED.team_name,
                updated_at = CURRENT_TIMESTAMP;
        """, (t["team_id"], t["team_name"]))

    # ------------------------------------------------------
    # INSERT 2: matches_registered
    # ------------------------------------------------------
    m = json_data["match_info"]

    cursor.execute("""
        INSERT INTO matches_registered (
            match_id, season_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        )
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON CONFLICT (match_id) DO UPDATE
        SET updated_at = CURRENT_TIMESTAMP;
    """, (
        m["match_id"], m["season_id"], m["matchday"],
        m["home_team"], m["home_team_id"], m["home_team_score"],
        m["away_team"], m["away_team_id"], m["away_team_score"],
        m["date_game"]
    ))

    # ------------------------------------------------------
    # INSERT 3: team_match_stats
    # ------------------------------------------------------

    # Orden EXACTO segun tu tabla
    db_fields = [
        "match_id", "season_id", "team_name", "home_away",
        "ttl_gls", "ttl_ast", "ttl_xg", "ttl_xag",
        "ttl_pk_made", "ttl_pk_att",
        "ttl_yellow_cards", "ttl_red_cards",
        "ttl_gls_ag", "ttl_sot_ag", "ttl_saves", "clean_sheets",
        "ttl_pk_att_ag", "ttl_pk_saved",
        "ttl_sh", "ttl_sot", "pct_sot",
        "ttl_avg_sh", "ttl_gls_per_sot", "ttl_gls_xg_diff",
        "ttl_pass_cmp", "ttl_pass_att", "pct_pass_cmp",
        "ttl_pass_prog", "ttl_key_passes",
        "ttl_pass_opp_box", "ttl_cross_opp_box",
        "ttl_pass_live", "ttl_pass_dead", "ttl_pass_fk",
        "ttl_through_balls", "ttl_switches", "ttl_crosses",
        "ttl_pass_offside", "ttl_pass_blocked", "ttl_throw_ins", "ttl_ck",
        "ttl_tkl", "ttl_tkl_won",
        "ttl_tkl_drb", "ttl_tkl_drb_att", "pct_tkl_drb_suc",
        "ttl_blocks", "ttl_sh_blocked", "ttl_int", "ttl_clearances", "ttl_def_error",
        "avg_poss",
        "ttl_touches", "ttl_take_on_att", "ttl_take_on_suc",
        "ttl_carries", "ttl_carries_miscontrolled", "ttl_carries_dispossessed",
        "ttl_pass_rcvd", "ttl_pass_prog_rcvd",
        "ttl_fls_ag", "ttl_fls_for",
        "ttl_offside", "ttl_og", "ttl_ball_recov",
        "ttl_air_dual_won", "ttl_air_dual_lost",
        "players_count"
    ]

    for t in json_data["teams"]:
        
        # COPIAR stats crudos
        stats = {k: t.get(k) for k in db_fields if k in t or k in ["match_id","season_id","team_name","home_away"]}

        # LIMPIEZA
        for key in stats:
            if key not in ["match_id", "team_name", "home_away", "season_id"]:
                stats[key] = clean_value(stats[key])

        # CALCULAR M√âTRICAS FALTANTES
        stats = calculate_missing(stats)

        # RELLENAR CAMPOS FALTANTES COMO NULL
        for f in db_fields:
            if f not in stats:
                stats[f] = None

        # ARMAR LISTA DE VALORES EN ORDEN CORRECTO
        values = [stats[f] for f in db_fields]

        # INSERT FINAL
        cursor.execute(f"""
            INSERT INTO team_match_stats ({",".join(db_fields)})
            VALUES ({",".join(["%s"] * len(db_fields))})
            ON CONFLICT (match_id, team_name)
            DO UPDATE SET updated_at = CURRENT_TIMESTAMP;
        """, values)



## Data Loading Process

**‚ö†Ô∏è IMPORTANT: Make sure to set the DATABASE name in the DatabaseConfig class above before running!**

The following sections will process each league's data and upload it to the database.

### üá™üá∏ La Liga (Spanish Primera Divisi√≥n)

In [31]:
import glob
import json
import os

# Carpeta principal de La Liga
data_path = "Data_Extracted"
laliga_folder = os.path.join(data_path, "Data_La_Liga")

# Lista para almacenar todos los partidos
laliga_stats = []

# Contadores para el resumen
total_files = 0
loaded_files = 0
json_errors = []
missing_match_info = []

for season_folder in sorted(os.listdir(laliga_folder)):
    matches_folder = os.path.join(laliga_folder, season_folder, "Matches", "Matches_Stats")
    if os.path.exists(matches_folder):
        # Buscar todos los JSON recursivamente
        files = glob.glob(os.path.join(matches_folder, "**", "*.json"), recursive=True)
        print(f"{season_folder}: {len(files)} archivos encontrados")
        total_files += len(files)

        for file in files:
            try:
                with open(file, "r", encoding="utf-8") as f:
                    match_data = json.load(f)
                
                # Si no existe 'match_info', crear uno m√≠nimo con match_id
                if "match_info" not in match_data:
                    match_data["match_info"] = {
                        "match_id": match_data.get("match_id", None),
                        "season_id": season_folder,
                        "matchday": None,
                        "home_team": None,
                        "home_team_id": None,
                        "home_team_score": 0,
                        "away_team": None,
                        "away_team_id": None,
                        "away_team_score": 0,
                        "date_game": None
                    }
                    missing_match_info.append(file)

                laliga_stats.append(match_data)
                loaded_files += 1
            except Exception as e:
                json_errors.append((file, str(e)))

    else:
        print(f"{season_folder}: carpeta {matches_folder} NO existe")

# Resumen de carga
print("\nResumen de carga:")
print(f"Archivos JSON totales encontrados: {total_files}")
print(f"Archivos cargados correctamente: {loaded_files}")
print(f"Archivos con error de JSON: {len(json_errors)}")
print(f"Archivos sin 'match_info': {len(missing_match_info)}\n")

if json_errors:
    print("Archivos con error JSON:")
    for f, e in json_errors:
        print(f"  {f}: {e}")

if missing_match_info:
    print("Archivos sin 'match_info':")
    for f in missing_match_info:
        print(f"  {f}")

print(f"\nTotal partidos cargados en laliga_stats: {len(laliga_stats)}")


2017-2018: 380 archivos encontrados
2018-2019: 380 archivos encontrados
2019-2020: 380 archivos encontrados
2020-2021: 380 archivos encontrados
2021-2022: carpeta Data_Extracted\Data_La_Liga\2021-2022\Matches\Matches_Stats NO existe
2022-2023: 380 archivos encontrados
2023-2024: 380 archivos encontrados
2024-2025: 380 archivos encontrados
2025-2026: 150 archivos encontrados

Resumen de carga:
Archivos JSON totales encontrados: 2810
Archivos cargados correctamente: 2810
Archivos con error de JSON: 0
Archivos sin 'match_info': 0


Total partidos cargados en laliga_stats: 2810


In [33]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

teams_data = []

# Recorremos todos los partidos y sus equipos
for match in laliga_stats:
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            teams_data.append((team_id, team_name))

# Eliminar duplicados por team_id
teams_data = list({t[0]: t for t in teams_data}.values())

# Insertar equipos
try:
    cursor = conn.cursor()
    cursor.executemany(INSERT_TEAM_QUERY, teams_data)
    conn.commit()
    print(f"Insertados {len(teams_data)} equipos en team_meta")
except Exception as e:
    print(f"Error insertando equipos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()


Insertados 174 equipos en team_meta


In [36]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

# PASO 1: Primero insertar TODOS los equipos que aparecen en los partidos
teams_to_insert = set()
for match in laliga_stats:
    info = match.get("match_info", {})
    home_team_id = info.get("home_team_id")
    home_team = info.get("home_team")
    away_team_id = info.get("away_team_id")
    away_team = info.get("away_team")
    
    if home_team_id and home_team:
        teams_to_insert.add((home_team_id, home_team))
    if away_team_id and away_team:
        teams_to_insert.add((away_team_id, away_team))

# Insertar equipos primero
for team_id, team_name in teams_to_insert:
    try:
        cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))
    except Exception as e:
        print(f"Error insertando equipo {team_name}: {e}")

conn.commit()
print(f"Insertados {len(teams_to_insert)} equipos √∫nicos en team_meta")

# PASO 2: Ahora insertar los partidos
matches_data = []
ignored_count = 0

for match in laliga_stats:
    info = match.get("match_info", None)

    if not info:
        ignored_count += 1
        continue

    season_id = info.get("season_id", "Unknown")
    match_id = info.get("match_id", "Unknown")
    matchday = info.get("matchday", 0) or 0
    home_team = info.get("home_team", "Unknown")
    home_team_id = info.get("home_team_id")
    home_team_score = info.get("home_team_score", 0) or 0
    away_team = info.get("away_team", "Unknown")
    away_team_id = info.get("away_team_id")
    away_team_score = info.get("away_team_score", 0) or 0
    date_game = info.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None:
        matches_data.append((
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
    else:
        ignored_count += 1

try:
    cursor.executemany(INSERT_MATCH_QUERY, matches_data)
    conn.commit()
    print(f"Insertados {len(matches_data)} partidos en matches_registered")
    print(f"Ignorados {ignored_count} partidos sin match_info o con datos incompletos")
except Exception as e:
    print(f"Error insertando partidos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()

Insertados 112 equipos √∫nicos en team_meta
Insertados 2810 partidos en matches_registered
Ignorados 0 partidos sin match_info o con datos incompletos


In [37]:
# Cargar datos de La Liga a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

def clean_db_value(v):
    if v is None or v == "NA":
        return 0
    try:
        val = float(v)
        return int(val)
    except (TypeError, ValueError):
        return 0

inserted_matches = 0
inserted_stats = 0
for match in laliga_stats:
    # Insertar equipos (ya se hizo en celda anterior, pero por seguridad)
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))

    # Insertar partido
    m = match.get("match_info", {})
    match_id = m.get("match_id")
    season_id = m.get("season_id", "Unknown")
    matchday = clean_db_value(m.get("matchday", 0))
    home_team = m.get("home_team", "Unknown")
    home_team_id = m.get("home_team_id")
    home_team_score = clean_db_value(m.get("home_team_score", 0))
    away_team = m.get("away_team", "Unknown")
    away_team_id = m.get("away_team_id")
    away_team_score = clean_db_value(m.get("away_team_score", 0))
    date_game = m.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None and match_id is not None:
        cursor.execute(INSERT_MATCH_QUERY, (
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
        inserted_matches += 1

    # Insertar estad√≠sticas de equipos por partido
    db_fields = [
        "match_id", "season_id", "team_name", "home_away",
        "ttl_gls", "ttl_ast", "ttl_xg", "ttl_xag",
        "ttl_pk_made", "ttl_pk_att",
        "ttl_yellow_cards", "ttl_red_cards",
        "ttl_gls_ag", "ttl_sot_ag", "ttl_saves", "clean_sheets",
        "ttl_pk_att_ag", "ttl_pk_saved",
        "ttl_sh", "ttl_sot", "pct_sot",
        "ttl_avg_sh", "ttl_gls_per_sot", "ttl_gls_xg_diff",
        "ttl_pass_cmp", "ttl_pass_att", "pct_pass_cmp",
        "ttl_pass_prog", "ttl_key_passes",
        "ttl_pass_opp_box", "ttl_cross_opp_box",
        "ttl_pass_live", "ttl_pass_dead", "ttl_pass_fk",
        "ttl_through_balls", "ttl_switches", "ttl_crosses",
        "ttl_pass_offside", "ttl_pass_blocked", "ttl_throw_ins", "ttl_ck",
        "ttl_tkl", "ttl_tkl_won",
        "ttl_tkl_drb", "ttl_tkl_drb_att", "pct_tkl_drb_suc",
        "ttl_blocks", "ttl_sh_blocked", "ttl_int", "ttl_clearances", "ttl_def_error",
        "avg_poss",
        "ttl_touches", "ttl_take_on_att", "ttl_take_on_suc",
        "ttl_carries", "ttl_carries_miscontrolled", "ttl_carries_dispossessed",
        "ttl_pass_rcvd", "ttl_pass_prog_rcvd",
        "ttl_fls_ag", "ttl_fls_for",
        "ttl_offside", "ttl_og", "ttl_ball_recov",
        "ttl_air_dual_won", "ttl_air_dual_lost",
        "players_count"
    ]

    for t in match["teams"]:
        stats = {k: t.get(k) for k in db_fields}
        # Limpiar valores
        for key in stats:
            if key not in ["match_id", "team_name", "home_away", "season_id"]:
                stats[key] = clean_db_value(stats[key])
        # Rellenar campos faltantes
        for f in db_fields:
            if stats[f] is None:
                stats[f] = 0
        values = [stats[f] for f in db_fields]
        cursor.execute(f"""
            INSERT INTO team_match_stats ({','.join(db_fields)})
            VALUES ({','.join(['%s'] * len(db_fields))})
            ON CONFLICT (match_id, team_name)
            DO UPDATE SET updated_at = CURRENT_TIMESTAMP;
        """, values)
        inserted_stats += 1

conn.commit()
print(f"Partidos insertados: {inserted_matches}")
print(f"Estad√≠sticas insertadas: {inserted_stats}")

cursor.close()
conn.close()

Partidos insertados: 2810
Estad√≠sticas insertadas: 5620


### üá©üá™ Bundesliga (German First Division)

In [24]:
import glob
import json
import os

# Carpeta principal de La Liga
data_path = "Data_Extracted"
laliga_folder = os.path.join(data_path, "Data_BundesLiga")

# Lista para almacenar todos los partidos
laliga_stats = []

# Contadores para el resumen
total_files = 0
loaded_files = 0
json_errors = []
missing_match_info = []

for season_folder in sorted(os.listdir(laliga_folder)):
    matches_folder = os.path.join(laliga_folder, season_folder, "Matches", "Matches_Stats")
    if os.path.exists(matches_folder):
        # Buscar todos los JSON recursivamente
        files = glob.glob(os.path.join(matches_folder, "**", "*.json"), recursive=True)
        print(f"{season_folder}: {len(files)} archivos encontrados")
        total_files += len(files)

        for file in files:
            try:
                with open(file, "r", encoding="utf-8") as f:
                    match_data = json.load(f)
                
                # Si no existe 'match_info', crear uno m√≠nimo con match_id
                if "match_info" not in match_data:
                    match_data["match_info"] = {
                        "match_id": match_data.get("match_id", None),
                        "season_id": season_folder,
                        "matchday": None,
                        "home_team": None,
                        "home_team_id": None,
                        "home_team_score": 0,
                        "away_team": None,
                        "away_team_id": None,
                        "away_team_score": 0,
                        "date_game": None
                    }
                    missing_match_info.append(file)

                laliga_stats.append(match_data)
                loaded_files += 1
            except Exception as e:
                json_errors.append((file, str(e)))

    else:
        print(f"{season_folder}: carpeta {matches_folder} NO existe")

# Resumen de carga
print("\nResumen de carga:")
print(f"Archivos JSON totales encontrados: {total_files}")
print(f"Archivos cargados correctamente: {loaded_files}")
print(f"Archivos con error de JSON: {len(json_errors)}")
print(f"Archivos sin 'match_info': {len(missing_match_info)}\n")

if json_errors:
    print("Archivos con error JSON:")
    for f, e in json_errors:
        print(f"  {f}: {e}")

if missing_match_info:
    print("Archivos sin 'match_info':")
    for f in missing_match_info:
        print(f"  {f}")

print(f"\nTotal partidos cargados en laliga_stats: {len(laliga_stats)}")


2017-2018: 308 archivos encontrados
2018-2019: 308 archivos encontrados
2019-2020: 308 archivos encontrados
2020-2021: 308 archivos encontrados
2021-2022: carpeta Data_Extracted\Data_BundesLiga\2021-2022\Matches\Matches_Stats NO existe
2022-2023: 308 archivos encontrados
2023-2024: 308 archivos encontrados
2023-24: carpeta Data_Extracted\Data_BundesLiga\2023-24\Matches\Matches_Stats NO existe
2024-2025: 308 archivos encontrados
2025-2026: 117 archivos encontrados

Resumen de carga:
Archivos JSON totales encontrados: 2273
Archivos cargados correctamente: 2273
Archivos con error de JSON: 0
Archivos sin 'match_info': 0


Total partidos cargados en laliga_stats: 2273


In [8]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

teams_data = []

# Recorremos todos los partidos y sus equipos
for match in laliga_stats:
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            teams_data.append((team_id, team_name))

# Eliminar duplicados por team_id
teams_data = list({t[0]: t for t in teams_data}.values())

# Insertar equipos
try:
    cursor = conn.cursor()
    cursor.executemany(INSERT_TEAM_QUERY, teams_data)
    conn.commit()
    print(f"Insertados {len(teams_data)} equipos en team_meta")
except Exception as e:
    print(f"Error insertando equipos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()



Insertados 92 equipos en team_meta


In [25]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

# PASO 1: Primero insertar TODOS los equipos que aparecen en los partidos
teams_to_insert = set()
for match in laliga_stats:
    info = match.get("match_info", {})
    home_team_id = info.get("home_team_id")
    home_team = info.get("home_team")
    away_team_id = info.get("away_team_id")
    away_team = info.get("away_team")
    
    if home_team_id and home_team:
        teams_to_insert.add((home_team_id, home_team))
    if away_team_id and away_team:
        teams_to_insert.add((away_team_id, away_team))

# Insertar equipos primero
for team_id, team_name in teams_to_insert:
    try:
        cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))
    except Exception as e:
        print(f"Error insertando equipo {team_name}: {e}")

conn.commit()
print(f"Insertados {len(teams_to_insert)} equipos √∫nicos en team_meta")

# PASO 2: Ahora insertar los partidos
matches_data = []
ignored_count = 0

for match in laliga_stats:
    info = match.get("match_info", None)

    if not info:
        ignored_count += 1
        continue

    season_id = info.get("season_id", "Unknown")
    match_id = info.get("match_id", "Unknown")
    matchday = info.get("matchday", 0) or 0
    home_team = info.get("home_team", "Unknown")
    home_team_id = info.get("home_team_id")
    home_team_score = info.get("home_team_score", 0) or 0
    away_team = info.get("away_team", "Unknown")
    away_team_id = info.get("away_team_id")
    away_team_score = info.get("away_team_score", 0) or 0
    date_game = info.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None:
        matches_data.append((
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
    else:
        ignored_count += 1

try:
    cursor.executemany(INSERT_MATCH_QUERY, matches_data)
    conn.commit()
    print(f"Insertados {len(matches_data)} partidos en matches_registered")
    print(f"Ignorados {ignored_count} partidos sin match_info o con datos incompletos")
except Exception as e:
    print(f"Error insertando partidos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()

Insertados 65 equipos √∫nicos en team_meta
Insertados 2273 partidos en matches_registered
Ignorados 0 partidos sin match_info o con datos incompletos


In [26]:
# Cargar datos de Bundesliga a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

def clean_db_value(v):
    if v is None or v == "NA":
        return 0
    try:
        val = float(v)
        return int(val)
    except (TypeError, ValueError):
        return 0

inserted_matches = 0
inserted_stats = 0

for match in laliga_stats:
    # Obtener match_info
    m = match.get("match_info", {})
    match_id = m.get("match_id")
    season_id = m.get("season_id", "Unknown")
    
    # Saltar si no hay match_id
    if not match_id:
        continue
    
    # Insertar equipos (por seguridad)
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))

    # Insertar partido
    matchday = clean_db_value(m.get("matchday", 0))
    home_team = m.get("home_team", "Unknown")
    home_team_id = m.get("home_team_id")
    home_team_score = clean_db_value(m.get("home_team_score", 0))
    away_team = m.get("away_team", "Unknown")
    away_team_id = m.get("away_team_id")
    away_team_score = clean_db_value(m.get("away_team_score", 0))
    date_game = m.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None:
        cursor.execute(INSERT_MATCH_QUERY, (
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
        inserted_matches += 1

    # Insertar estad√≠sticas de equipos por partido
    db_fields = [
        "match_id", "season_id", "team_name", "home_away",
        "ttl_gls", "ttl_ast", "ttl_xg", "ttl_xag",
        "ttl_pk_made", "ttl_pk_att",
        "ttl_yellow_cards", "ttl_red_cards",
        "ttl_gls_ag", "ttl_sot_ag", "ttl_saves", "clean_sheets",
        "ttl_pk_att_ag", "ttl_pk_saved",
        "ttl_sh", "ttl_sot", "pct_sot",
        "ttl_avg_sh", "ttl_gls_per_sot", "ttl_gls_xg_diff",
        "ttl_pass_cmp", "ttl_pass_att", "pct_pass_cmp",
        "ttl_pass_prog", "ttl_key_passes",
        "ttl_pass_opp_box", "ttl_cross_opp_box",
        "ttl_pass_live", "ttl_pass_dead", "ttl_pass_fk",
        "ttl_through_balls", "ttl_switches", "ttl_crosses",
        "ttl_pass_offside", "ttl_pass_blocked", "ttl_throw_ins", "ttl_ck",
        "ttl_tkl", "ttl_tkl_won",
        "ttl_tkl_drb", "ttl_tkl_drb_att", "pct_tkl_drb_suc",
        "ttl_blocks", "ttl_sh_blocked", "ttl_int", "ttl_clearances", "ttl_def_error",
        "avg_poss",
        "ttl_touches", "ttl_take_on_att", "ttl_take_on_suc",
        "ttl_carries", "ttl_carries_miscontrolled", "ttl_carries_dispossessed",
        "ttl_pass_rcvd", "ttl_pass_prog_rcvd",
        "ttl_fls_ag", "ttl_fls_for",
        "ttl_offside", "ttl_og", "ttl_ball_recov",
        "ttl_air_dual_won", "ttl_air_dual_lost",
        "players_count"
    ]

    for t in match["teams"]:
        # Crear diccionario base con match_id y season_id
        stats = {
            "match_id": match_id,
            "season_id": season_id,
            "team_name": t.get("team_name"),
            "home_away": t.get("home_away")
        }
        
        # Agregar el resto de los campos desde el equipo
        for field in db_fields[4:]:  # Saltar los primeros 4 ya asignados
            stats[field] = t.get(field)
        
        # Limpiar valores num√©ricos
        for key in stats:
            if key not in ["match_id", "team_name", "home_away", "season_id"]:
                stats[key] = clean_db_value(stats[key])
        
        # Rellenar campos faltantes
        for f in db_fields:
            if f not in stats or stats[f] is None:
                stats[f] = 0 if f not in ["match_id", "season_id", "team_name", "home_away"] else stats.get(f, "")
        
        values = [stats[f] for f in db_fields]
        cursor.execute(f"""
            INSERT INTO team_match_stats ({','.join(db_fields)})
            VALUES ({','.join(['%s'] * len(db_fields))})
            ON CONFLICT (match_id, team_name)
            DO NOTHING;
        """, values)
        inserted_stats += 1

conn.commit()
print(f"Partidos insertados: {inserted_matches}")
print(f"Estad√≠sticas insertadas: {inserted_stats}")

cursor.close()
conn.close()

Partidos insertados: 2273
Estad√≠sticas insertadas: 4546


### üáÆüáπ Serie A (Italian First Division)

In [41]:
import glob
import json
import os

# Carpeta principal de La Liga
data_path = "Data_Extracted"
laliga_folder = os.path.join(data_path, "Data_SerieA")

# Lista para almacenar todos los partidos
laliga_stats = []

# Contadores para el resumen
total_files = 0
loaded_files = 0
json_errors = []
missing_match_info = []

for season_folder in sorted(os.listdir(laliga_folder)):
    matches_folder = os.path.join(laliga_folder, season_folder, "Matches", "Matches_Stats")
    if os.path.exists(matches_folder):
        # Buscar todos los JSON recursivamente
        files = glob.glob(os.path.join(matches_folder, "**", "*.json"), recursive=True)
        print(f"{season_folder}: {len(files)} archivos encontrados")
        total_files += len(files)

        for file in files:
            try:
                with open(file, "r", encoding="utf-8") as f:
                    match_data = json.load(f)
                
                # Si no existe 'match_info', crear uno m√≠nimo con match_id
                if "match_info" not in match_data:
                    match_data["match_info"] = {
                        "match_id": match_data.get("match_id", None),
                        "season_id": season_folder,
                        "matchday": None,
                        "home_team": None,
                        "home_team_id": None,
                        "home_team_score": 0,
                        "away_team": None,
                        "away_team_id": None,
                        "away_team_score": 0,
                        "date_game": None
                    }
                    missing_match_info.append(file)

                laliga_stats.append(match_data)
                loaded_files += 1
            except Exception as e:
                json_errors.append((file, str(e)))

    else:
        print(f"{season_folder}: carpeta {matches_folder} NO existe")

# Resumen de carga
print("\nResumen de carga:")
print(f"Archivos JSON totales encontrados: {total_files}")
print(f"Archivos cargados correctamente: {loaded_files}")
print(f"Archivos con error de JSON: {len(json_errors)}")
print(f"Archivos sin 'match_info': {len(missing_match_info)}\n")

if json_errors:
    print("Archivos con error JSON:")
    for f, e in json_errors:
        print(f"  {f}: {e}")

if missing_match_info:
    print("Archivos sin 'match_info':")
    for f in missing_match_info:
        print(f"  {f}")

print(f"\nTotal partidos cargados en laliga_stats: {len(laliga_stats)}")


2017-2018: 380 archivos encontrados
2018-2019: 380 archivos encontrados
2019-2020: 380 archivos encontrados
2020-2021: 380 archivos encontrados
2021-2022: carpeta Data_Extracted\Data_SerieA\2021-2022\Matches\Matches_Stats NO existe
2022-2023: 381 archivos encontrados
2023-2024: 380 archivos encontrados
2024-2025: 380 archivos encontrados
2025-2026: 140 archivos encontrados

Resumen de carga:
Archivos JSON totales encontrados: 2801
Archivos cargados correctamente: 2801
Archivos con error de JSON: 0
Archivos sin 'match_info': 0


Total partidos cargados en laliga_stats: 2801


In [42]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

teams_data = []

# Recorremos todos los partidos y sus equipos
for match in laliga_stats:
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            teams_data.append((team_id, team_name))

# Eliminar duplicados por team_id
teams_data = list({t[0]: t for t in teams_data}.values())

# Insertar equipos
try:
    cursor = conn.cursor()
    cursor.executemany(INSERT_TEAM_QUERY, teams_data)
    conn.commit()
    print(f"Insertados {len(teams_data)} equipos en team_meta")
except Exception as e:
    print(f"Error insertando equipos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()



Insertados 72 equipos en team_meta


In [43]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

# PASO 1: Primero insertar TODOS los equipos que aparecen en los partidos
teams_to_insert = set()
for match in laliga_stats:
    info = match.get("match_info", {})
    home_team_id = info.get("home_team_id")
    home_team = info.get("home_team")
    away_team_id = info.get("away_team_id")
    away_team = info.get("away_team")
    
    if home_team_id and home_team:
        teams_to_insert.add((home_team_id, home_team))
    if away_team_id and away_team:
        teams_to_insert.add((away_team_id, away_team))

# Insertar equipos primero
for team_id, team_name in teams_to_insert:
    try:
        cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))
    except Exception as e:
        print(f"Error insertando equipo {team_name}: {e}")

conn.commit()
print(f"Insertados {len(teams_to_insert)} equipos √∫nicos en team_meta")

# PASO 2: Ahora insertar los partidos
matches_data = []
ignored_count = 0

for match in laliga_stats:
    info = match.get("match_info", None)

    if not info:
        ignored_count += 1
        continue

    season_id = info.get("season_id", "Unknown")
    match_id = info.get("match_id", "Unknown")
    matchday = info.get("matchday", 0) or 0
    home_team = info.get("home_team", "Unknown")
    home_team_id = info.get("home_team_id")
    home_team_score = info.get("home_team_score", 0) or 0
    away_team = info.get("away_team", "Unknown")
    away_team_id = info.get("away_team_id")
    away_team_score = info.get("away_team_score", 0) or 0
    date_game = info.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None:
        matches_data.append((
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
    else:
        ignored_count += 1

try:
    cursor.executemany(INSERT_MATCH_QUERY, matches_data)
    conn.commit()
    print(f"Insertados {len(matches_data)} partidos en matches_registered")
    print(f"Ignorados {ignored_count} partidos sin match_info o con datos incompletos")
except Exception as e:
    print(f"Error insertando partidos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()

Insertados 52 equipos √∫nicos en team_meta
Insertados 2801 partidos en matches_registered
Ignorados 0 partidos sin match_info o con datos incompletos


In [44]:
# Cargar datos de  a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

def clean_db_value(v):
    if v is None or v == "NA":
        return 0
    try:
        val = float(v)
        return int(val)
    except (TypeError, ValueError):
        return 0

inserted_matches = 0
inserted_stats = 0

for match in laliga_stats:
    # Obtener match_info
    m = match.get("match_info", {})
    match_id = m.get("match_id")
    season_id = m.get("season_id", "Unknown")
    
    # Saltar si no hay match_id
    if not match_id:
        continue
    
    # Insertar equipos (por seguridad)
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))

    # Insertar partido
    matchday = clean_db_value(m.get("matchday", 0))
    home_team = m.get("home_team", "Unknown")
    home_team_id = m.get("home_team_id")
    home_team_score = clean_db_value(m.get("home_team_score", 0))
    away_team = m.get("away_team", "Unknown")
    away_team_id = m.get("away_team_id")
    away_team_score = clean_db_value(m.get("away_team_score", 0))
    date_game = m.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None:
        cursor.execute(INSERT_MATCH_QUERY, (
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
        inserted_matches += 1

    # Insertar estad√≠sticas de equipos por partido
    db_fields = [
        "match_id", "season_id", "team_name", "home_away",
        "ttl_gls", "ttl_ast", "ttl_xg", "ttl_xag",
        "ttl_pk_made", "ttl_pk_att",
        "ttl_yellow_cards", "ttl_red_cards",
        "ttl_gls_ag", "ttl_sot_ag", "ttl_saves", "clean_sheets",
        "ttl_pk_att_ag", "ttl_pk_saved",
        "ttl_sh", "ttl_sot", "pct_sot",
        "ttl_avg_sh", "ttl_gls_per_sot", "ttl_gls_xg_diff",
        "ttl_pass_cmp", "ttl_pass_att", "pct_pass_cmp",
        "ttl_pass_prog", "ttl_key_passes",
        "ttl_pass_opp_box", "ttl_cross_opp_box",
        "ttl_pass_live", "ttl_pass_dead", "ttl_pass_fk",
        "ttl_through_balls", "ttl_switches", "ttl_crosses",
        "ttl_pass_offside", "ttl_pass_blocked", "ttl_throw_ins", "ttl_ck",
        "ttl_tkl", "ttl_tkl_won",
        "ttl_tkl_drb", "ttl_tkl_drb_att", "pct_tkl_drb_suc",
        "ttl_blocks", "ttl_sh_blocked", "ttl_int", "ttl_clearances", "ttl_def_error",
        "avg_poss",
        "ttl_touches", "ttl_take_on_att", "ttl_take_on_suc",
        "ttl_carries", "ttl_carries_miscontrolled", "ttl_carries_dispossessed",
        "ttl_pass_rcvd", "ttl_pass_prog_rcvd",
        "ttl_fls_ag", "ttl_fls_for",
        "ttl_offside", "ttl_og", "ttl_ball_recov",
        "ttl_air_dual_won", "ttl_air_dual_lost",
        "players_count"
    ]

    for t in match["teams"]:
        # Crear diccionario base con match_id y season_id
        stats = {
            "match_id": match_id,
            "season_id": season_id,
            "team_name": t.get("team_name"),
            "home_away": t.get("home_away")
        }
        
        # Agregar el resto de los campos desde el equipo
        for field in db_fields[4:]:  # Saltar los primeros 4 ya asignados
            stats[field] = t.get(field)
        
        # Limpiar valores num√©ricos
        for key in stats:
            if key not in ["match_id", "team_name", "home_away", "season_id"]:
                stats[key] = clean_db_value(stats[key])
        
        # Rellenar campos faltantes
        for f in db_fields:
            if f not in stats or stats[f] is None:
                stats[f] = 0 if f not in ["match_id", "season_id", "team_name", "home_away"] else stats.get(f, "")
        
        values = [stats[f] for f in db_fields]
        cursor.execute(f"""
            INSERT INTO team_match_stats ({','.join(db_fields)})
            VALUES ({','.join(['%s'] * len(db_fields))})
            ON CONFLICT (match_id, team_name)
            DO NOTHING;
        """, values)
        inserted_stats += 1

conn.commit()
print(f"Partidos insertados: {inserted_matches}")
print(f"Estad√≠sticas insertadas: {inserted_stats}")

cursor.close()
conn.close()

Partidos insertados: 2801
Estad√≠sticas insertadas: 5602


### üá´üá∑ Ligue 1 (French First Division)

In [48]:
import glob
import json
import os

# Carpeta principal de La Liga
data_path = "Data_Extracted"
laliga_folder = os.path.join(data_path, "Data_Ligue1")

# Lista para almacenar todos los partidos
laliga_stats = []

# Contadores para el resumen
total_files = 0
loaded_files = 0
json_errors = []
missing_match_info = []

for season_folder in sorted(os.listdir(laliga_folder)):
    matches_folder = os.path.join(laliga_folder, season_folder, "Matches", "Matches_Stats")
    if os.path.exists(matches_folder):
        # Buscar todos los JSON recursivamente
        files = glob.glob(os.path.join(matches_folder, "**", "*.json"), recursive=True)
        print(f"{season_folder}: {len(files)} archivos encontrados")
        total_files += len(files)

        for file in files:
            try:
                with open(file, "r", encoding="utf-8") as f:
                    match_data = json.load(f)
                
                # Si no existe 'match_info', crear uno m√≠nimo con match_id
                if "match_info" not in match_data:
                    match_data["match_info"] = {
                        "match_id": match_data.get("match_id", None),
                        "season_id": season_folder,
                        "matchday": None,
                        "home_team": None,
                        "home_team_id": None,
                        "home_team_score": 0,
                        "away_team": None,
                        "away_team_id": None,
                        "away_team_score": 0,
                        "date_game": None
                    }
                    missing_match_info.append(file)

                laliga_stats.append(match_data)
                loaded_files += 1
            except Exception as e:
                json_errors.append((file, str(e)))

    else:
        print(f"{season_folder}: carpeta {matches_folder} NO existe")

# Resumen de carga
print("\nResumen de carga:")
print(f"Archivos JSON totales encontrados: {total_files}")
print(f"Archivos cargados correctamente: {loaded_files}")
print(f"Archivos con error de JSON: {len(json_errors)}")
print(f"Archivos sin 'match_info': {len(missing_match_info)}\n")

if json_errors:
    print("Archivos con error JSON:")
    for f, e in json_errors:
        print(f"  {f}: {e}")

if missing_match_info:
    print("Archivos sin 'match_info':")
    for f in missing_match_info:
        print(f"  {f}")

print(f"\nTotal partidos cargados en laliga_stats: {len(laliga_stats)}")


2017-2018: 290 archivos encontrados
2018-2019: 382 archivos encontrados
2019-2020: 380 archivos encontrados
2020-2021: 382 archivos encontrados
2021-2022: carpeta Data_Extracted\Data_Ligue1\2021-2022\Matches\Matches_Stats NO existe
2022-2023: 380 archivos encontrados
2023-2024: 308 archivos encontrados
2024-2025: 308 archivos encontrados
2025-2026: 135 archivos encontrados

Resumen de carga:
Archivos JSON totales encontrados: 2565
Archivos cargados correctamente: 2565
Archivos con error de JSON: 0
Archivos sin 'match_info': 0


Total partidos cargados en laliga_stats: 2565


In [49]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

teams_data = []

# Recorremos todos los partidos y sus equipos
for match in laliga_stats:
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            teams_data.append((team_id, team_name))

# Eliminar duplicados por team_id
teams_data = list({t[0]: t for t in teams_data}.values())

# Insertar equipos
try:
    cursor = conn.cursor()
    cursor.executemany(INSERT_TEAM_QUERY, teams_data)
    conn.commit()
    print(f"Insertados {len(teams_data)} equipos en team_meta")
except Exception as e:
    print(f"Error insertando equipos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()



Insertados 48 equipos en team_meta


In [50]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

# PASO 1: Primero insertar TODOS los equipos que aparecen en los partidos
teams_to_insert = set()
for match in laliga_stats:
    info = match.get("match_info", {})
    home_team_id = info.get("home_team_id")
    home_team = info.get("home_team")
    away_team_id = info.get("away_team_id")
    away_team = info.get("away_team")
    
    if home_team_id and home_team:
        teams_to_insert.add((home_team_id, home_team))
    if away_team_id and away_team:
        teams_to_insert.add((away_team_id, away_team))

# Insertar equipos primero
for team_id, team_name in teams_to_insert:
    try:
        cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))
    except Exception as e:
        print(f"Error insertando equipo {team_name}: {e}")

conn.commit()
print(f"Insertados {len(teams_to_insert)} equipos √∫nicos en team_meta")

# PASO 2: Ahora insertar los partidos
matches_data = []
ignored_count = 0

for match in laliga_stats:
    info = match.get("match_info", None)

    if not info:
        ignored_count += 1
        continue

    season_id = info.get("season_id", "Unknown")
    match_id = info.get("match_id", "Unknown")
    matchday = info.get("matchday", 0) or 0
    home_team = info.get("home_team", "Unknown")
    home_team_id = info.get("home_team_id")
    home_team_score = info.get("home_team_score", 0) or 0
    away_team = info.get("away_team", "Unknown")
    away_team_id = info.get("away_team_id")
    away_team_score = info.get("away_team_score", 0) or 0
    date_game = info.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None:
        matches_data.append((
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
    else:
        ignored_count += 1

try:
    cursor.executemany(INSERT_MATCH_QUERY, matches_data)
    conn.commit()
    print(f"Insertados {len(matches_data)} partidos en matches_registered")
    print(f"Ignorados {ignored_count} partidos sin match_info o con datos incompletos")
except Exception as e:
    print(f"Error insertando partidos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()

Insertados 48 equipos √∫nicos en team_meta
Insertados 2565 partidos en matches_registered
Ignorados 0 partidos sin match_info o con datos incompletos


In [51]:
# Cargar datos de  a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

def clean_db_value(v):
    if v is None or v == "NA":
        return 0
    try:
        val = float(v)
        return int(val)
    except (TypeError, ValueError):
        return 0

inserted_matches = 0
inserted_stats = 0

for match in laliga_stats:
    # Obtener match_info
    m = match.get("match_info", {})
    match_id = m.get("match_id")
    season_id = m.get("season_id", "Unknown")
    
    # Saltar si no hay match_id
    if not match_id:
        continue
    
    # Insertar equipos (por seguridad)
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            cursor.execute(INSERT_TEAM_QUERY, (team_id, team_name))

    # Insertar partido
    matchday = clean_db_value(m.get("matchday", 0))
    home_team = m.get("home_team", "Unknown")
    home_team_id = m.get("home_team_id")
    home_team_score = clean_db_value(m.get("home_team_score", 0))
    away_team = m.get("away_team", "Unknown")
    away_team_id = m.get("away_team_id")
    away_team_score = clean_db_value(m.get("away_team_score", 0))
    date_game = m.get("date_game", "1970-01-01")

    if home_team_id is not None and away_team_id is not None:
        cursor.execute(INSERT_MATCH_QUERY, (
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
        inserted_matches += 1

    # Insertar estad√≠sticas de equipos por partido
    db_fields = [
        "match_id", "season_id", "team_name", "home_away",
        "ttl_gls", "ttl_ast", "ttl_xg", "ttl_xag",
        "ttl_pk_made", "ttl_pk_att",
        "ttl_yellow_cards", "ttl_red_cards",
        "ttl_gls_ag", "ttl_sot_ag", "ttl_saves", "clean_sheets",
        "ttl_pk_att_ag", "ttl_pk_saved",
        "ttl_sh", "ttl_sot", "pct_sot",
        "ttl_avg_sh", "ttl_gls_per_sot", "ttl_gls_xg_diff",
        "ttl_pass_cmp", "ttl_pass_att", "pct_pass_cmp",
        "ttl_pass_prog", "ttl_key_passes",
        "ttl_pass_opp_box", "ttl_cross_opp_box",
        "ttl_pass_live", "ttl_pass_dead", "ttl_pass_fk",
        "ttl_through_balls", "ttl_switches", "ttl_crosses",
        "ttl_pass_offside", "ttl_pass_blocked", "ttl_throw_ins", "ttl_ck",
        "ttl_tkl", "ttl_tkl_won",
        "ttl_tkl_drb", "ttl_tkl_drb_att", "pct_tkl_drb_suc",
        "ttl_blocks", "ttl_sh_blocked", "ttl_int", "ttl_clearances", "ttl_def_error",
        "avg_poss",
        "ttl_touches", "ttl_take_on_att", "ttl_take_on_suc",
        "ttl_carries", "ttl_carries_miscontrolled", "ttl_carries_dispossessed",
        "ttl_pass_rcvd", "ttl_pass_prog_rcvd",
        "ttl_fls_ag", "ttl_fls_for",
        "ttl_offside", "ttl_og", "ttl_ball_recov",
        "ttl_air_dual_won", "ttl_air_dual_lost",
        "players_count"
    ]

    for t in match["teams"]:
        # Crear diccionario base con match_id y season_id
        stats = {
            "match_id": match_id,
            "season_id": season_id,
            "team_name": t.get("team_name"),
            "home_away": t.get("home_away")
        }
        
        # Agregar el resto de los campos desde el equipo
        for field in db_fields[4:]:  # Saltar los primeros 4 ya asignados
            stats[field] = t.get(field)
        
        # Limpiar valores num√©ricos
        for key in stats:
            if key not in ["match_id", "team_name", "home_away", "season_id"]:
                stats[key] = clean_db_value(stats[key])
        
        # Rellenar campos faltantes
        for f in db_fields:
            if f not in stats or stats[f] is None:
                stats[f] = 0 if f not in ["match_id", "season_id", "team_name", "home_away"] else stats.get(f, "")
        
        values = [stats[f] for f in db_fields]
        cursor.execute(f"""
            INSERT INTO team_match_stats ({','.join(db_fields)})
            VALUES ({','.join(['%s'] * len(db_fields))})
            ON CONFLICT (match_id, team_name)
            DO NOTHING;
        """, values)
        inserted_stats += 1

conn.commit()
print(f"Partidos insertados: {inserted_matches}")
print(f"Estad√≠sticas insertadas: {inserted_stats}")

cursor.close()
conn.close()

Partidos insertados: 2565
Estad√≠sticas insertadas: 5130


### ENG Premier League (England First Division)

In [55]:
import glob
import json
import os

# Carpeta principal de La Liga
data_path = "Data_Extracted"
laliga_folder = os.path.join(data_path, "Data_PremierLeague")

# Lista para almacenar todos los partidos
laliga_stats = []

# Contadores para el resumen
total_files = 0
loaded_files = 0
json_errors = []
missing_match_info = []

for season_folder in sorted(os.listdir(laliga_folder)):
    matches_folder = os.path.join(laliga_folder, season_folder, "Matches", "Matches_Stats")
    if os.path.exists(matches_folder):
        # Buscar todos los JSON recursivamente
        files = glob.glob(os.path.join(matches_folder, "**", "*.json"), recursive=True)
        print(f"{season_folder}: {len(files)} archivos encontrados")
        total_files += len(files)

        for file in files:
            try:
                with open(file, "r", encoding="utf-8") as f:
                    match_data = json.load(f)
                
                # Si no existe 'match_info', crear uno m√≠nimo con match_id
                if "match_info" not in match_data:
                    match_data["match_info"] = {
                        "match_id": match_data.get("match_id", None),
                        "season_id": season_folder,
                        "matchday": None,
                        "home_team": None,
                        "home_team_id": None,
                        "home_team_score": 0,
                        "away_team": None,
                        "away_team_id": None,
                        "away_team_score": 0,
                        "date_game": None
                    }
                    missing_match_info.append(file)

                laliga_stats.append(match_data)
                loaded_files += 1
            except Exception as e:
                json_errors.append((file, str(e)))

    else:
        print(f"{season_folder}: carpeta {matches_folder} NO existe")

# Resumen de carga
print("\nResumen de carga:")
print(f"Archivos JSON totales encontrados: {total_files}")
print(f"Archivos cargados correctamente: {loaded_files}")
print(f"Archivos con error de JSON: {len(json_errors)}")
print(f"Archivos sin 'match_info': {len(missing_match_info)}\n")

if json_errors:
    print("Archivos con error JSON:")
    for f, e in json_errors:
        print(f"  {f}: {e}")

if missing_match_info:
    print("Archivos sin 'match_info':")
    for f in missing_match_info:
        print(f"  {f}")

print(f"\nTotal partidos cargados en laliga_stats: {len(laliga_stats)}")


2025-2026: 150 archivos encontrados

Resumen de carga:
Archivos JSON totales encontrados: 150
Archivos cargados correctamente: 150
Archivos con error de JSON: 0
Archivos sin 'match_info': 0


Total partidos cargados en laliga_stats: 150


In [57]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

teams_data = []

# Recorremos todos los partidos y sus equipos
for match in laliga_stats:
    for t in match["teams"]:
        team_id = t.get("team_id")
        team_name = t.get("team_name")
        if team_id and team_name:
            teams_data.append((team_id, team_name))

# Eliminar duplicados por team_id
teams_data = list({t[0]: t for t in teams_data}.values())

# Insertar equipos
try:
    cursor = conn.cursor()
    cursor.executemany(INSERT_TEAM_QUERY, teams_data)
    conn.commit()
    print(f"Insertados {len(teams_data)} equipos en team_meta")
except Exception as e:
    print(f"Error insertando equipos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()



Insertados 40 equipos en team_meta


In [58]:
# Conectar a la base de datos
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

matches_data = []
ignored_count = 0  # Contador de partidos ignorados

for match in laliga_stats:
    info = match.get("match_info", None)

    if not info:
        ignored_count += 1
        continue  # Ignoramos partidos sin match_info

    # Extraemos datos seguros de match_info
    season_id = info.get("season_id", "Unknown")
    match_id = info.get("match_id", "Unknown")
    matchday = info.get("matchday", 0) or 0
    home_team = info.get("home_team", "Unknown")
    home_team_id = info.get("home_team_id")  # No reemplazar por 0, para no violar FK
    home_team_score = info.get("home_team_score", 0) or 0
    away_team = info.get("away_team", "Unknown")
    away_team_id = info.get("away_team_id")  # No reemplazar por 0, para no violar FK
    away_team_score = info.get("away_team_score", 0) or 0
    date_game = info.get("date_game", "1970-01-01")

    # Solo agregar si los campos obligatorios existen para respetar FK y NOT NULL
    if home_team_id is not None and away_team_id is not None:
        matches_data.append((
            season_id, match_id, matchday,
            home_team, home_team_id, home_team_score,
            away_team, away_team_id, away_team_score,
            date_game
        ))
    else:
        ignored_count += 1  # Ignoramos si faltan IDs de equipo

try:
    cursor = conn.cursor()
    cursor.executemany(INSERT_MATCH_QUERY, matches_data)
    conn.commit()
    print(f"Insertados {len(matches_data)} partidos en matches_registered")
    print(f"Ignorados {ignored_count} partidos sin match_info o con datos incompletos")
except Exception as e:
    print(f"Error insertando partidos: {e}")
    conn.rollback()
finally:
    cursor.close()
    conn.close()


Insertados 150 partidos en matches_registered
Ignorados 0 partidos sin match_info o con datos incompletos


In [59]:
# Cargar datos de Premier League a la base de datos (solo team_match_stats)
conn = connect_to_database()
if not conn:
    raise Exception("No se pudo conectar a la base de datos")

cursor = conn.cursor()

def clean_db_value(v):
    if v is None or v == "NA":
        return 0
    try:
        val = float(v)
        return int(val)
    except (TypeError, ValueError):
        return 0

inserted_stats = 0
skipped_stats = 0
error_samples = []

for match in laliga_stats:
    # Insertar estad√≠sticas de equipos por partido
    db_fields = [
        "match_id", "season_id", "team_id", "team_name", "home_away",
        "ttl_gls", "ttl_ast", "ttl_xg", "ttl_xag",
        "ttl_pk_made", "ttl_pk_att",
        "ttl_yellow_cards", "ttl_red_cards",
        "ttl_gls_ag", "ttl_sot_ag", "ttl_saves", "clean_sheets",
        "ttl_pk_att_ag", "ttl_pk_saved",
        "ttl_sh", "ttl_sot", "pct_sot",
        "ttl_avg_sh", "ttl_gls_per_sot", "ttl_gls_xg_diff",
        "ttl_pass_cmp", "ttl_pass_att", "pct_pass_cmp",
        "ttl_pass_prog", "ttl_key_passes",
        "ttl_pass_opp_box", "ttl_cross_opp_box",
        "ttl_pass_live", "ttl_pass_dead", "ttl_pass_fk",
        "ttl_through_balls", "ttl_switches", "ttl_crosses",
        "ttl_pass_offside", "ttl_pass_blocked", "ttl_throw_ins", "ttl_ck",
        "ttl_tkl", "ttl_tkl_won",
        "ttl_tkl_drb", "ttl_tkl_drb_att", "pct_tkl_drb_suc",
        "ttl_blocks", "ttl_sh_blocked", "ttl_int", "ttl_clearances", "ttl_def_error",
        "avg_poss",
        "ttl_touches", "ttl_take_on_att", "ttl_take_on_suc",
        "ttl_carries", "ttl_carries_miscontrolled", "ttl_carries_dispossessed",
        "ttl_pass_rcvd", "ttl_pass_prog_rcvd",
        "ttl_fls_ag", "ttl_fls_for",
        "ttl_offside", "ttl_og", "ttl_ball_recov",
        "ttl_air_dual_won", "ttl_air_dual_lost",
        "players_count"
    ]

    for t in match["teams"]:
        stats = {k: t.get(k) for k in db_fields}
        
        # NORMALIZAR team_name
        if stats.get("team_name"):
            stats["team_name"] = normalize_team_name(stats["team_name"])
        
        # Limpiar valores
        for key in stats:
            if key not in ["match_id", "team_id", "team_name", "home_away", "season_id"]:
                stats[key] = clean_db_value(stats[key])
        # Rellenar campos faltantes
        for f in db_fields:
            if stats[f] is None:
                if f == "team_id":
                    stats[f] = "unknown"  # Valor por defecto para team_id
                else:
                    stats[f] = 0
        values = [stats[f] for f in db_fields]
        
        try:
            cursor.execute(f"""
                INSERT INTO team_match_stats ({','.join(db_fields)})
                VALUES ({','.join(['%s'] * len(db_fields))});
            """, values)
            inserted_stats += 1
            conn.commit()
        except Exception as e:
            # Guardar muestra de errores
            if len(error_samples) < 3:
                error_samples.append(str(e))
            skipped_stats += 1
            conn.rollback()

print(f"Estad√≠sticas insertadas: {inserted_stats}")
print(f"Estad√≠sticas ignoradas (duplicados o errores): {skipped_stats}")
if error_samples:
    print("\nMuestra de errores:")
    for i, err in enumerate(error_samples, 1):
        print(f"{i}. {err}")

cursor.close()
conn.close()


Estad√≠sticas insertadas: 0
Estad√≠sticas ignoradas (duplicados o errores): 300

Muestra de errores:
1. duplicate key value violates unique constraint "team_match_stats_match_id_team_id_key"
DETAIL:  Key (match_id, team_id)=(0050df89, team_80307993) already exists.

2. duplicate key value violates unique constraint "team_match_stats_match_id_team_id_key"
DETAIL:  Key (match_id, team_id)=(0050df89, team_55092476) already exists.

3. duplicate key value violates unique constraint "team_match_stats_match_id_team_id_key"
DETAIL:  Key (match_id, team_id)=(03f6bccd, team_10424652) already exists.

