En este notebook se almacenan, en una base de datos PostgreSQL, los datos de los archivos .csv con los que se ha estado trabajando.

In [117]:
import pandas as pd

In [118]:
# Locations of the CSV exports consumed by this notebook (relative to the
# directory the notebook is run from).
csv_paths = {
    "mezcla": "datos/normalizaciones_posicion/mezcla/mezcla_total.csv",
    "equipos": "datos/datos_equipos/ligas_europeas16_equipos.csv",
    "total_v4": "datos/datos_jugadores_v4/jugadores_total.csv",
    "total_v5": "datos/datos_jugadores_v5/jugadores_total.csv",
}

# One DataFrame per source file.
df_mezcla = pd.read_csv(csv_paths["mezcla"])
df_equipos = pd.read_csv(csv_paths["equipos"])
df_total_v4 = pd.read_csv(csv_paths["total_v4"])
# NOTE(review): df_total_v5 is not referenced again in the visible cells.
df_total_v5 = pd.read_csv(csv_paths["total_v5"])

In [119]:
# 1. Column classification
# Every column of df_total_v4 whose lower-cased name contains '_per90'.
cols_per90 = [
    column_name
    for column_name in df_total_v4.columns
    if '_per90' in column_name.lower()
]

# Columns that go into the player profile frame.
cols_profile = [
    'player_id', 'player_name', 'dob', 'age',
    'nationality_id', 'nationality_name',
    'preferred_foot', 'height_cm', 'weight_kg',
    'main_position', 'positions',
    'club_jersey_number', 'club_loaned_from', 'club_contract_valid_until',
    'value_eur', 'wage_eur', 'release_clause_eur',
    'team_id', 'competition', 'rating',
]

# Non-per90 columns that go into the player stats frame.
cols_stats = [
    "matches_played",
    "minutes_played",
    "tackle_success_rate",
    "interception_success_rate",
    "head_clearance_success_rate",
    "headed_shot_duel_rate",
    "pass_completion_rate",
    "through_balls_completed",
    "crosses_completed",
    "switches_completed",
    "cutbacks_completed",
    "passes_final_third_accuracy",
    "progressive_carries_rate",
    "passes_to_box_accuracy",
    "duel_success_rate",
    "dribble_success_rate",
    "shot_accuracy",
    "avg_xg",
    "conversion_rate_inside_box",
    "penalty_save_percentage",
]

In [120]:
# 2. Split df_total_v4 into two independent copies:
#    - df_profile: the profile columns
#    - df_stats:   player_id followed by the per-90 and the remaining stats columns
df_profile = df_total_v4.loc[:, cols_profile].copy()

stats_columns = ['player_id', *cols_per90, *cols_stats]
df_stats = df_total_v4.loc[:, stats_columns].copy()

In [121]:
# Frames to audit, keyed by the label shown in the printed report.
dataframes = {
    "df_equipos": df_equipos,
    "df_player_stats": df_stats,
    "df_normalized_stats_position": df_mezcla,  # label typo fixed (was "..._posito")
    "df_player_profile": df_profile
}

# For every frame print the total number of nulls and, when there are any,
# the per-column counts restricted to the columns that actually contain nulls.
for name, df in dataframes.items():
    nulls = df.isnull().sum()
    total_nulls = nulls.sum()
    print(f"➡️ {name}: {total_nulls} valores nulos")
    if total_nulls > 0:
        print(nulls[nulls > 0])
    print("-" * 50)

➡️ df_equipos: 0 valores nulos
--------------------------------------------------
➡️ df_player_stats: 0 valores nulos
--------------------------------------------------
➡️ df_normalized_stats_posito: 29544 valores nulos
saves_per90                              1184
save_percentage                          1184
xg_against_minus_goals_conceded_per90    1184
keeper_sweeper_per90                     1184
penalty_save_percentage                  1184
aerial_dominance_index_per90             1184
pass_completion_rate                      441
tackle_success_rate                       777
tackles_successful_per90                  777
interceptions_per90                       599
interception_success_rate                 777
pressures_per90                           396
chances_created_per90                      92
progressive_carries_per90                 366
crosses_completed_per90                   820
duels_won_per90                           441
fouls_committed_per90                     84

In [122]:
# Rename 'team_name' to 'team'. Reassign instead of using inplace=True so the
# frame object inspected by earlier cells is not mutated behind their back;
# re-running the cell is harmless because a missing 'team_name' is ignored.
df_equipos = df_equipos.rename(columns={"team_name": "team"})

In [123]:
# Remove the 'Unnamed: 0' column when it is present; when it is absent the
# frame keeps all of its columns.
df_mezcla = df_mezcla.drop(columns=["Unnamed: 0"], errors="ignore")


In [124]:
# Frames to list, keyed by the label shown in the printed report.
dataframes = {
    "df_equipos": df_equipos,
    "df_player_stats": df_stats,
    "df_normalized_stats_position": df_mezcla,  # label typo fixed (was "..._positon")
    "df_player_profile": df_profile
}

# Print a header per frame followed by one line per column name.
for nombre, df in dataframes.items():
    print(f"\n--- Columnas en {nombre} ---")
    for col in df.columns:
        print(f"- {col}")



--- Columnas en df_equipos ---
- team_id
- team
- transfer_budget
- wage_budget
- competition

--- Columnas en df_player_stats ---
- player_id
- tackles_successful_per90
- interceptions_per90
- clearances_per90
- blocks_per90
- head_clearances_per90
- head_clearances_won_per90
- times_dribbled_past_per90
- ball_recoveries_per90
- offensive_recoveries_per90
- pressures_per90
- counterpress_per90
- total_passes_per90
- completed_passes_per90
- incomplete_passes_per90
- passes_out_per90
- offside_passes_per90
- failed_passes_per90
- ground_passes_per90
- low_passes_per90
- high_passes_per90
- deflected_passes_per90
- crosses_total_per90
- crosses_completed_per90
- cutbacks_total_per90
- cutbacks_completed_per90
- switches_total_per90
- switches_completed_per90
- through_balls_total_per90
- through_balls_completed_per90
- passes_own_half_per90
- passes_opposition_half_per90
- progressive_passes_per90
- progressive_passes_completed_per90
- passes_final_third_per90
- passes_to_box_per90
- c

Eliminar columnas que se han decidido omitir

In [125]:
# Per-90 pass columns that are left out of the stored player stats.
omitted_pass_cols = [
    'completed_passes_per90',
    'incomplete_passes_per90',
    'passes_out_per90',
    'offside_passes_per90',
    'failed_passes_per90',
    'ground_passes_per90',
    'low_passes_per90',
    'high_passes_per90',
    'deflected_passes_per90',
]

# Per-90 shot columns that are left out of the stored player stats.
omitted_shot_cols = [
    'shots_on_target_per90',
    'shots_off_target_per90',
    'blocked_shots_per90',
]

cols_eliminar = omitted_pass_cols + omitted_shot_cols

# errors='ignore': names in cols_eliminar that are not columns of df_stats are
# skipped silently instead of raising.
df_player_stats = df_stats.drop(columns=cols_eliminar, errors='ignore')

In [126]:
import os

# PostgreSQL connection settings. Each value can be overridden with an
# environment variable so real credentials never have to be written into the
# notebook; the fallbacks are the values previously hard-coded here.
server = os.environ.get('SCOUTING_DB_HOST', '127.0.0.1')
db = os.environ.get('SCOUTING_DB_NAME', 'scouting')
usr = os.environ.get('SCOUTING_DB_USER', 'postgres')
# NOTE(review): the fallback password is only acceptable for a throwaway local
# server; set SCOUTING_DB_PASSWORD for anything else.
passw = os.environ.get('SCOUTING_DB_PASSWORD', '1234')
# Kept as a string, as before.
port = os.environ.get('SCOUTING_DB_PORT', '5432')

In [127]:
from sqlalchemy import create_engine, text

# Connect to the server's default "postgres" database (the target database may
# not exist yet). The URL is built from the connection settings cell
# (usr, passw, server, port) instead of repeating the credentials here.
# AUTOCOMMIT is required because PostgreSQL refuses to run CREATE DATABASE
# inside a transaction block; it replaces the manual "COMMIT" workaround.
# NOTE: a password containing URL-special characters (e.g. '@', '/') must be
# URL-encoded before being placed in this string.
admin_engine = create_engine(
    f"postgresql+psycopg2://{usr}:{passw}@{server}:{port}/postgres",
    isolation_level="AUTOCOMMIT",
)

# Create the database only when it is missing, so the cell can be re-run
# (Restart & Run All) without failing on "database already exists".
with admin_engine.connect() as conn:
    db_exists = conn.execute(
        text("SELECT 1 FROM pg_database WHERE datname = :name"),
        {"name": db},
    ).scalar() is not None

    if not db_exists:
        # Identifiers cannot be bound as parameters; quote the name explicitly.
        quoted_db = '"' + db.replace('"', '""') + '"'
        conn.execute(text(f"CREATE DATABASE {quoted_db}"))

# Release the pooled admin connection; the engine is not needed afterwards.
admin_engine.dispose()

In [128]:
from sqlalchemy import create_engine

# 1. Connection to the target PostgreSQL database, built from the connection
#    settings cell (usr, passw, server, port, db) so that every engine in the
#    notebook uses the same host and credentials.
#    NOTE: a password containing URL-special characters must be URL-encoded.
pg_engine = create_engine(
    f"postgresql+psycopg2://{usr}:{passw}@{server}:{port}/{db}"
)

# 2. Destination table -> DataFrame.
#    'player_stats' receives df_player_stats (the frame with the omitted
#    columns already dropped), not the unfiltered df_stats.
tables_to_upload = {
    'teams': df_equipos,
    'player_profile': df_profile,
    'player_stats': df_player_stats,
    'normalized_stats_position': df_mezcla,
}

# 3. Upload everything inside a single transaction: either all four tables are
#    replaced or, if any upload raises, none of them is, so the success message
#    below is only printed when it is true.
with pg_engine.begin() as conn:
    for table_name, frame in tables_to_upload.items():
        frame.to_sql(table_name, con=conn, if_exists='replace', index=False)

print("✅ Todos los DataFrames fueron subidos a PostgreSQL con éxito.")


✅ Todos los DataFrames fueron subidos a PostgreSQL con éxito.


In [129]:
# DISABLED CODE: everything below is a bare string literal, so none of it is
# executed; the cell only echoes the string back as its output.
# The text inside is an earlier variant of the upload step that wrote the
# frames to a local SQLite file ('sqlite:///scouting.db') instead of PostgreSQL.
# NOTE(review): it references df_per90, which is not defined in the visible
# cells, so it would not run as-is if re-enabled.
'''
from sqlalchemy import create_engine

# Crear motor para base de datos SQL
engine = create_engine('sqlite:///scouting.db')

# Subir DataFrames
df_equipos.to_sql('teams', con=engine, if_exists='replace', index=False)

# 5. Subida a la base de datos
df_profile.to_sql('player_profile', engine, if_exists='replace', index=False)
df_stats.to_sql('player_stats', engine, if_exists='replace', index=False)
df_per90.to_sql('player_stats_per90', engine, if_exists='replace', index=False)

df_mezcla.to_sql("normalized_stats_position", engine, if_exists='replace', index=False)
'''


'\nfrom sqlalchemy import create_engine\n\n# Crear motor para base de datos SQL\nengine = create_engine(\'sqlite:///scouting.db\')\n\n# Subir DataFrames\ndf_equipos.to_sql(\'teams\', con=engine, if_exists=\'replace\', index=False)\n\n# 5. Subida a la base de datos\ndf_profile.to_sql(\'player_profile\', engine, if_exists=\'replace\', index=False)\ndf_stats.to_sql(\'player_stats\', engine, if_exists=\'replace\', index=False)\ndf_per90.to_sql(\'player_stats_per90\', engine, if_exists=\'replace\', index=False)\n\ndf_mezcla.to_sql("normalized_stats_position", engine, if_exists=\'replace\', index=False)\n'

Almacenar en PostgreSQL

In [130]:
# DISABLED CODE: everything below is a bare string literal, so none of it is
# executed; the cell only echoes the string back as its output.
# The text inside is an earlier migration step that read five tables from the
# local SQLite file "scouting.db" and re-uploaded each one to PostgreSQL.
# NOTE(review): its table list includes "player_stats_per90", a table the
# active upload cell of this notebook does not create.
'''
import sqlite3

# 1. Conectar a SQLite y leer las tablas
sqlite_conn = sqlite3.connect("scouting.db")  # Asegúrate de que esté en el mismo directorio
tables = ["teams", "player_profile", "player_stats", "player_stats_per90", "normalized_stats_position"]

dfs = {table: pd.read_sql_query(f"SELECT * FROM {table}", sqlite_conn) for table in tables}

# 2. Conectar a PostgreSQL
pg_engine = create_engine("postgresql+psycopg2://postgres@localhost:5432/scouting")

# 3. Subir DataFrames a PostgreSQL
for name, df in dfs.items():
    df.to_sql(name, con=pg_engine, if_exists="replace", index=False)
    print(f"✅ Subida tabla: {name}")
    '''

'\nimport sqlite3\n\n# 1. Conectar a SQLite y leer las tablas\nsqlite_conn = sqlite3.connect("scouting.db")  # Asegúrate de que esté en el mismo directorio\ntables = ["teams", "player_profile", "player_stats", "player_stats_per90", "normalized_stats_position"]\n\ndfs = {table: pd.read_sql_query(f"SELECT * FROM {table}", sqlite_conn) for table in tables}\n\n# 2. Conectar a PostgreSQL\npg_engine = create_engine("postgresql+psycopg2://postgres@localhost:5432/scouting")\n\n# 3. Subir DataFrames a PostgreSQL\nfor name, df in dfs.items():\n    df.to_sql(name, con=pg_engine, if_exists="replace", index=False)\n    print(f"✅ Subida tabla: {name}")\n    '