En este notebook se cargan en una base de datos PostgreSQL los datos de los archivos .csv con los que se ha estado trabajando.

In [74]:
import os

import pandas as pd

In [75]:
# Load the four source CSV files in one pass. The order of the paths matches
# the order of the target names on the left-hand side of the assignment.
df_mezcla, df_equipos, df_total_v4, df_total_v5 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "datos/normalizaciones_posicion/mezcla/mezcla_total.csv",
        "datos/datos_equipos/ligas_europeas16_equipos.csv",
        "datos/datos_jugadores_v4/jugadores_total.csv",
        "datos/datos_jugadores_v5/jugadores_total.csv",
    )
)

In [76]:
# 1. Column classification: partition the columns of df_total_v4 into
#    profile attributes, per-90 metrics and everything else.

# Profile columns are listed explicitly.
cols_profile = [
    'player_id', 'player_name',
    'dob', 'age',
    'nationality_id', 'nationality_name',
    'preferred_foot', 'height_cm', 'weight_kg',
    'main_position', 'positions',
    'club_jersey_number', 'club_loaned_from', 'club_contract_valid_until',
    'value_eur', 'wage_eur', 'release_clause_eur',
    'team_id', 'team', 'rating',
]

# Per-90 columns: any column whose lowercased name contains "_per90".
cols_per90 = [
    column_name
    for column_name in df_total_v4.columns
    if '_per90' in column_name.lower()
]

# Remaining columns: neither per-90 nor profile. Note that 'player_id' is a
# profile column, so it is not part of this list.
cols_stats = [
    column_name
    for column_name in df_total_v4.columns
    if not (column_name in cols_per90 or column_name in cols_profile)
]

In [77]:
# 2. Split df_total_v4 into three independent copies, one per column group.
#    'player_id' is prepended to the stats and per-90 frames so that every
#    frame carries the player key.
df_profile = df_total_v4.loc[:, cols_profile].copy()
df_stats = df_total_v4.loc[:, ['player_id', *cols_stats]].copy()
df_per90 = df_total_v4.loc[:, ['player_id', *cols_per90]].copy()

In [78]:
# DataFrames to audit, keyed by the label printed in the report.
dataframes = {
    "df_equipos": df_equipos,
    "df_player_stats": df_stats,
    "df_player_stats_per90": df_per90,
    "df_normalized_stats_posito": df_mezcla,
    "df_player_profile": df_profile,
}

# Null audit: for each frame print the total number of missing values and,
# when there are any, the per-column counts of the affected columns only.
separator = "-" * 50
for label, frame in dataframes.items():
    null_counts = frame.isna().sum()
    columns_with_nulls = null_counts[null_counts > 0]
    print(f"➡️ {label}: {null_counts.sum()} valores nulos")
    if not columns_with_nulls.empty:
        print(columns_with_nulls)
    print(separator)

➡️ df_equipos: 0 valores nulos
--------------------------------------------------
➡️ df_player_stats: 0 valores nulos
--------------------------------------------------
➡️ df_player_stats_per90: 0 valores nulos
--------------------------------------------------
➡️ df_normalized_stats_posito: 29544 valores nulos
saves_per90                              1184
save_percentage                          1184
xg_against_minus_goals_conceded_per90    1184
keeper_sweeper_per90                     1184
penalty_save_percentage                  1184
aerial_dominance_index_per90             1184
pass_completion_rate                      441
tackle_success_rate                       777
tackles_successful_per90                  777
interceptions_per90                       599
interception_success_rate                 777
pressures_per90                           396
chances_created_per90                      92
progressive_carries_per90                 366
crosses_completed_per90                   8

In [79]:
# Display the column index of df_mezcla (loaded from mezcla_total.csv) as the cell output.
df_mezcla.columns

Index(['player_id', 'saves_per90', 'save_percentage',
       'xg_against_minus_goals_conceded_per90', 'keeper_sweeper_per90',
       'penalty_save_percentage', 'aerial_dominance_index_per90',
       'pass_completion_rate', 'tackle_success_rate',
       'tackles_successful_per90', 'interceptions_per90',
       'interception_success_rate', 'pressures_per90', 'chances_created_per90',
       'progressive_carries_per90', 'crosses_completed_per90',
       'duels_won_per90', 'fouls_committed_per90', 'goals_scored_per90',
       'goal_assists_per90', 'clearances_per90', 'blocks_per90',
       'times_dribbled_past_per90', 'progressive_passes_per90',
       'duel_success_rate', 'ball_recoveries_per90',
       'dribbles_completed_per90', 'dribble_success_rate', 'key_passes_per90',
       'shots_total_per90', 'xg_total_per90', 'fouls_won_per90',
       'goals_minus_xg_per90', 'progressive_carries_rate', 'shot_accuracy',
       'penalties_won_per90', 'headed_shot_duel_rate'],
      dtype='object')

In [80]:
# Display the column index of df_profile (the cols_profile subset of df_total_v4) as the cell output.
df_profile.columns

Index(['player_id', 'player_name', 'dob', 'age', 'nationality_id',
       'nationality_name', 'preferred_foot', 'height_cm', 'weight_kg',
       'main_position', 'positions', 'club_jersey_number', 'club_loaned_from',
       'club_contract_valid_until', 'value_eur', 'wage_eur',
       'release_clause_eur', 'team_id', 'team', 'rating'],
      dtype='object')

In [81]:
# Print every df_stats column name on its own line.
print(*df_stats.columns, sep="\n")

player_id
matches_played
competition
tackle_success_rate
tackles_successful
interception_success_rate
interceptions
clearances
blocks
head_clearances
head_clearances_won
head_clearance_success_rate
headed_shots_total
headed_shots_after_duel
headed_shot_duel_rate
total_passes
completed_passes
incomplete_passes
passes_out
offside_passes
failed_passes
pass_completion_rate
avg_pass_length
ground_passes
low_passes
high_passes
ground_pass_percentage
low_pass_percentage
high_pass_percentage
crosses_total
crosses_completed
cutbacks_total
cutbacks_completed
switches_total
switches_completed
deflected_passes
goal_assists
key_passes
chances_created
through_balls_total
through_balls_completed
head_pass_percentage
right_foot_pass_percentage
left_foot_pass_percentage
right_foot_pass_accuracy
left_foot_pass_accuracy
passes_own_half
passes_opposition_half
passes_from_opposition_half_percentage
progressive_passes
progressive_passes_completed
progressive_passes_accuracy
passes_final_third
passes_final_t

In [82]:
# Display the column index of df_per90 ('player_id' plus the "_per90" columns) as the cell output.
df_per90.columns

Index(['player_id', 'tackles_successful_per90', 'interceptions_per90',
       'clearances_per90', 'blocks_per90', 'head_clearances_per90',
       'head_clearances_won_per90', 'times_dribbled_past_per90',
       'ball_recoveries_per90', 'offensive_recoveries_per90',
       'pressures_per90', 'counterpress_per90', 'total_passes_per90',
       'completed_passes_per90', 'incomplete_passes_per90', 'passes_out_per90',
       'offside_passes_per90', 'failed_passes_per90', 'ground_passes_per90',
       'low_passes_per90', 'high_passes_per90', 'deflected_passes_per90',
       'crosses_total_per90', 'crosses_completed_per90',
       'cutbacks_total_per90', 'cutbacks_completed_per90',
       'switches_total_per90', 'switches_completed_per90',
       'through_balls_total_per90', 'through_balls_completed_per90',
       'passes_own_half_per90', 'passes_opposition_half_per90',
       'progressive_passes_per90', 'progressive_passes_completed_per90',
       'passes_final_third_per90', 'passes_to_box_pe

In [83]:
# PostgreSQL connection settings.
# Each value can be overridden through the libpq-style environment variable
# named in its lookup, so real credentials do not have to be written into the
# notebook. The fallbacks are the local development values used so far.
server = os.environ.get("PGHOST", "127.0.0.1")
# Fallback matches the database this notebook creates and loads ("scoutingdb").
db = os.environ.get("PGDATABASE", "scoutingdb")
usr = os.environ.get("PGUSER", "postgres")
passw = os.environ.get("PGPASSWORD", "1234")
# Kept as a string, like the original setting.
port = os.environ.get("PGPORT", "5432")

In [84]:
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

# Administrative connection to the server's default "postgres" database,
# built from the connection settings cell (usr, passw, server, port) instead
# of repeating the credentials in a URL literal.
# AUTOCOMMIT is required because PostgreSQL refuses to run CREATE DATABASE
# inside a transaction block; it replaces the manual "COMMIT" workaround.
admin_engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username=usr,
        password=passw,
        host=server,
        port=int(port),
        database="postgres",
    ),
    isolation_level="AUTOCOMMIT",
)

# Create the target database only when it is missing, so the cell can be
# re-run (Restart & Run All) without failing on an already existing database.
with admin_engine.connect() as conn:
    database_exists = conn.execute(
        text("SELECT 1 FROM pg_database WHERE datname = :name"),
        {"name": "scoutingdb"},
    ).scalar() is not None
    if not database_exists:
        conn.execute(text("CREATE DATABASE scoutingdb"))

# Release the pooled connection to the "postgres" database; it is not needed again.
admin_engine.dispose()

In [85]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# 1. Connection to the "scoutingdb" database on PostgreSQL.
#    Host, port, user and password come from the connection settings cell, so
#    this connection authenticates the same way as the administrative one
#    (the previous URL literal carried no password).
pg_engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username=usr,
        password=passw,
        host=server,
        port=int(port),
        database="scoutingdb",
    )
)

# 2. Direct upload of the DataFrames: target table name -> DataFrame.
#    if_exists='replace' drops and recreates each table, so re-running the
#    cell overwrites the previous load; the DataFrame index is not written.
tables_to_upload = {
    'teams': df_equipos,
    'player_profile': df_profile,
    'player_stats': df_stats,
    'player_stats_per90': df_per90,
    'normalized_stats_position': df_mezcla,
}
for table_name, table_df in tables_to_upload.items():
    table_df.to_sql(table_name, con=pg_engine, if_exists='replace', index=False)

print("✅ Todos los DataFrames fueron subidos a PostgreSQL con éxito.")


✅ Todos los DataFrames fueron subidos a PostgreSQL con éxito.


In [86]:
# NOTE(review): disabled legacy code. The whole cell is one triple-quoted
# string literal, so none of the statements inside it are executed; the
# notebook only echoes the string back as the cell output. The text holds an
# earlier variant that wrote the same five tables to a local SQLite file
# (sqlite:///scouting.db). Consider deleting this cell.
'''
from sqlalchemy import create_engine

# Crear motor para base de datos SQL
engine = create_engine('sqlite:///scouting.db')

# Subir DataFrames
df_equipos.to_sql('teams', con=engine, if_exists='replace', index=False)

# 5. Subida a la base de datos
df_profile.to_sql('player_profile', engine, if_exists='replace', index=False)
df_stats.to_sql('player_stats', engine, if_exists='replace', index=False)
df_per90.to_sql('player_stats_per90', engine, if_exists='replace', index=False)

df_mezcla.to_sql("normalized_stats_position", engine, if_exists='replace', index=False)
'''


'\nfrom sqlalchemy import create_engine\n\n# Crear motor para base de datos SQL\nengine = create_engine(\'sqlite:///scouting.db\')\n\n# Subir DataFrames\ndf_equipos.to_sql(\'teams\', con=engine, if_exists=\'replace\', index=False)\n\n# 5. Subida a la base de datos\ndf_profile.to_sql(\'player_profile\', engine, if_exists=\'replace\', index=False)\ndf_stats.to_sql(\'player_stats\', engine, if_exists=\'replace\', index=False)\ndf_per90.to_sql(\'player_stats_per90\', engine, if_exists=\'replace\', index=False)\n\ndf_mezcla.to_sql("normalized_stats_position", engine, if_exists=\'replace\', index=False)\n'

Almacenar en PostgreSQL

In [87]:
# NOTE(review): disabled legacy code. The whole cell is one triple-quoted
# string literal (its closing quotes are indented on the last line), so none
# of the statements inside it are executed; the notebook only echoes the
# string back as the cell output. The text holds an earlier migration that
# read five tables from the SQLite file "scouting.db" and re-uploaded them to
# a PostgreSQL database named "scouting". Consider deleting this cell.
'''
import sqlite3

# 1. Conectar a SQLite y leer las tablas
sqlite_conn = sqlite3.connect("scouting.db")  # Asegúrate de que esté en el mismo directorio
tables = ["teams", "player_profile", "player_stats", "player_stats_per90", "normalized_stats_position"]

dfs = {table: pd.read_sql_query(f"SELECT * FROM {table}", sqlite_conn) for table in tables}

# 2. Conectar a PostgreSQL
pg_engine = create_engine("postgresql+psycopg2://postgres@localhost:5432/scouting")

# 3. Subir DataFrames a PostgreSQL
for name, df in dfs.items():
    df.to_sql(name, con=pg_engine, if_exists="replace", index=False)
    print(f"✅ Subida tabla: {name}")
    '''

'\nimport sqlite3\n\n# 1. Conectar a SQLite y leer las tablas\nsqlite_conn = sqlite3.connect("scouting.db")  # Asegúrate de que esté en el mismo directorio\ntables = ["teams", "player_profile", "player_stats", "player_stats_per90", "normalized_stats_position"]\n\ndfs = {table: pd.read_sql_query(f"SELECT * FROM {table}", sqlite_conn) for table in tables}\n\n# 2. Conectar a PostgreSQL\npg_engine = create_engine("postgresql+psycopg2://postgres@localhost:5432/scouting")\n\n# 3. Subir DataFrames a PostgreSQL\nfor name, df in dfs.items():\n    df.to_sql(name, con=pg_engine, if_exists="replace", index=False)\n    print(f"✅ Subida tabla: {name}")\n    '