In [4]:
import soccerdata as sd
import pandas as pd
import numpy as np
from datetime import datetime
import os
from pathlib import Path

# --- CONFIGURA√á√ïES ---
SEASON = "2526"
LEAGUE = "ENG-Premier League"
CURRENT_DATE = datetime.now()

# Mostra todas as colunas no terminal
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

def extract_data():
    print(f"üîÑ [1/3] Conectando ao FBref ({SEASON})...")
    cache_path = Path("./data/cache")
    fbref = sd.FBref(leagues=LEAGUE, seasons=SEASON, data_dir=cache_path)
    
    print("‚¨áÔ∏è  [2/3] Baixando estat√≠sticas de Goleiros (Match Logs)...")
    # A tabela 'keepers' nos match logs j√° cont√©m PSxG, Crosses e Sweeper actions
    keepers = fbref.read_player_match_stats(stat_type="keepers")
    
    return keepers

def clean_and_process(df):
    print("‚öôÔ∏è  [3/3] Limpando e organizando...")
    df = df.reset_index()
    
    # PADRONIZA√á√ÉO DE COLUNAS (O Pulo do Gato)
    # O soccerdata devolve colunas MultiIndex (ex: ('Shot Stopping', 'PSxG')).
    # Vamos achatar para nomes simples como 'Shot_Stopping_PSxG'.
    new_cols = []
    for col in df.columns:
        if isinstance(col, tuple):
            # Junta os n√≠veis com underscore e remove espa√ßos
            new_name = f"{col[0]}_{col[1]}".strip().replace(" ", "_")
            # Se o primeiro n√≠vel for vazio ou in√∫til, pega s√≥ o segundo
            if col[0] in ['', 'Unnamed: 0_level_0']: 
                new_name = col[1]
            new_cols.append(new_name)
        else:
            new_cols.append(col)
    
    df.columns = new_cols

    # Filtro de data (apenas jogos passados)
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])
        df = df[(df['date'] <= CURRENT_DATE) & (df['minutes'] > 0)].copy()
    
    # Renomear para nomes amig√°veis para nosso projeto
    # Ajuste aqui baseado nos nomes que o 'achatar' gerou
    cols_map = {
        'date': 'Date',
        'player': 'Player',
        'team': 'Team',
        'opponent': 'Opponent',
        'Shot_Stopping_GA': 'GA',
        'Shot_Stopping_Saves': 'Saves',
        'Shot_Stopping_PSxG': 'PSxG',
        'Crosses_Opp': 'Crosses_Faced',
        'Crosses_Stp': 'Crosses_Stopped',
        'Sweeper_#OPA': 'OPA',
        'Sweeper_AvgDist': 'Sweeper_Dist'
    }
    
    # Renomeia apenas o que encontrar
    found_cols = {k: v for k, v in cols_map.items() if k in df.columns}
    df = df.rename(columns=found_cols)
    
    # Tratamento de Nulos
    cols_to_fill = ['PSxG', 'GA', 'Crosses_Faced', 'Crosses_Stopped', 'OPA']
    for c in cols_to_fill:
        if c in df.columns:
            df[c] = df[c].fillna(0)
            
    # Cria√ß√£o da M√©trica Principal (PSxG +/-)
    if 'PSxG' in df.columns and 'GA' in df.columns:
        df['PSxG_Net'] = df['PSxG'] - df['GA']
        
    return df

if __name__ == "__main__":
    os.makedirs("data/processed", exist_ok=True)
    
    try:
        raw_data = extract_data()
        df_final = clean_and_process(raw_data)
        
        output_path = "data/processed/pl_goalkeepers_2526_matches.csv"
        df_final.to_csv(output_path, index=False)
        
        print(f"\n‚úÖ Sucesso! Arquivo salvo: {output_path}")
        print(f"üìä Registros: {len(df_final)}")
        print("Colunas dispon√≠veis:", df_final.columns.tolist())
        
    except Exception as e:
        print(f"\n‚ùå Erro: {e}")

üîÑ [1/3] Conectando ao FBref (2526)...


‚¨áÔ∏è  [2/3] Baixando estat√≠sticas de Goleiros (Match Logs)...


‚öôÔ∏è  [3/3] Limpando e organizando...

‚úÖ Sucesso! Arquivo salvo: data/processed/pl_goalkeepers_2526_matches.csv
üìä Registros: 424
Colunas dispon√≠veis: ['league_', 'season_', 'game_', 'team_', 'player_', 'nation_', 'age_', 'min_', 'Shot_Stopping_SoTA', 'GA', 'Saves', 'Shot_Stopping_Save%', 'PSxG', 'Launched_Cmp', 'Launched_Att', 'Launched_Cmp%', 'Passes_Att_(GK)', 'Passes_Thr', 'Passes_Launch%', 'Passes_AvgLen', 'Goal_Kicks_Att', 'Goal_Kicks_Launch%', 'Goal_Kicks_AvgLen', 'Crosses_Faced', 'Crosses_Stopped', 'Crosses_Stp%', 'OPA', 'Sweeper_Dist', 'game_id_', 'PSxG_Net']
