<a href="https://colab.research.google.com/github/DanielFerreira3900/PremierLeague/blob/main/webscrappingtabela.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Trouxe todo o código do VS Code para cá, pois já tenho maior familiaridade com esta ferramenta.


In [None]:
import pandas as pd
import requests
from io import StringIO

def download_season(season_code, timeout=30):
    """Download one season's E0.csv match file from football-data.co.uk.

    Args:
        season_code: Four-character season code used in the URL,
            e.g. "0203" for 2002/03.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A DataFrame parsed from the downloaded CSV, or ``None`` when the
        request fails, the server answers with a non-200 status, or the
        CSV cannot be parsed. A message is printed in every failure case.
    """
    url = f"https://www.football-data.co.uk/mmz4281/{season_code}/E0.csv"
    print(f"Baixando dados da temporada {season_code} de {url} ...")

    # Connection errors, DNS failures and timeouts raise instead of returning
    # a response; report them like any other failed download so a single bad
    # season does not abort the caller's loop.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Erro ao baixar dados para a temporada {season_code}: {e}")
        return None

    if response.status_code != 200:
        print(f"Erro ao baixar dados para a temporada {season_code} (status {response.status_code})")
        return None

    try:
        # on_bad_lines='skip' drops malformed rows instead of failing the parse.
        return pd.read_csv(StringIO(response.text), on_bad_lines='skip')
    except Exception as e:
        # Deliberately broad: any parsing problem means "no data for this season".
        print(f"Erro ao processar CSV da temporada {season_code}: {e}")
        return None

def compute_standings(matches):
    """Build a league table from a DataFrame of match results.

    Args:
        matches: DataFrame with the columns 'HomeTeam', 'AwayTeam',
            'FTHG' (home goals) and 'FTAG' (away goals). Extra columns are
            ignored. Rows with a missing team or a missing/non-numeric
            score are skipped, so blank trailing rows do not create a
            phantom team or turn the goal totals into floats.

    Returns:
        DataFrame with the columns Position, Team, Played, Won, Drawn,
        Lost, GF, GA, GD and Points, ordered by Points, then GD, then GF
        (all descending). Empty (but with those columns) when there is no
        usable match.
    """
    columns = ['Position', 'Team', 'Played', 'Won', 'Drawn', 'Lost', 'GF', 'GA', 'GD', 'Points']

    # Work on a cleaned copy so the caller's DataFrame is never modified.
    results = matches[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']].copy()
    results['FTHG'] = pd.to_numeric(results['FTHG'], errors='coerce')
    results['FTAG'] = pd.to_numeric(results['FTAG'], errors='coerce')
    results = results.dropna()
    if results.empty:
        # Sorting a column-less frame would raise KeyError; return an empty table.
        return pd.DataFrame(columns=columns)
    # Scores may have been read as floats because of NaN rows; restore integers.
    results = results.astype({'FTHG': int, 'FTAG': int})

    # Order of first appearance (home column first) is kept so that teams tied
    # on every sort key stay in a deterministic order.
    teams = pd.concat([results['HomeTeam'], results['AwayTeam']]).unique()
    rows = []
    for team in teams:
        home = results[results['HomeTeam'] == team]
        away = results[results['AwayTeam'] == team]

        won = int((home['FTHG'] > home['FTAG']).sum() + (away['FTAG'] > away['FTHG']).sum())
        drawn = int((home['FTHG'] == home['FTAG']).sum() + (away['FTAG'] == away['FTHG']).sum())
        lost = int((home['FTHG'] < home['FTAG']).sum() + (away['FTAG'] < away['FTHG']).sum())

        goals_for = int(home['FTHG'].sum() + away['FTAG'].sum())
        goals_against = int(home['FTAG'].sum() + away['FTHG'].sum())

        rows.append({
            'Team': team,
            'Played': len(home) + len(away),
            'Won': won,
            'Drawn': drawn,
            'Lost': lost,
            'GF': goals_for,
            'GA': goals_against,
            'GD': goals_for - goals_against,
            'Points': won * 3 + drawn,  # 3 points per win, 1 per draw
        })

    standings = pd.DataFrame(rows)
    standings = standings.sort_values(by=['Points', 'GD', 'GF'], ascending=False)
    # Position is assigned positionally after sorting: 1 for the top row.
    standings['Position'] = range(1, len(standings) + 1)
    return standings[columns]

# Season codes mapped to display names, e.g. "0203" -> "2002/03".
# Generated from the starting year of each season, 2002/03 through 2023/24;
# the code is the last two digits of both years, the name is "YYYY/YY".
seasons = {
    f"{start_year % 100:02d}{(start_year + 1) % 100:02d}":
        f"{start_year}/{(start_year + 1) % 100:02d}"
    for start_year in range(2002, 2024)
}

# Columns a downloaded season must provide before a table can be computed.
required_columns = {'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'}

# One standings table per successfully processed season.
all_standings = []

# Download each season, skipping those without usable data.
for code, season_name in seasons.items():
    matches = download_season(code)
    if matches is None or not required_columns <= set(matches.columns):
        print(f"⚠️ Temporada {season_name} pulada por dados incompletos.")
        continue

    standings = compute_standings(matches)
    standings['Season'] = season_name
    all_standings.append(standings)

# Merge every season into a single CSV with 'Season' as the first column.
if not all_standings:
    print("❌ Nenhum dado foi coletado.")
else:
    final_table = pd.concat(all_standings, ignore_index=True)
    cols = ['Season', *(col for col in final_table.columns if col != 'Season')]
    final_table = final_table[cols]
    final_table.to_csv("premier_league_standings_2002_2023.csv", index=False)
    print("✅ CSV gerado com sucesso: premier_league_standings_2002_2023.csv")