In [1]:
import os
import pandas as pd
import numpy as np

# ====================== ELO评分算法实现 ======================
def initialize_elo_scores(team_positions_df):
    """初始化球队ELO分数"""
    teams = team_positions_df['team_name'].unique().tolist()
    team_elo = {team: 1500 for team in teams}
    for team in teams:
        team_data = team_positions_df[team_positions_df['team_name'] == team]
        rank = team_data['points_per_game'].rank().iloc[0]
        team_elo[team] += (20 * (len(teams) - rank))
    return team_elo

def update_elo_scores(elo_scores, home_team, away_team, home_score, away_score, K=30):
    """更新ELO分数"""
    home_elo, away_elo = elo_scores[home_team], elo_scores[away_team]
    expected_home = 1 / (1 + 10 ** ((away_elo - home_elo) / 400))
    expected_away = 1 / (1 + 10 ** ((home_elo - away_elo) / 400))

    if home_score > away_score:
        elo_scores[home_team] += K * (1 - expected_home)
        elo_scores[away_team] += K * (0 - expected_away)
    elif home_score < away_score:
        elo_scores[home_team] += K * (0 - expected_home)
        elo_scores[away_team] += K * (1 - expected_away)
    else:
        elo_scores[home_team] += K * (0.5 - expected_home)
        elo_scores[away_team] += K * (0.5 - expected_away)
    return elo_scores

# ====================== 数据加载并计算ELO ======================
def load_all_league_data(base_path, leagues, seasons):
    """加载所有联赛数据，计算ELO并输出最高和最低值"""
    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not os.path.exists(team_file) or not os.path.exists(match_file):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # 统一球队名称格式
            team_df['team_name'] = team_df.get('common_name', team_df.get('team_name', None)).str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)]
            if len(match_df) < original_match_count:
                print(f"警告: 过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 无有效比赛数据")
                continue

            # 初始化ELO分数
            elo_scores = initialize_elo_scores(team_df)

            # 更新ELO分数
            for _, row in match_df.iterrows():
                home_team = row['home_team_name']
                away_team = row['away_team_name']
                home_score = row['home_team_goal_count']
                away_score = row['away_team_goal_count']
                elo_scores = update_elo_scores(elo_scores, home_team, away_team, home_score, away_score)

            # 找到ELO最高和最低的球队
            max_elo_team = max(elo_scores, key=elo_scores.get)
            min_elo_team = min(elo_scores, key=elo_scores.get)
            print(f"{league_name} - {season}: 最高ELO: {max_elo_team} ({elo_scores[max_elo_team]:.2f}), "
                  f"最低ELO: {min_elo_team} ({elo_scores[min_elo_team]:.2f})")

# ====================== 主函数 ======================
if __name__ == "__main__":
    base_path = '/Users/peixuanma/Downloads/data1'
    leagues = [
        ("england", "premier-league"), ("germany", "bundesliga"), ("spain", "la-liga"),
        ("france", "ligue-1"), ("italy", "serie-a"), ("netherlands", "eredivisie"),
        ("portugal", "ligapro"), ("denmark", "superliga"), ("england", "championship"),
        ("portugal", "liga-nos"), ("italy", "serie-b"), ("germany", "2-bundesliga")
    ]
    seasons = [
        "2013-to-2014", "2014-to-2015", "2015-to-2016", "2016-to-2017", "2017-to-2018",
        "2018-to-2019", "2020-to-2021", "2021-to-2022", "2022-to-2023", "2023-to-2024"
    ]

    # 加载数据并计算ELO
    load_all_league_data(base_path, leagues, seasons)

加载数据: england - premier-league - 2013-to-2014
premier-league - 2013-to-2014: 最高ELO: manchester city (2060.30), 最低ELO: cardiff city (1746.34)
加载数据: england - premier-league - 2014-to-2015
premier-league - 2014-to-2015: 最高ELO: chelsea (2049.61), 最低ELO: queens park rangers (1754.03)
加载数据: england - premier-league - 2015-to-2016
premier-league - 2015-to-2016: 最高ELO: leicester city (2037.13), 最低ELO: aston villa (1686.95)
加载数据: england - premier-league - 2016-to-2017
premier-league - 2016-to-2017: 最高ELO: chelsea (2076.84), 最低ELO: sunderland (1718.84)
加载数据: england - premier-league - 2017-to-2018
premier-league - 2017-to-2018: 最高ELO: manchester city (2103.06), 最低ELO: swansea city (1774.37)
加载数据: england - premier-league - 2018-to-2019
premier-league - 2018-to-2019: 最高ELO: manchester city (2108.67), 最低ELO: huddersfield town (1677.22)
加载数据: england - premier-league - 2020-to-2021
premier-league - 2020-to-2021: 最高ELO: manchester city (2039.39), 最低ELO: sheffield united (1738.13)
加载数据: england - p