# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click the 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split



#ELO评分算法实现

def initialize_elo_scores(team_positions_df):
    """Seed every team with an ELO score based on its points-per-game rank.

    Teams are ranked against each other by their mean ``points_per_game``
    over all of their rows (a team may appear once per season). The best team
    gets rank 1 and therefore the largest bonus:
    ``1500 + 20 * (number_of_teams - rank)``. Tied teams share the average
    rank.

    Args:
        team_positions_df: DataFrame with ``team_name`` and
            ``points_per_game`` columns.

    Returns:
        dict mapping each unique team name to its initial ELO score. Teams
        whose rank cannot be computed (missing name or missing
        points-per-game) keep the base score of 1500. An empty input yields
        an empty dict.
    """
    teams = team_positions_df['team_name'].unique().tolist()
    if not teams:
        return {}

    # Rank across teams, not within a single team's rows: ranking one team's
    # own rows gives every single-row team rank 1 and thus the same bonus.
    mean_ppg = team_positions_df.groupby('team_name')['points_per_game'].mean()
    # Descending so that a higher points-per-game means a higher ELO score.
    ranks = mean_ppg.rank(ascending=False)

    num_teams = len(teams)
    team_elo = {}
    for team in teams:
        rank = ranks.get(team)
        if rank is None or pd.isna(rank):
            team_elo[team] = 1500
        else:
            team_elo[team] = 1500 + 20 * (num_teams - rank)
    return team_elo


def update_elo_scores(elo_scores, home_team, away_team, home_score, away_score, K=30):
    """Apply one match result to the ELO table and return the table.

    Args:
        elo_scores: dict mapping team name to its current ELO score; it is
            updated in place.
        home_team, away_team: keys of the two teams in ``elo_scores``.
        home_score, away_score: goals scored by each side.
        K: step size of the update.

    Returns:
        The same ``elo_scores`` dict, after both teams have been adjusted.
    """
    rating_home = elo_scores[home_team]
    rating_away = elo_scores[away_team]

    # Expected score of each side from the logistic ELO curve (400-point scale).
    win_prob_home = 1 / (1 + 10 ** ((rating_away - rating_home) / 400))
    win_prob_away = 1 / (1 + 10 ** ((rating_home - rating_away) / 400))

    # Actual score: 1 for a win, 0 for a loss, 0.5 each for a draw.
    if home_score > away_score:
        actual_home, actual_away = 1, 0
    elif home_score < away_score:
        actual_home, actual_away = 0, 1
    else:
        actual_home, actual_away = 0.5, 0.5

    elo_scores[home_team] += K * (actual_home - win_prob_home)
    elo_scores[away_team] += K * (actual_away - win_prob_away)
    return elo_scores



# 数据加载与预处理
def load_all_league_data(base_path, leagues, seasons):
    """
    Load and merge the team and match data of every league/season pair and
    reduce each file to two principal components with PCA.

    For every ``(country_name, league_name)`` in ``leagues`` and every entry
    of ``seasons`` the function reads
    ``<country>-<league>-teams-<season>-stats.csv`` and
    ``<country>-<league>-matches-<season>-stats.csv`` from ``base_path``.
    A pair is skipped with a printed message when a file is missing, when no
    team-name column is found, or when there is too little data to extract
    two components.

    Returns:
      - combined_team_positions: DataFrame, one row per team per season with
        columns team_name, PC1, PC2, points_per_game, league, season
      - combined_match_positions: DataFrame, one row per match with columns
        home_team_name, away_team_name, PC1, PC2, league, season
    Either frame is empty (but carries its columns) when nothing was loaded.
    """
    team_columns = ['team_name', 'PC1', 'PC2', 'points_per_game', 'league', 'season']
    match_columns = ['home_team_name', 'away_team_name', 'PC1', 'PC2', 'league', 'season']

    all_team_positions = []
    all_match_positions = []
    for country_name, league_name in leagues:
        for season in seasons:
            print(f"Loading data for: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not os.path.exists(team_file) or not os.path.exists(match_file):
                print(f"Warning: Missing files for {country_name} - {league_name} - {season}")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Prefer "common_name"; fall back to "team_name" when it is absent.
            team_names = team_df.get('common_name', team_df.get('team_name', None))
            if team_names is None:
                print(f"Error: Neither 'common_name' nor 'team_name' columns found in {team_file}")
                continue

            points_per_game = team_df['points_per_game']

            # Defensive statistics used as PCA input.
            defensive_columns = [
                'goals_conceded', 'goals_conceded_home', 'goals_conceded_away',
                'goals_conceded_per_match', 'goals_conceded_per_match_home', 'goals_conceded_per_match_away',
                'clean_sheets', 'clean_sheets_home', 'clean_sheets_away',
                'clean_sheet_percentage', 'clean_sheet_percentage_home', 'clean_sheet_percentage_away',
                'minutes_per_goal_conceded', 'minutes_per_goal_conceded_home', 'minutes_per_goal_conceded_away',
                'goals_conceded_half_time', 'goals_conceded_half_time_home', 'goals_conceded_half_time_away',
                'clean_sheet_half_time', 'clean_sheet_half_time_percentage',
                'fouls', 'fouls_home', 'fouls_away', 'cards_total', 'cards_total_home', 'cards_total_away',
                'xg_against_avg_overall', 'xg_against_avg_home', 'xg_against_avg_away'
            ]
            team_df = team_df[defensive_columns].fillna(0)

            # PCA(n_components=2) raises unless there are at least two rows
            # and two columns, so the check has to happen before fitting.
            if min(team_df.shape) < 2:
                print(f"Warning: Not enough team rows to extract two PCA components from {team_file}")
                continue

            # Standardize the defensive data, then project onto two components.
            scaler_defense = StandardScaler()
            X_scaled_defense = scaler_defense.fit_transform(team_df)
            pca_defense = PCA(n_components=2)
            X_pca_defense = pca_defense.fit_transform(X_scaled_defense)

            team_positions = pd.DataFrame({
                'team_name': team_names,
                'PC1': X_pca_defense[:, 0],  # first principal component
                'PC2': X_pca_defense[:, 1],  # second principal component
                'points_per_game': points_per_game,
                'league': league_name,
                'season': season
            })
            all_team_positions.append(team_positions)

            # Match data: drop bookkeeping columns, then coerce what can be
            # coerced to numbers (decimal commas are turned into dots first).
            irrelevant_cols = ['timestamp', 'date_GMT', 'status', 'attendance', 'referee', 'stadium_name', 'Game Week']
            match_df = match_df.drop(columns=irrelevant_cols, errors='ignore')
            for col in match_df.columns:
                match_df[col] = match_df[col].astype(str).str.replace(',', '.', regex=False)
                try:
                    match_df[col] = pd.to_numeric(match_df[col])
                except ValueError:
                    # Genuinely textual column (e.g. team names): keep as strings.
                    pass
            numeric_cols = match_df.select_dtypes(include=[np.number]).columns
            if len(match_df) < 2 or len(numeric_cols) < 2:
                # The team rows of this season stay loaded; only matches are skipped.
                print(f"Warning: Not enough match data to extract two PCA components from {match_file}")
                continue
            match_df[numeric_cols] = match_df[numeric_cols].fillna(match_df[numeric_cols].mean())
            scaler_match = StandardScaler()
            X_scaled_match = scaler_match.fit_transform(match_df[numeric_cols])
            pca_match = PCA(n_components=2)
            X_pca_match = pca_match.fit_transform(X_scaled_match)
            match_positions = pd.DataFrame({
                'home_team_name': match_df['home_team_name'],
                'away_team_name': match_df['away_team_name'],
                'PC1': X_pca_match[:, 0],
                'PC2': X_pca_match[:, 1],
                'league': league_name,
                'season': season
            })
            all_match_positions.append(match_positions)

    # pd.concat raises ValueError on an empty list, so fall back to empty
    # frames that still expose the columns callers index into.
    if all_team_positions:
        combined_team_positions = pd.concat(all_team_positions, ignore_index=True)
    else:
        combined_team_positions = pd.DataFrame(columns=team_columns)
    if all_match_positions:
        combined_match_positions = pd.concat(all_match_positions, ignore_index=True)
    else:
        combined_match_positions = pd.DataFrame(columns=match_columns)
    return combined_team_positions, combined_match_positions


def compute_total_loss(positions, match_home_idx, match_away_idx,
                       match_PC1, match_PC2, points_per_game, rank_scale,
                       avg_conceded, def_shots, lambda_reg, elo_scores, alpha=2.0):
    """Return the ELO-weighted match loss plus the defensive regularizer.

    The match term is the mean, over matches, of the min-max normalized
    distances from the home and away team positions to the match point
    ``(match_PC1, match_PC2)``, scaled by
    ``1 / (1 + |home_elo - away_elo| * rank_scale)``.
    The defensive term is ``lambda_reg`` times the mean squared difference
    between the second position coordinate and
    ``1 - avg_conceded / (def_shots + 1e-12)``.

    ``points_per_game`` and ``alpha`` are accepted but not used by this body.
    Returns a constant 0.0 when there are no matches.
    """
    tiny = 1e-12
    if tf.shape(match_home_idx)[0] == 0:
        return tf.constant(0.0, dtype=tf.float32)

    # Make sure the ELO scores are a float32 tensor before gathering.
    elo_tensor = tf.convert_to_tensor(elo_scores, dtype=tf.float32)

    # Distance of each side's position to the match point.
    targets = tf.stack([match_PC1, match_PC2], axis=1)
    home_gap = tf.norm(tf.gather(positions, match_home_idx) - targets, axis=1)
    away_gap = tf.norm(tf.gather(positions, match_away_idx) - targets, axis=1)

    # Min-max normalize over the pooled home and away distances.
    pooled = tf.concat([home_gap, away_gap], axis=0)
    lowest = tf.reduce_min(pooled)
    spread = (tf.reduce_max(pooled) - lowest) + tiny
    home_scaled = (home_gap - lowest) / spread
    away_scaled = (away_gap - lowest) / spread

    # Matches between teams with a large ELO gap receive a smaller weight.
    rating_gap = tf.abs(tf.gather(elo_tensor, match_home_idx)
                        - tf.gather(elo_tensor, match_away_idx))
    importance = 1.0 / (1.0 + rating_gap * rank_scale)
    match_term = tf.reduce_mean(importance * (home_scaled + away_scaled))

    # Defensive regularizer on the second coordinate.
    ideal_defense = 1.0 - (avg_conceded / (def_shots + tiny))
    defense_term = lambda_reg * tf.reduce_mean(tf.square(positions[:, 1] - ideal_defense))

    return match_term + defense_term




# Adam优化器训练 
def adam_optimize_positions(team_positions_df, match_positions_df, initial_lr=0.007,
                            decay_steps=50000, decay_rate=0.80, clipnorm=1.0,
                            iterations=50000, verbose_interval=500, random_seed=42,
                            lambda_reg=1.0, alpha=2.0):
    """
    Fit team coordinates and the learnable ``rank_scale`` with Adam on the
    given matches, using ELO scores to weight each match.

    One position is learned per unique ``team_name``; it is initialised from
    the first row of that team in ``team_positions_df``. The parameters with
    the lowest recorded loss are restored at the end.

    Side effects: overwrites ``PC1``/``PC2`` in ``team_positions_df`` in
    place (every row of a team gets that team's learned position) and writes
    ``trained_team_positions.csv`` and ``best_rank_scale.txt`` to the current
    working directory.

    Returns:
        ``(losses, team_positions_df, best_rank_scale)``. When no match refers
        to two known teams, returns ``([], team_positions_df, None)`` without
        training or writing files.
    """
    # Initial ELO score per team, ordered like the ``teams`` list.
    elo_scores_dict = initialize_elo_scores(team_positions_df)
    teams = team_positions_df['team_name'].unique().tolist()
    elo_scores = tf.convert_to_tensor([elo_scores_dict[team] for team in teams], dtype=tf.float32)

    team_name_to_idx = {t: i for i, t in enumerate(teams)}
    num_teams = len(teams)
    init_positions = np.zeros((num_teams, 2), dtype=np.float32)
    init_points_pg = np.zeros((num_teams,), dtype=np.float32)
    init_avg_conceded = np.zeros((num_teams,), dtype=np.float32)
    init_def_shots = np.zeros((num_teams,), dtype=np.float32)

    for i, tname in enumerate(teams):
        row = team_positions_df.loc[team_positions_df['team_name'] == tname].iloc[0]
        init_positions[i, 0] = row['PC1']
        init_positions[i, 1] = row['PC2']
        init_points_pg[i] = row['points_per_game']
        # These two columns are optional; 0.0 is used when they are absent.
        init_avg_conceded[i] = row.get("avg_goals_conceded", 0.0)
        init_def_shots[i] = row.get("avg_shots_on_target_conceded", 0.0)

    # Keep integer team indices apart from the float coordinates so they are
    # never round-tripped through a float32 array.
    home_indices, away_indices, match_coords = [], [], []
    for _, row in match_positions_df.iterrows():
        hname = row['home_team_name']
        aname = row['away_team_name']
        if hname in team_name_to_idx and aname in team_name_to_idx:
            home_indices.append(team_name_to_idx[hname])
            away_indices.append(team_name_to_idx[aname])
            match_coords.append((row['PC1'], row['PC2']))
    if not home_indices:
        print("Warning: No valid matches found for training!")
        return [], team_positions_df, None
    match_coords = np.asarray(match_coords, dtype=np.float32)
    match_home_idx = tf.constant(home_indices, dtype=tf.int32)
    match_away_idx = tf.constant(away_indices, dtype=tf.int32)
    match_PC1 = tf.constant(match_coords[:, 0], dtype=tf.float32)
    match_PC2 = tf.constant(match_coords[:, 1], dtype=tf.float32)
    points_per_game = tf.constant(init_points_pg, dtype=tf.float32)
    avg_conceded_tf = tf.constant(init_avg_conceded, dtype=tf.float32)
    def_shots_tf = tf.constant(init_def_shots, dtype=tf.float32)
    tf.random.set_seed(random_seed)
    initial_rank_scale = 8.0
    positions = tf.Variable(init_positions, name="positions", dtype=tf.float32)
    rank_scale = tf.Variable(initial_rank_scale, name="rank_scale", dtype=tf.float32)
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=initial_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rate,
        staircase=False
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=clipnorm)
    losses = []
    best_loss = np.inf
    best_positions = None
    best_rank_scale = None
    for i in range(iterations):
        with tf.GradientTape() as tape:
            loss = compute_total_loss(positions, match_home_idx, match_away_idx,
                                      match_PC1, match_PC2, points_per_game, rank_scale,
                                      avg_conceded_tf, def_shots_tf, lambda_reg, elo_scores, alpha)
        grads = tape.gradient(loss, [positions, rank_scale])
        optimizer.apply_gradients(zip(grads, [positions, rank_scale]))
        loss_val = float(loss.numpy())
        losses.append(loss_val)
        # NOTE: the snapshot is taken after the gradient step, so it holds the
        # parameters that follow the ones ``loss_val`` was computed for.
        if loss_val < best_loss:
            best_loss = loss_val
            best_positions = positions.numpy().copy()
            best_rank_scale = float(rank_scale.numpy())
        if (i + 1) % verbose_interval == 0:
            step = optimizer.iterations.numpy()
            current_lr = lr_schedule(step).numpy()
            print(
                f"Iteration {i + 1}/{iterations}, Loss = {loss_val:.4f}, LR = {current_lr:.6f}, rank_scale = {rank_scale.numpy():.4f}")

    # No snapshot exists when iterations <= 0 or every loss was NaN; assigning
    # None would crash, so fall back to the initial parameters.
    if best_positions is None:
        print("Warning: no finite loss was recorded; keeping the initial positions and rank_scale.")
        best_positions = init_positions.copy()
        best_rank_scale = initial_rank_scale

    positions.assign(best_positions)
    rank_scale.assign(best_rank_scale)
    print(f"=> Finished training, best_loss={best_loss:.4f}, best rank_scale={best_rank_scale:.4f}")
    final_pos = positions.numpy()
    for tname, idx in team_name_to_idx.items():
        team_positions_df.loc[team_positions_df['team_name'] == tname, 'PC1'] = final_pos[idx, 0]
        team_positions_df.loc[team_positions_df['team_name'] == tname, 'PC2'] = final_pos[idx, 1]
    team_positions_df.to_csv("trained_team_positions.csv", index=False)
    with open("best_rank_scale.txt", "w") as f:
        f.write(str(best_rank_scale))
    return losses, team_positions_df, best_rank_scale

def run_cross_validation(team_positions, match_positions, n_splits=3, test_size=0.2,
                         random_state=42, **opt_kwargs):
    """Run repeated random train/test splits over the matches.

    For each of ``n_splits`` splits (seeded with ``random_state + i``) the
    team positions are trained on the training matches with
    ``adam_optimize_positions(**opt_kwargs)`` and the resulting positions are
    scored on the held-out matches with ``compute_total_loss``.

    Returns:
        list of ``(train_loss, test_loss)`` tuples, one per split. An entry is
        NaN when the corresponding loss could not be computed (no usable
        training matches, or no usable test matches).
    """
    # Score the held-out matches with the same hyper-parameters the optimizer
    # uses; these defaults mirror those of adam_optimize_positions.
    lambda_reg = opt_kwargs.get("lambda_reg", 1.0)
    alpha = opt_kwargs.get("alpha", 2.0)

    train_losses = []
    test_losses = []
    splits_info = []
    matches_df_all = match_positions.copy()
    all_indices = matches_df_all.index.values

    for i in range(n_splits):
        print(f"\n===== Split {i + 1}/{n_splits} =====")
        idx_train, idx_test = train_test_split(all_indices, test_size=test_size,
                                               random_state=random_state + i)
        train_matches_df = matches_df_all.loc[idx_train].copy()
        test_matches_df = matches_df_all.loc[idx_test].copy()

        losses, best_positions_df, best_scale_val = adam_optimize_positions(
            team_positions.copy(), train_matches_df, **opt_kwargs)
        if not losses:
            # The optimizer found no usable training match and returned
            # ([], df, None); there is nothing to evaluate for this split.
            print("Warning: no valid training matches in this split!")
            train_losses.append(np.nan)
            test_losses.append(np.nan)
            splits_info.append((np.nan, np.nan))
            continue
        train_losses.append(losses[-1])

        elo_scores_dict = initialize_elo_scores(best_positions_df)
        teams_test = best_positions_df['team_name'].unique().tolist()
        elo_scores = [elo_scores_dict[team] for team in teams_test]
        elo_scores = tf.convert_to_tensor(elo_scores, dtype=tf.float32)

        team_name_to_idx_test = {t: j for j, t in enumerate(teams_test)}
        final_positions = np.zeros((len(teams_test), 2), dtype=np.float32)
        final_points_pg = np.zeros((len(teams_test),), dtype=np.float32)
        final_avg_conceded = np.zeros((len(teams_test),), dtype=np.float32)
        final_def_shots = np.zeros((len(teams_test),), dtype=np.float32)

        for j, tname in enumerate(teams_test):
            row = best_positions_df.loc[best_positions_df['team_name'] == tname].iloc[0]
            final_positions[j, 0] = row['PC1']
            final_positions[j, 1] = row['PC2']
            final_points_pg[j] = row['points_per_game']
            final_avg_conceded[j] = row.get("avg_goals_conceded", 0.0)
            final_def_shots[j] = row.get("avg_shots_on_target_conceded", 0.0)

        # Explicit conversion to TensorFlow tensors.
        positions_tf = tf.constant(final_positions, dtype=tf.float32)
        points_pg_tf = tf.constant(final_points_pg, dtype=tf.float32)
        avg_conceded_tf_test = tf.constant(final_avg_conceded, dtype=tf.float32)
        def_shots_tf_test = tf.constant(final_def_shots, dtype=tf.float32)
        rank_scale_tf = tf.constant(best_scale_val, dtype=tf.float32)

        # Collect the held-out matches whose two teams are both known.
        test_array = []
        for idx, row in test_matches_df.iterrows():
            hname = row['home_team_name']
            aname = row['away_team_name']
            if hname in team_name_to_idx_test and aname in team_name_to_idx_test:
                test_array.append([team_name_to_idx_test[hname],
                                   team_name_to_idx_test[aname],
                                   row['PC1'], row['PC2']])
        test_array = np.array(test_array, dtype=np.float32)
        if len(test_array) == 0:
            print("Warning: no valid test matches in this split!")
            test_losses.append(np.nan)
            # Same (train, test) order as every other entry.
            splits_info.append((losses[-1], np.nan))
            continue

        match_home_idx_test = tf.constant(test_array[:, 0], dtype=tf.int32)
        match_away_idx_test = tf.constant(test_array[:, 1], dtype=tf.int32)
        match_PC1_test = tf.constant(test_array[:, 2], dtype=tf.float32)
        match_PC2_test = tf.constant(test_array[:, 3], dtype=tf.float32)

        # Held-out loss with the trained positions and rank_scale.
        test_loss_val = compute_total_loss(
            positions_tf,
            match_home_idx_test,
            match_away_idx_test,
            match_PC1_test,
            match_PC2_test,
            points_pg_tf,
            rank_scale_tf,
            avg_conceded_tf_test,
            def_shots_tf_test,
            lambda_reg,
            elo_scores,
            alpha=alpha
        ).numpy()

        test_losses.append(test_loss_val)
        splits_info.append((losses[-1], test_loss_val))
        print(f"Split {i + 1}: Train Loss = {losses[-1]:.4f}, Test Loss = {test_loss_val:.4f}")

    valid_train_losses = [x for x in train_losses if not np.isnan(x)]
    valid_test_losses = [x for x in test_losses if not np.isnan(x)]
    print("\n===== Cross Validation Summary =====")
    print(f"Train Losses = {valid_train_losses}")
    print(f"Test  Losses = {valid_test_losses}")
    if valid_train_losses:
        print(f"Avg Train Loss = {np.mean(valid_train_losses):.4f} +- {np.std(valid_train_losses):.4f}")
    if valid_test_losses:
        print(f"Avg Test  Loss = {np.mean(valid_test_losses):.4f} +- {np.std(valid_test_losses):.4f}")
    return splits_info

# 可视化
def visualize_team_evolution_by_league_static(team_positions_df, seasons_order,
                                              output_dir="/Users/peixuanma/Downloads/Output_Graphs"):
    """Save one PNG per league showing each team's (PC1, PC2) path over seasons.

    Only teams present in every season of ``seasons_order`` are drawn; when a
    league has no such team, all of its teams are drawn instead. Each team is
    a line through its per-season points, labelled with the season.

    Args:
        team_positions_df: DataFrame with ``team_name``, ``league``,
            ``season``, ``PC1`` and ``PC2`` columns.
        seasons_order: season labels in the order they should be connected.
        output_dir: directory the ``<league>_evolution_static.png`` files are
            written to; created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    leagues = team_positions_df['league'].unique().tolist()
    for league in leagues:
        league_df = team_positions_df.loc[team_positions_df['league'] == league].copy()
        team_counts = league_df.groupby("team_name")["season"].nunique()
        valid_teams = team_counts[team_counts == len(seasons_order)].index.tolist()
        if not valid_teams:
            print(f"No team continuously in {league} for all seasons. Using all teams instead.")
            valid_teams = league_df['team_name'].unique().tolist()
        valid_df = league_df.loc[league_df["team_name"].isin(valid_teams)].copy()
        # Replace the column outright: ``.loc[:, "season"] = ...`` writes into
        # the existing object column and loses the categorical order, so the
        # sort below would fall back to plain string order.
        valid_df["season"] = pd.Categorical(valid_df["season"], categories=seasons_order, ordered=True)
        valid_df = valid_df.sort_values(["team_name", "season"])

        plt.figure(figsize=(10, 8))
        plt.title(f"{league} - Team Evolution (Offense-Defense)")
        plt.xlabel("PC1 (Offense)")
        plt.ylabel("PC2 (Defense)")
        plt.grid(True)

        teams = valid_df['team_name'].unique().tolist()
        colors = plt.cm.tab10(np.linspace(0, 1, len(teams)))
        team_colors = dict(zip(teams, colors))

        for team in teams:
            sub = valid_df.loc[valid_df['team_name'] == team].sort_values("season")
            plt.plot(sub['PC1'], sub['PC2'], marker='o', linestyle='-', color=team_colors[team], label=team)
            # Tag every point with its season.
            for x_pos, y_pos, season_label in zip(sub['PC1'], sub['PC2'], sub['season']):
                plt.text(x_pos, y_pos, str(season_label), fontsize=8, ha='right', va='bottom')

        plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        filename = os.path.join(output_dir, f"{league}_evolution_static.png")
        plt.savefig(filename, dpi=150)
        plt.close()
        print(f"Saved static evolution graph for {league} -> {filename}")


if __name__ == "__main__":
    # Input directory and the league/season grid to process.
    base_path = '/Users/peixuanma/Downloads/data1'
    leagues = [
        ("england", "premier-league"),
        ("germany", "bundesliga"),
        ("spain", "la-liga"),
        ("france", "ligue-1"),
        ("italy", "serie-a"),
        ("netherlands", "eredivisie"),
        ("portugal", "ligapro"),
        ("denmark", "superliga"),
        ("england", "championship"),
        ("belgium", "pro-league"),
    ]
    seasons = [
        "2013-to-2014", "2014-to-2015", "2015-to-2016", "2016-to-2017",
        "2017-to-2018", "2018-to-2019", "2020-to-2021",
        "2021-to-2022", "2022-to-2023", "2023-to-2024",
    ]

    # Optimizer settings shared by cross validation and the final fit.
    training_options = dict(
        initial_lr=0.005,
        decay_steps=100000,
        decay_rate=0.80,
        clipnorm=1.0,
        iterations=100000,
        verbose_interval=10000,
        lambda_reg=1.0,
        alpha=2.0,
    )

    # Step 1: load the data.
    all_team_positions, all_match_positions = load_all_league_data(base_path, leagues, seasons)

    # Step 2: cross validation on random 80% train / 20% test splits.
    print("\n===== Running Cross Validation =====")
    run_cross_validation(
        team_positions=all_team_positions,
        match_positions=all_match_positions,
        n_splits=3,
        test_size=0.2,
        random_state=42,
        **training_options,
    )

    # Step 3: train the final model on all matches and save it.
    print("\n===== Final Model Training and Saving =====")
    final_losses, final_team_positions, final_rank_scale = adam_optimize_positions(
        all_team_positions.copy(),
        all_match_positions.copy(),
        random_seed=42,
        **training_options,
    )
    if final_losses:  # empty when training found no usable match
        final_team_positions.to_csv("trained_team_positions.csv", index=False)
        with open("best_rank_scale.txt", "w") as f:
            f.write(str(final_rank_scale))
        print(f"Final training loss: {final_losses[-1]:.4f}, best rank_scale: {final_rank_scale:.4f}")

    # Step 4: static per-league plots of the loaded team positions.
    print("\n===== Static Visualization of Team Evolution by League =====")
    visualize_team_evolution_by_league_static(all_team_positions, seasons_order=seasons)

    print("\nDone!")

In [7]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from itertools import product

# ====================== 计算防守统计数据 ======================
def compute_defensive_stats(match_df, team_positions_df):
    """Aggregate per-team defensive ratios and total goals conceded.

    For every home match of a team, six ratios of the away side's goals to
    another quantity are collected and later averaged:

      ratio1: goals / away pre-match PPG
      ratio2: goals / away corners
      ratio3: goals / (home yellow cards + home red cards + home fouls)
      ratio4: goals / away xG (``team_b_xg``)
      ratio5: goals / away shots (on target + off target)
      ratio6: goals / away possession

    A ratio is only recorded when its denominator is greater than zero, so
    zero, negative or missing denominators never produce a division by the
    tiny epsilon. ``total_goals_conceded`` sums goals conceded both at home
    and away.

    Args:
        match_df: one row per match with the columns read below.
        team_positions_df: DataFrame whose ``team_name`` column lists the
            teams to report on.

    Returns:
        DataFrame with columns ``team_name``, ``ratio1``..``ratio6`` and
        ``total_goals_conceded``; a ratio with no recorded value is 0. The
        frame is empty when there are no teams.
    """
    epsilon = 1e-12
    ratio_lists = [f'ratio{n}_list' for n in range(1, 7)]
    team_stats = {}
    for team in team_positions_df['team_name'].unique():
        team_stats[team] = {'total_goals_conceded': 0}
        for key in ratio_lists:
            team_stats[team][key] = []

    # Home matches: ratios and goals conceded by the home side.
    for idx, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        if home_team not in team_stats or away_team not in team_stats:
            print(f"警告: 主队 {home_team} 或客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        home_stats = team_stats[home_team]
        goals_against = row['away_team_goal_count']

        if row['Pre-Match PPG (Away)'] > 0:
            home_stats['ratio1_list'].append(goals_against / (row['Pre-Match PPG (Away)'] + epsilon))
        if row['away_team_corner_count'] > 0:
            home_stats['ratio2_list'].append(goals_against / (row['away_team_corner_count'] + epsilon))
        discipline = row['home_team_yellow_cards'] + row['home_team_red_cards'] + row['home_team_fouls']
        if discipline > 0:
            home_stats['ratio3_list'].append(goals_against / (discipline + epsilon))
        if row['team_b_xg'] > 0:
            home_stats['ratio4_list'].append(goals_against / (row['team_b_xg'] + epsilon))
        shots_total = row['away_team_shots_on_target'] + row['away_team_shots_off_target']
        if shots_total > 0:
            home_stats['ratio5_list'].append(goals_against / (shots_total + epsilon))
        if row['away_team_possession'] > 0:
            home_stats['ratio6_list'].append(goals_against / (row['away_team_possession'] + epsilon))
        home_stats['total_goals_conceded'] += goals_against

    # Away matches: only the goals conceded by the away side are added.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            print(f"警告: 客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        team_stats[away_team]['total_goals_conceded'] += row['home_team_goal_count']

    # Average each ratio list (0 when nothing was recorded) into one row per team.
    data = []
    for team, stats in team_stats.items():
        record = {'team_name': team}
        for n in range(1, 7):
            values = stats[f'ratio{n}_list']
            record[f'ratio{n}'] = np.mean(values) if values else 0
        record['total_goals_conceded'] = stats['total_goals_conceded']
        data.append(record)

    defensive_stats_df = pd.DataFrame(data)
    return defensive_stats_df


# ====================== ELO评分算法实现 ======================
def initialize_elo_scores(team_positions_df):
    """Seed every team with an ELO score based on its points-per-game rank.

    Teams are compared with each other on their mean ``points_per_game``
    across all of their rows. The best team gets rank 1 and the largest
    bonus: ``1500 + 20 * (number_of_teams - rank)``; ties share the average
    rank.

    Args:
        team_positions_df: DataFrame with ``team_name`` and
            ``points_per_game`` columns.

    Returns:
        dict mapping each unique team name to its initial ELO score. A team
        whose rank cannot be computed (missing name or points-per-game) keeps
        the base score of 1500. An empty input yields an empty dict.
    """
    teams = team_positions_df['team_name'].unique().tolist()
    if not teams:
        return {}

    # The rank has to be taken across teams; ranking a team's own rows makes
    # every single-row team rank 1 and erases all differences.
    ppg_by_team = team_positions_df.groupby('team_name')['points_per_game'].mean()
    # Descending: higher points-per-game -> better rank -> higher ELO.
    rank_by_team = ppg_by_team.rank(ascending=False)

    team_count = len(teams)
    team_elo = {team: 1500 for team in teams}
    for team in teams:
        rank = rank_by_team.get(team)
        if rank is not None and not pd.isna(rank):
            team_elo[team] += 20 * (team_count - rank)
    return team_elo


def update_elo_scores(elo_scores, home_team, away_team, home_score, away_score, K=30):
    """Update both teams' ELO scores for one match and return the table.

    ``elo_scores`` (team name -> score) is modified in place. Each side moves
    by ``K * (actual - expected)``, where ``actual`` is 1 for a win, 0 for a
    loss and 0.5 for a draw, and ``expected`` comes from the logistic ELO
    formula on a 400-point scale.
    """
    home_before = elo_scores[home_team]
    away_before = elo_scores[away_team]

    expected = (
        1 / (1 + 10 ** ((away_before - home_before) / 400)),  # home side
        1 / (1 + 10 ** ((home_before - away_before) / 400)),  # away side
    )

    # (home, away) actual scores for the three possible outcomes.
    if home_score > away_score:
        actual = (1, 0)
    elif home_score < away_score:
        actual = (0, 1)
    else:
        actual = (0.5, 0.5)

    # Home first, then away, each as its own in-place increment.
    for team, got, anticipated in zip((home_team, away_team), actual, expected):
        elo_scores[team] += K * (got - anticipated)

    return elo_scores


# ====================== 数据加载函数 ======================
def load_all_league_data(base_path, leagues, seasons):
    """Load every league/season pair, add defensive statistics and apply PCA.

    For each ``(country_name, league_name)`` in ``leagues`` and each entry of
    ``seasons`` this reads ``<country>-<league>-teams-<season>-stats.csv`` and
    ``<country>-<league>-matches-<season>-stats.csv`` from ``base_path``,
    lower-cases and strips team names, drops matches involving unknown teams,
    merges the output of ``compute_defensive_stats`` into the team table and
    projects both the team table and the match table onto two principal
    components.

    A pair is skipped with a printed message when a file is missing, no
    team-name column exists, no valid match remains, no defensive statistics
    were computed, or there is too little data for two components.

    Returns:
        ``(combined_team_positions, combined_match_positions)``. Either frame
        is empty (but carries its columns) when nothing was loaded.
    """
    ratio_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6']
    team_columns = (['team_name', 'PC1', 'PC2', 'points_per_game', 'league', 'season']
                    + ratio_columns + ['normalized_defense_score', 'team_season'])
    match_columns = ['home_team_name', 'away_team_name', 'PC1', 'PC2', 'league', 'season']

    all_team_positions = []
    all_match_positions = []
    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not os.path.exists(team_file) or not os.path.exists(match_file):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Normalize team names. Prefer "common_name", fall back to
            # "team_name"; without either there is nothing to match on.
            raw_names = team_df.get('common_name', team_df.get('team_name', None))
            if raw_names is None:
                print(f"错误: {team_file} 中未找到 'common_name' 或 'team_name' 列")
                continue
            team_df['team_name'] = raw_names.str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            # Keep only matches between teams present in the team table.
            # ``.copy()`` so the column assignments further down write to an
            # independent frame rather than a slice of the original.
            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)].copy()
            if len(match_df) < original_match_count:
                print(f"警告: 由于球队缺失，过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 此联赛和赛季没有有效比赛")
                continue

            # Defensive statistics derived from the matches.
            defensive_stats_df = compute_defensive_stats(match_df, team_df)
            if defensive_stats_df.empty:
                print("警告: 此联赛和赛季未计算出防守统计，跳过")
                continue

            # On a column-name clash the freshly computed "_def" value wins.
            team_df = team_df.merge(defensive_stats_df, on='team_name', how='left', suffixes=('', '_def'))
            for col in ratio_columns + ['total_goals_conceded']:
                if f'{col}_def' in team_df.columns:
                    team_df[col] = team_df[f'{col}_def']
                    team_df.drop(f'{col}_def', axis=1, inplace=True)

            # Min-max defensive score: fewest goals conceded -> 1, most -> 0.
            max_total_goals = team_df['total_goals_conceded'].max()
            min_total_goals = team_df['total_goals_conceded'].min()
            if max_total_goals != min_total_goals:
                team_df['normalized_defense_score'] = (max_total_goals - team_df['total_goals_conceded']) / (
                            max_total_goals - min_total_goals + 1e-12)
            else:
                team_df['normalized_defense_score'] = 0

            # Standardize the defensive features and apply PCA.
            defensive_columns = [
                'goals_conceded', 'goals_conceded_home', 'goals_conceded_away',
                'goals_conceded_per_match', 'goals_conceded_per_match_home', 'goals_conceded_per_match_away',
                'clean_sheets', 'clean_sheets_home', 'clean_sheets_away',
                'clean_sheet_percentage', 'clean_sheet_percentage_home', 'clean_sheet_percentage_away',
                'minutes_per_goal_conceded', 'minutes_per_goal_conceded_home', 'minutes_per_goal_conceded_away',
                'goals_conceded_half_time', 'goals_conceded_half_time_home', 'goals_conceded_half_time_away',
                'clean_sheet_half_time', 'clean_sheet_half_time_percentage',
                'fouls', 'fouls_home', 'fouls_away', 'cards_total', 'cards_total_home', 'cards_total_away',
                'xg_against_avg_overall', 'xg_against_avg_home', 'xg_against_avg_away',
                'total_goals_conceded', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                'normalized_defense_score'
            ]
            team_df_defensive = team_df[defensive_columns].fillna(0)
            # PCA(n_components=2) raises unless there are at least two rows.
            if min(team_df_defensive.shape) < 2:
                print(f"警告: {team_file} 的球队数据不足，无法提取两个主成分，跳过")
                continue
            scaler_defense = StandardScaler()
            X_scaled_defense = scaler_defense.fit_transform(team_df_defensive)
            pca_defense = PCA(n_components=2)
            X_pca_defense = pca_defense.fit_transform(X_scaled_defense)

            # Team position table for this league/season.
            team_positions = pd.DataFrame({
                'team_name': team_df['team_name'],
                'PC1': X_pca_defense[:, 0],
                'PC2': X_pca_defense[:, 1],
                'points_per_game': team_df['points_per_game'],
                'league': league_name,
                'season': season,
                'ratio1': team_df['ratio1'],
                'ratio2': team_df['ratio2'],
                'ratio3': team_df['ratio3'],
                'ratio4': team_df['ratio4'],
                'ratio5': team_df['ratio5'],
                'ratio6': team_df['ratio6'],
                'normalized_defense_score': team_df['normalized_defense_score']
            })
            team_positions['team_season'] = league_name + '_' + team_positions['team_name'] + '_' + team_positions[
                'season']
            all_team_positions.append(team_positions)

            # Match data: drop bookkeeping columns, then coerce what can be
            # coerced to numbers (decimal commas are turned into dots first).
            irrelevant_cols = ['timestamp', 'date_GMT', 'status', 'attendance', 'referee', 'stadium_name', 'Game Week']
            match_df = match_df.drop(columns=irrelevant_cols, errors='ignore')
            for col in match_df.columns:
                match_df[col] = match_df[col].astype(str).str.replace(',', '.', regex=False)
                try:
                    match_df[col] = pd.to_numeric(match_df[col])
                except ValueError:
                    # Genuinely textual column (e.g. team names): keep as strings.
                    pass
            numeric_cols = match_df.select_dtypes(include=[np.number]).columns
            if len(match_df) < 2 or len(numeric_cols) < 2:
                # The team rows of this season stay loaded; only matches are skipped.
                print(f"警告: {match_file} 的比赛数据不足，无法提取两个主成分，跳过比赛数据")
                continue
            match_df[numeric_cols] = match_df[numeric_cols].fillna(match_df[numeric_cols].mean())
            scaler_match = StandardScaler()
            X_scaled_match = scaler_match.fit_transform(match_df[numeric_cols])
            pca_match = PCA(n_components=2)
            X_pca_match = pca_match.fit_transform(X_scaled_match)
            match_positions = pd.DataFrame({
                'home_team_name': match_df['home_team_name'],
                'away_team_name': match_df['away_team_name'],
                'PC1': X_pca_match[:, 0],
                'PC2': X_pca_match[:, 1],
                'league': league_name,
                'season': season
            })
            all_match_positions.append(match_positions)

    # pd.concat raises ValueError on an empty list, so fall back to empty
    # frames that still expose the columns callers index into.
    if all_team_positions:
        combined_team_positions = pd.concat(all_team_positions, ignore_index=True)
    else:
        combined_team_positions = pd.DataFrame(columns=team_columns)
    if all_match_positions:
        combined_match_positions = pd.concat(all_match_positions, ignore_index=True)
    else:
        combined_match_positions = pd.DataFrame(columns=match_columns)
    return combined_team_positions, combined_match_positions


# ====================== 修改后的损失函数 ======================
def compute_total_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, points_per_game, rank_scale,
                       ratios, w, normalized_defense_score, lambda_defense, lambda_supervision, elo_scores, alpha=2.0,
                       max_match_loss=1.0, max_defense_loss=1.0, max_supervision_loss=1.0):
    """
    Compute the weighted sum of the match, defense and supervision losses.

    Each term is divided by its ``max_*`` normaliser before being combined:

        total = match / max_match_loss
                + lambda_defense * defense / max_defense_loss
                + lambda_supervision * supervision / max_supervision_loss

    The result is NOT clamped: a term exceeds 1 whenever its current value is
    larger than the normaliser that was passed in.

    Args:
        positions: team coordinates, two columns (column 1 is compared with
            the defense target).
        match_home_idx, match_away_idx: integer row indices into ``positions``
            / ``elo_scores`` for the home and away team of each match.
        match_PC1, match_PC2: per-match coordinates the teams are pulled to.
        points_per_game: accepted for interface compatibility; not used here.
        rank_scale: scale applied to the ELO gap when weighting a match.
        ratios: defense ratios, shape ``(num_teams, 6)``.
        w: weights of the six ratios (reshaped to ``(6,)``).
        normalized_defense_score: supervision target for the weighted ratios.
        lambda_defense, lambda_supervision: weights of the two extra terms.
        elo_scores: per-team ELO rating, indexed like ``positions``.
        alpha: accepted for interface compatibility; not used here.
        max_match_loss, max_defense_loss, max_supervision_loss: divisors used
            to normalise the three terms.

    Returns:
        Scalar tensor with the combined loss, or a constant ``0.0`` when there
        are no matches (the defense/supervision terms are skipped as well).
    """
    num_matches = tf.shape(match_home_idx)[0]
    if num_matches == 0:
        return tf.constant(0.0, dtype=tf.float32)

    # Match term: reuse the shared helper so training, evaluation and
    # compute_max_losses all use exactly the same per-match formula.
    per_match_loss = compute_match_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2,
                                        elo_scores, rank_scale)
    match_loss_norm = tf.reduce_mean(per_match_loss) / max_match_loss

    # Defense term: the second coordinate should equal minus the weighted
    # sum of the six ratios.
    ratios = tf.ensure_shape(ratios, [None, 6])
    w = tf.reshape(w, [6])
    defense_target = -tf.reduce_sum(w * ratios, axis=1)
    defense_loss = tf.reduce_mean(tf.square(positions[:, 1] - defense_target))
    defense_loss_norm = defense_loss / max_defense_loss

    # Supervision term: ties the learned weights to the provided defense score.
    supervision_loss = tf.reduce_mean(tf.square(defense_target - normalized_defense_score))
    supervision_loss_norm = supervision_loss / max_supervision_loss

    total_loss = match_loss_norm + lambda_defense * defense_loss_norm + lambda_supervision * supervision_loss_norm
    return total_loss


def compute_max_losses(team_positions_df, match_positions_df, **kwargs):
    """Compute the normalisers passed as ``max_*_loss`` to the loss function.

    Despite the name, each value is the *mean* of the corresponding loss term
    evaluated at the positions stored in ``team_positions_df`` with all ratio
    weights set to 1 and ``rank_scale`` set to 1.

    Args:
        team_positions_df: one row per team-season with the columns
            ``team_season``, ``PC1``, ``PC2``, ``ratio1``..``ratio6`` and
            ``normalized_defense_score`` (plus whatever
            ``initialize_elo_scores`` reads).
        match_positions_df: matches with ``home_team_name``,
            ``away_team_name``, ``season``, ``league``, ``PC1``, ``PC2``.
        **kwargs: accepted and ignored.

    Returns:
        ``(match_loss, defense_loss, supervision_loss)`` as Python floats.
        When no match can be mapped onto two known team-seasons the match
        value is ``1.0`` (a neutral divisor) instead of raising.
    """
    team_seasons = team_positions_df['team_season'].unique()
    team_season_to_idx = {t: i for i, t in enumerate(team_seasons)}
    # NOTE(review): rows of the frame are used positionally while indices come
    # from unique(); this lines up only if 'team_season' has no duplicates.
    positions = tf.constant(team_positions_df[['PC1', 'PC2']].values, dtype=tf.float32)
    ratios = tf.constant(team_positions_df[['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6']].values,
                         dtype=tf.float32)
    normalized_defense_score = tf.constant(team_positions_df['normalized_defense_score'].values, dtype=tf.float32)

    w = tf.ones([6], dtype=tf.float32)
    rank_scale = tf.constant(1.0, dtype=tf.float32)

    # The defense and supervision terms do not depend on the matches.
    defense_target = -tf.reduce_sum(w * ratios, axis=1)
    defense_loss = tf.reduce_mean(tf.square(positions[:, 1] - defense_target))
    supervision_loss = tf.reduce_mean(tf.square(defense_target - normalized_defense_score))

    # Keep only matches whose two teams both exist for that league/season.
    match_rows = []
    for hname, aname, season, league, pc1, pc2 in zip(
            match_positions_df['home_team_name'], match_positions_df['away_team_name'],
            match_positions_df['season'], match_positions_df['league'],
            match_positions_df['PC1'], match_positions_df['PC2']):
        h_idx = team_season_to_idx.get(f"{league}_{hname}_{season}")
        a_idx = team_season_to_idx.get(f"{league}_{aname}_{season}")
        if h_idx is not None and a_idx is not None:
            match_rows.append([h_idx, a_idx, pc1, pc2])

    if not match_rows:
        # An empty list becomes a 1-D array and `[:, 0]` would raise
        # IndexError; return a neutral divisor so the caller can report the
        # missing matches itself.
        return 1.0, float(defense_loss), float(supervision_loss)

    elo_scores_dict = initialize_elo_scores(team_positions_df)
    # NOTE(review): assumes "<league>_<team>_<season>" with no '_' inside the
    # league or team name -- confirm against the loader.
    team_names = [t.split('_')[1] for t in team_seasons]
    elo_scores = tf.convert_to_tensor([elo_scores_dict[name] for name in team_names], dtype=tf.float32)

    match_array = np.array(match_rows, dtype=np.float32)
    match_home_idx = tf.constant(match_array[:, 0], dtype=tf.int32)
    match_away_idx = tf.constant(match_array[:, 1], dtype=tf.int32)
    match_PC1 = tf.constant(match_array[:, 2], dtype=tf.float32)
    match_PC2 = tf.constant(match_array[:, 3], dtype=tf.float32)

    match_loss = tf.reduce_mean(
        compute_match_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, elo_scores, rank_scale))

    return float(match_loss), float(defense_loss), float(supervision_loss)


def compute_match_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, elo_scores, rank_scale):
    """Return the ELO-weighted, min-max scaled distance loss of every match.

    For each match the Euclidean distance from the home team and from the
    away team to the match point ``(match_PC1, match_PC2)`` is computed. All
    distances (home and away together) are min-max scaled, the two scaled
    distances of a match are added, and the sum is multiplied by
    ``1 / (1 + |home_elo - away_elo| * rank_scale)``.

    Returns:
        Tensor with one loss value per match (not reduced).
    """
    targets = tf.stack([match_PC1, match_PC2], axis=1)
    d_home = tf.norm(tf.gather(positions, match_home_idx) - targets, axis=1)
    d_away = tf.norm(tf.gather(positions, match_away_idx) - targets, axis=1)

    # Scale with the extremes of the pooled home + away distances; the small
    # constant keeps the divisor non-zero when all distances coincide.
    pooled = tf.concat([d_home, d_away], axis=0)
    lowest = tf.reduce_min(pooled)
    span = tf.reduce_max(pooled) - lowest + 1e-12
    scaled_home = (d_home - lowest) / span
    scaled_away = (d_away - lowest) / span

    # Matches between teams with a large rating gap get a smaller weight.
    rating_gap = tf.abs(tf.gather(elo_scores, match_home_idx) - tf.gather(elo_scores, match_away_idx))
    match_weight = 1.0 / (1.0 + rating_gap * rank_scale)
    return match_weight * (scaled_home + scaled_away)


# ====================== Adam优化函数 ======================
def adam_optimize_positions(team_positions_df, match_positions_df, initial_lr=0.001, decay_steps=50000, decay_rate=0.80,
                            clipnorm=1.0, iterations=50000, verbose_interval=500, random_seed=42,
                            lambda_defense=0.1, lambda_supervision=0.1, alpha=2.0,
                            max_match_loss=1.0, max_defense_loss=1.0, max_supervision_loss=1.0):
    """Fit team coordinates, ``rank_scale`` and the ratio weights ``w`` with Adam.

    The coordinates start from the ``PC1``/``PC2`` columns, ``rank_scale``
    starts at 1.0 and ``w`` starts as small seeded Gaussian noise. The
    parameters that produced the lowest loss seen during training are the
    ones returned.

    Args:
        team_positions_df: one row per team-season (``team_season``, ``PC1``,
            ``PC2``, ``points_per_game``, ``ratio1``..``ratio6``,
            ``normalized_defense_score``, ...). The caller's frame is not
            modified.
        match_positions_df: matches with ``home_team_name``,
            ``away_team_name``, ``season``, ``league``, ``PC1``, ``PC2``.
        initial_lr, decay_steps, decay_rate: exponential learning-rate decay.
        clipnorm: gradient clipping norm handed to Adam.
        iterations: number of optimisation steps.
        verbose_interval: print progress every this many steps.
        random_seed: seeds TensorFlow and the initial value of ``w``.
        lambda_defense, lambda_supervision, alpha, max_match_loss,
        max_defense_loss, max_supervision_loss: forwarded to
            ``compute_total_loss``.

    Returns:
        ``(losses, team_positions_df, best_rank_scale, best_w)``. ``losses``
        holds the loss of every step. When no match maps onto known
        team-seasons the result is ``([], <input frame>, None, None)``.

    Side effects:
        Writes ``trained_team_positions.csv`` and ``best_rank_scale.txt`` to
        the current working directory after a successful run.
    """
    team_seasons = team_positions_df['team_season'].unique()
    team_season_to_idx = {t: i for i, t in enumerate(team_seasons)}
    num_teams = len(team_seasons)

    elo_scores_dict = initialize_elo_scores(team_positions_df)
    # NOTE(review): assumes "<league>_<team>_<season>" with no '_' inside the
    # league or team name -- confirm against the loader.
    team_names_for_seasons = [t.split('_')[1] for t in team_seasons]
    elo_scores = tf.convert_to_tensor([elo_scores_dict[team_name] for team_name in team_names_for_seasons],
                                      dtype=tf.float32)

    team_positions_df = team_positions_df.set_index('team_season')
    init_positions = np.zeros((num_teams, 2), dtype=np.float32)
    init_points_pg = np.zeros((num_teams,), dtype=np.float32)
    # NOTE(review): ratios / defense scores are taken in frame row order while
    # positions follow `team_seasons`; both agree only if 'team_season' is unique.
    ratios_df = team_positions_df[['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6']]
    ratios = tf.constant(ratios_df.values, dtype=tf.float32)
    normalized_defense_score = tf.constant(team_positions_df['normalized_defense_score'].values, dtype=tf.float32)

    for i, team_season in enumerate(team_seasons):
        row = team_positions_df.loc[team_season]
        init_positions[i, 0] = row['PC1']
        init_positions[i, 1] = row['PC2']
        init_points_pg[i] = row['points_per_game']

    # Keep only matches whose two teams both exist for that league/season.
    match_array = []
    for hname, aname, season, league, pc1, pc2 in zip(
            match_positions_df['home_team_name'], match_positions_df['away_team_name'],
            match_positions_df['season'], match_positions_df['league'],
            match_positions_df['PC1'], match_positions_df['PC2']):
        h_idx = team_season_to_idx.get(f"{league}_{hname}_{season}")
        a_idx = team_season_to_idx.get(f"{league}_{aname}_{season}")
        if h_idx is not None and a_idx is not None:
            match_array.append([h_idx, a_idx, pc1, pc2])
    match_array = np.array(match_array, dtype=np.float32)
    if len(match_array) == 0:
        print("警告: 训练中未找到有效比赛！")
        return [], team_positions_df.reset_index(), None, None
    match_home_idx = tf.constant(match_array[:, 0], dtype=tf.int32)
    match_away_idx = tf.constant(match_array[:, 1], dtype=tf.int32)
    match_PC1 = tf.constant(match_array[:, 2], dtype=tf.float32)
    match_PC2 = tf.constant(match_array[:, 3], dtype=tf.float32)
    points_per_game = tf.constant(init_points_pg, dtype=tf.float32)

    tf.random.set_seed(random_seed)
    # tf.random.set_seed does not touch NumPy, so draw the initial weights
    # from a generator seeded with the same value to keep runs reproducible.
    rng = np.random.default_rng(random_seed)
    positions = tf.Variable(init_positions, name="positions", dtype=tf.float32)
    rank_scale = tf.Variable(1.0, name="rank_scale", dtype=tf.float32)
    w = tf.Variable(rng.standard_normal(6) * 0.01, name="weights", dtype=tf.float32)
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_lr, decay_steps, decay_rate, staircase=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=clipnorm)
    trainable = [positions, rank_scale, w]

    losses = []
    best_loss = np.inf
    # Start from the initial parameters so the restore below is always valid,
    # even with zero iterations or when every loss is NaN.
    best_positions = positions.numpy().copy()
    best_rank_scale = float(rank_scale.numpy())
    best_w = w.numpy().copy()

    for i in range(iterations):
        with tf.GradientTape() as tape:
            loss = compute_total_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, points_per_game,
                                      rank_scale, ratios, w, normalized_defense_score, lambda_defense,
                                      lambda_supervision,
                                      elo_scores, alpha, max_match_loss, max_defense_loss, max_supervision_loss)
        loss_val = float(loss.numpy())
        losses.append(loss_val)
        if loss_val < best_loss:
            # Snapshot BEFORE the update: these are the parameters that
            # actually produced `loss_val`.
            best_loss = loss_val
            best_positions = positions.numpy().copy()
            best_rank_scale = float(rank_scale.numpy())
            best_w = w.numpy().copy()
        grads = tape.gradient(loss, trainable)
        optimizer.apply_gradients(zip(grads, trainable))
        if (i + 1) % verbose_interval == 0:
            step = optimizer.iterations.numpy()
            current_lr = lr_schedule(step).numpy()
            print(
                f"迭代 {i + 1}/{iterations}, 损失 = {loss_val:.4f}, 学习率 = {current_lr:.6f}, rank_scale = {rank_scale.numpy():.4f}")

    # Restore the best parameters seen during training.
    positions.assign(best_positions)
    rank_scale.assign(best_rank_scale)
    w.assign(best_w)
    print(f"=> 训练完成, 最佳损失={best_loss:.4f}, 最佳 rank_scale={best_rank_scale:.4f}")

    final_pos = positions.numpy()
    for idx, team_season in enumerate(team_seasons):
        team_positions_df.loc[team_season, 'PC1'] = final_pos[idx, 0]
        team_positions_df.loc[team_season, 'PC2'] = final_pos[idx, 1]
    team_positions_df = team_positions_df.reset_index()
    team_positions_df.to_csv("trained_team_positions.csv", index=False)
    with open("best_rank_scale.txt", "w") as f:
        f.write(str(best_rank_scale))

    return losses, team_positions_df, best_rank_scale, best_w


# ====================== 交叉验证与网格搜索函数 ======================
def run_cross_validation(team_positions, match_positions, n_splits=3, test_size=0.2,
                         random_state=42, lambda_defense_values=(0.01, 0.1, 1.0),
                         lambda_supervision_values=(0.01, 0.1, 1.0), **opt_kwargs):
    """
    Grid-search ``lambda_defense`` / ``lambda_supervision`` with repeated
    random train/test splits of the matches.

    For every parameter pair the matches are split ``n_splits`` times with
    ``train_test_split`` (seed ``random_state + i``), the model is trained on
    the training matches and ``compute_total_loss`` is evaluated on the test
    matches. The pair with the lowest mean test loss wins.

    Args:
        team_positions (pd.DataFrame): team-season data.
        match_positions (pd.DataFrame): match data.
        n_splits (int): number of random splits per parameter pair.
        test_size (float): fraction of matches held out in each split.
        random_state (int): base seed of the splits.
        lambda_defense_values (iterable): candidates for the defense weight.
        lambda_supervision_values (iterable): candidates for the supervision
            weight.
        **opt_kwargs: forwarded to ``adam_optimize_positions``.

    Returns:
        tuple | None: best ``(lambda_defense, lambda_supervision)``, or
        ``None`` when no pair produced a valid test loss.
    """
    # Imported locally so the function does not rely on a module-level import
    # that is not part of the visible import header.
    from itertools import product

    best_params = None
    best_test_loss = np.inf
    param_combinations = list(product(lambda_defense_values, lambda_supervision_values))
    # Evaluate with the same alpha that training receives (default 2.0).
    eval_alpha = opt_kwargs.get('alpha', 2.0)

    # The match frame is never modified below, so one copy is enough.
    matches_df_all = match_positions.copy()
    all_indices = matches_df_all.index.values

    for lambda_defense, lambda_supervision in param_combinations:
        print(f"\n===== 网格搜索: lambda_defense={lambda_defense}, lambda_supervision={lambda_supervision} =====")
        train_losses = []
        test_losses = []

        for i in range(n_splits):
            print(f"\n===== 第 {i + 1}/{n_splits} 次拆分 =====")
            idx_train, idx_test = train_test_split(all_indices, test_size=test_size, random_state=random_state + i)
            train_matches_df = matches_df_all.loc[idx_train].copy()
            test_matches_df = matches_df_all.loc[idx_test].copy()

            max_match_loss, max_defense_loss, max_supervision_loss = compute_max_losses(team_positions, train_matches_df)

            losses, best_positions_df, best_scale_val, w_trained = adam_optimize_positions(
                team_positions.copy(), train_matches_df, lambda_defense=lambda_defense,
                lambda_supervision=lambda_supervision, max_match_loss=max_match_loss,
                max_defense_loss=max_defense_loss, max_supervision_loss=max_supervision_loss, **opt_kwargs)
            if not losses:
                print("警告: 此拆分中没有有效训练数据，跳过此拆分。")
                train_losses.append(np.nan)
                test_losses.append(np.nan)
                continue
            train_losses.append(losses[-1])

            elo_scores_dict = initialize_elo_scores(best_positions_df)
            team_seasons_test = best_positions_df['team_season'].unique().tolist()
            # Constant-time lookups instead of list membership / list.index per match.
            team_season_to_idx_test = {t: k for k, t in enumerate(team_seasons_test)}
            team_names_for_seasons_test = [team.split('_')[1] for team in team_seasons_test]
            elo_scores_test = tf.convert_to_tensor(
                [elo_scores_dict[team_name] for team_name in team_names_for_seasons_test], dtype=tf.float32)

            best_positions_df = best_positions_df.set_index('team_season')
            final_positions = best_positions_df[['PC1', 'PC2']].values
            final_points_pg = best_positions_df['points_per_game'].values
            ratios_values = best_positions_df[['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6']].values
            normalized_defense_score = best_positions_df['normalized_defense_score'].values

            # Keep only test matches whose two teams both exist for that league/season.
            match_array_test = []
            for hname, aname, season, league, pc1, pc2 in zip(
                    test_matches_df['home_team_name'], test_matches_df['away_team_name'],
                    test_matches_df['season'], test_matches_df['league'],
                    test_matches_df['PC1'], test_matches_df['PC2']):
                h_idx = team_season_to_idx_test.get(f"{league}_{hname}_{season}")
                a_idx = team_season_to_idx_test.get(f"{league}_{aname}_{season}")
                if h_idx is not None and a_idx is not None:
                    match_array_test.append([h_idx, a_idx, pc1, pc2])
            match_array_test = np.array(match_array_test, dtype=np.float32)
            if len(match_array_test) == 0:
                print("警告: 此拆分中没有有效测试比赛！")
                test_losses.append(np.nan)
                continue

            match_home_idx_test = tf.constant(match_array_test[:, 0], dtype=tf.int32)
            match_away_idx_test = tf.constant(match_array_test[:, 1], dtype=tf.int32)
            match_PC1_test = tf.constant(match_array_test[:, 2], dtype=tf.float32)
            match_PC2_test = tf.constant(match_array_test[:, 3], dtype=tf.float32)

            positions_tf = tf.constant(final_positions, dtype=tf.float32)
            points_pg_tf = tf.constant(final_points_pg, dtype=tf.float32)
            ratios_tf = tf.constant(ratios_values, dtype=tf.float32)
            normalized_defense_score_tf = tf.constant(normalized_defense_score, dtype=tf.float32)
            rank_scale_tf = tf.constant(best_scale_val, dtype=tf.float32)
            w_tf = tf.constant(w_trained, dtype=tf.float32)

            test_loss_val = compute_total_loss(
                positions_tf, match_home_idx_test, match_away_idx_test, match_PC1_test, match_PC2_test,
                points_pg_tf, rank_scale_tf, ratios_tf, w_tf, normalized_defense_score_tf,
                lambda_defense, lambda_supervision, elo_scores_test, alpha=eval_alpha,
                max_match_loss=max_match_loss, max_defense_loss=max_defense_loss, max_supervision_loss=max_supervision_loss
            ).numpy()

            test_losses.append(test_loss_val)
            print(f"第 {i + 1} 次拆分: 训练损失 = {train_losses[-1]:.4f}, 测试损失 = {test_loss_val:.4f}")

        # Splits without usable data were recorded as NaN; ignore them here.
        valid_test_losses = [x for x in test_losses if not np.isnan(x)]
        if valid_test_losses:
            mean_test_loss = np.mean(valid_test_losses)
            print(f"lambda_defense={lambda_defense}, lambda_supervision={lambda_supervision} 的平均测试损失 = {mean_test_loss:.4f}")
            if mean_test_loss < best_test_loss:
                best_test_loss = mean_test_loss
                best_params = (lambda_defense, lambda_supervision)

    if best_params is None:
        # Nothing could be evaluated; indexing best_params would raise TypeError.
        print("警告: 没有任何超参数组合得到有效的测试损失。")
        return None

    print(f"\n最佳超参数: lambda_defense={best_params[0]}, lambda_supervision={best_params[1]}, 最佳测试损失={best_test_loss:.4f}")
    return best_params


# ====================== 可视化函数 ======================
def visualize_team_evolution_by_league_static(team_positions_df, seasons_order,
                                              output_dir="/Users/peixuanma/Downloads/Output_Graphs"):
    """Save one PNG per league showing each team's path through (PC1, PC2).

    Only teams present in every season of ``seasons_order`` are drawn; if a
    league has no such team, all of its teams are drawn instead. Each team is
    one line whose points are labelled with the season.

    Args:
        team_positions_df: frame with ``league``, ``team_name``, ``season``,
            ``PC1`` and ``PC2`` columns.
        seasons_order: seasons in the order in which they should be connected.
        output_dir: directory for ``<league>_evolution_static.png`` files;
            created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    leagues = team_positions_df['league'].unique().tolist()
    for league in leagues:
        league_df = team_positions_df.loc[team_positions_df['league'] == league].copy()
        team_counts = league_df.groupby("team_name")["season"].nunique()
        valid_teams = team_counts[team_counts == len(seasons_order)].index.tolist()
        if not valid_teams:
            print(f"{league} 中没有球队连续出现在所有赛季，使用所有球队。")
            valid_teams = league_df['team_name'].unique().tolist()
        valid_df = league_df.loc[league_df["team_name"].isin(valid_teams)].copy()
        # Replace the column (instead of `.loc[:, "season"] = ...`, which sets
        # values in place and keeps the old dtype on pandas >= 2.0) so the
        # sort below really follows `seasons_order`.
        valid_df["season"] = pd.Categorical(valid_df["season"], categories=seasons_order, ordered=True)
        valid_df = valid_df.sort_values(["team_name", "season"])

        plt.figure(figsize=(10, 8))
        plt.title(f"{league} - 球队演变 (进攻-防守)")
        plt.xlabel("PC1 (进攻)")
        plt.ylabel("PC2 (防守)")
        plt.grid(True)

        teams = valid_df['team_name'].unique().tolist()
        colors = plt.cm.tab10(np.linspace(0, 1, len(teams)))
        team_colors = dict(zip(teams, colors))

        for team in teams:
            sub = valid_df.loc[valid_df['team_name'] == team].sort_values("season")
            plt.plot(sub['PC1'], sub['PC2'], marker='o', linestyle='-', color=team_colors[team], label=team)
            # Label every point of the path with its season.
            for x, y, season in zip(sub['PC1'], sub['PC2'], sub['season']):
                plt.text(x, y, str(season), fontsize=8, ha='right', va='bottom')

        plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        filename = os.path.join(output_dir, f"{league}_evolution_static.png")
        plt.savefig(filename, dpi=150)
        plt.close()
        print(f"保存 {league} 的静态演变图 -> {filename}")


# ====================== 主函数 ======================
if __name__ == "__main__":
    # Input location and the league / season grid to load.
    base_path = '/Users/peixuanma/Downloads/data1'
    leagues = [
        ("england", "premier-league"), ("germany", "bundesliga"), ("spain", "la-liga"),
        ("france", "ligue-1"), ("italy", "serie-a"), ("netherlands", "eredivisie"),
        ("portugal", "ligapro"), ("denmark", "superliga"), ("england", "championship"),
        ("portugal", "liga-nos"), ("italy", "serie-b"), ("germany", "2-bundesliga")
    ]
    seasons = [
        "2013-to-2014", "2014-to-2015", "2015-to-2016", "2016-to-2017", "2017-to-2018",
        "2018-to-2019", "2020-to-2021", "2021-to-2022", "2022-to-2023", "2023-to-2024"
    ]

    # Optimiser settings shared by the grid search and the final fit.
    optimizer_settings = dict(
        initial_lr=0.001, decay_steps=50000, decay_rate=0.80,
        clipnorm=1.0, iterations=50000, verbose_interval=5000, alpha=2.0,
    )

    # Step 1: load the data.
    all_team_positions, all_match_positions = load_all_league_data(base_path, leagues, seasons)

    # Step 2: grid-search the two loss weights.
    print("\n===== 运行网格搜索 =====")
    lambda_defense_values = [0.001, 0.005, 0.01, 0.5, 1.0]
    lambda_supervision_values = [0.001, 0.005, 0.01, 0.5, 1.0]
    best_params = run_cross_validation(
        team_positions=all_team_positions, match_positions=all_match_positions,
        n_splits=3, test_size=0.2, random_state=42,
        lambda_defense_values=lambda_defense_values, lambda_supervision_values=lambda_supervision_values,
        **optimizer_settings
    )

    # Step 3: train the final model on all matches with the best weights.
    print("\n===== 使用最佳超参数训练最终模型 =====")
    lambda_defense, lambda_supervision = best_params
    max_match_loss, max_defense_loss, max_supervision_loss = compute_max_losses(all_team_positions, all_match_positions)
    final_losses, final_team_positions, final_rank_scale, final_w = adam_optimize_positions(
        all_team_positions.copy(), all_match_positions.copy(),
        random_seed=42,
        lambda_defense=lambda_defense, lambda_supervision=lambda_supervision,
        max_match_loss=max_match_loss, max_defense_loss=max_defense_loss, max_supervision_loss=max_supervision_loss,
        **optimizer_settings
    )
    if final_losses:
        final_team_positions.to_csv("trained_team_positions.csv", index=False)
        with open("best_rank_scale.txt", "w") as f:
            f.write(str(final_rank_scale))
        print(f"最终训练损失: {final_losses[-1]:.4f}, 最佳 rank_scale: {final_rank_scale:.4f}")

    # Step 4: plot how each team moves across seasons, one figure per league.
    print("\n===== 按联赛可视化球队演变 =====")
    visualize_team_evolution_by_league_static(final_team_positions, seasons_order=seasons)

    print("\n完成！")

加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesliga - 2021-to-2022
加载数据: germany - bundesliga - 2022-to-2023
加载数据: germany - bundesliga - 2023-to-2024
加载数据: spain - la-liga - 2013-to-2014
加载数据: spain - la-liga - 2014-to-2015
加载数据: spain - la-liga - 2015-to-2016
加载数据: spa

  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()


保存 premier-league 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/premier-league_evolution_static.png


  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()


保存 bundesliga 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/bundesliga_evolution_static.png
保存 la-liga 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/la-liga_evolution_static.png


  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligue-1 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/ligue-1_evolution_static.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-a 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-a_evolution_static.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 eredivisie 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/eredivisie_evolution_static.png
ligapro 中没有球队连续出现在所有赛季，使用所有球队。


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligapro 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/ligapro_evolution_static.png
保存 superliga 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/superliga_evolution_static.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 championship 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/championship_evolution_static.png
保存 liga-nos 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/liga-nos_evolution_static.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-b 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-b_evolution_static.png
保存 2-bundesliga 的静态演变图 -> /Users/peixuanma/Downloads/Output_Graphs/2-bundesliga_evolution_static.png

完成！


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
