In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
import sklearn

print(f"scikit-learn 版本: {sklearn.__version__}")

# ====================== 计算防守统计数据 ======================
def compute_defensive_stats(match_df, team_positions_df):
    """Compute per-team defensive statistics from match rows.

    Six "goals conceded per X" ratios are sampled for every team from the
    matches it played at HOME and then averaged per team:

    * ``ratio1``: away goals / ``Pre-Match PPG (Away)``
    * ``ratio2``: away goals / ``away_team_corner_count``
    * ``ratio3``: away goals / (home yellow cards + home red cards + home fouls)
    * ``ratio4``: away goals / ``team_b_xg``
    * ``ratio5``: away goals / (away shots on target + away shots off target)
    * ``ratio6``: away goals / ``away_team_possession``

    A sample is recorded only when its denominator is strictly positive.
    Away matches contribute to the goal total and the match count only.

    Args:
        match_df: One row per match, carrying the columns listed above plus
            ``home_team_name``, ``away_team_name``, ``home_team_goal_count``
            and ``away_team_goal_count``.
        team_positions_df: Frame whose ``team_name`` column lists the teams
            to report on.

    Returns:
        pandas.DataFrame with one row per team and the columns ``team_name``,
        ``ratio1``..``ratio6``, ``total_goals_conceded`` and
        ``average_goals_conceded``. A ratio with no valid sample, and the
        average of a team without matches, is reported as 0.
    """
    epsilon = 1e-8  # keeps the division stable when a denominator is tiny
    team_stats = {
        team: {
            'total_goals_conceded': 0,
            'num_matches': 0,
            **{f'ratio{i}_list': [] for i in range(1, 7)},
        }
        for team in team_positions_df['team_name'].unique()
    }

    def record_ratio(samples, goals, denominator):
        # One guard for all six ratios: a zero, negative or NaN denominator
        # carries no information, and dividing by it would push ~1e8
        # outliers, negative values or NaN into the per-team mean.
        if denominator > 0:
            samples.append(goals / (denominator + epsilon))

    # Home pass: ratio samples plus goals conceded at home.
    for idx, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        if home_team not in team_stats or away_team not in team_stats:
            print(f"警告: 主队 {home_team} 或客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        home = team_stats[home_team]
        conceded = row['away_team_goal_count']
        discipline = row['home_team_yellow_cards'] + row['home_team_red_cards'] + row['home_team_fouls']
        away_shots = row['away_team_shots_on_target'] + row['away_team_shots_off_target']

        record_ratio(home['ratio1_list'], conceded, row['Pre-Match PPG (Away)'])
        record_ratio(home['ratio2_list'], conceded, row['away_team_corner_count'])
        record_ratio(home['ratio3_list'], conceded, discipline)
        record_ratio(home['ratio4_list'], conceded, row['team_b_xg'])
        record_ratio(home['ratio5_list'], conceded, away_shots)
        record_ratio(home['ratio6_list'], conceded, row['away_team_possession'])

        home['total_goals_conceded'] += conceded
        home['num_matches'] += 1  # home fixture counted

    # Away pass: only goal totals and match counts are accumulated.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            print(f"警告: 客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        team_stats[away_team]['total_goals_conceded'] += row['home_team_goal_count']
        team_stats[away_team]['num_matches'] += 1  # away fixture counted

    # Collapse the per-match samples into one summary row per team.
    data = []
    for team, stats in team_stats.items():
        num_matches = stats['num_matches']
        entry = {'team_name': team}
        for i in range(1, 7):
            samples = stats[f'ratio{i}_list']
            entry[f'ratio{i}'] = np.mean(samples) if samples else 0
        entry['total_goals_conceded'] = stats['total_goals_conceded']
        entry['average_goals_conceded'] = (
            stats['total_goals_conceded'] / num_matches if num_matches > 0 else 0)
        data.append(entry)

    return pd.DataFrame(data)

# ====================== 计算进攻统计数据 ======================
def compute_offensive_stats(match_df, team_positions_df):
    """Compute per-team offensive statistics from match rows.

    Six "goals scored per X" ratios are sampled for every team from the
    matches it played at HOME and then averaged per team:

    * ``off_ratio1``: home goals / ``Pre-Match PPG (Home)``
    * ``off_ratio2``: home goals / ``home_team_corner_count``
    * ``off_ratio3``: home goals / (away yellow cards + away red cards + away fouls)
    * ``off_ratio4``: home goals / ``team_a_xg``
    * ``off_ratio5``: home goals / (home shots on target + home shots off target)
    * ``off_ratio6``: home goals / ``home_team_possession``

    A sample is recorded only when its denominator is strictly positive.
    Away matches contribute to the goal total and the match count only.
    Matches that reference a team missing from ``team_positions_df`` are
    skipped silently.

    Args:
        match_df: One row per match, carrying the columns listed above plus
            ``home_team_name``, ``away_team_name``, ``home_team_goal_count``
            and ``away_team_goal_count``.
        team_positions_df: Frame whose ``team_name`` column lists the teams
            to report on.

    Returns:
        pandas.DataFrame with one row per team and the columns ``team_name``,
        ``off_ratio1``..``off_ratio6``, ``total_goals_scored`` and
        ``average_goals_scored``. A ratio with no valid sample, and the
        average of a team without matches, is reported as 0.
    """
    epsilon = 1e-8  # keeps the division stable when a denominator is tiny
    team_stats = {
        team: {
            'total_goals_scored': 0,
            'num_matches': 0,
            **{f'off_ratio{i}_list': [] for i in range(1, 7)},
        }
        for team in team_positions_df['team_name'].unique()
    }

    def record_ratio(samples, goals, denominator):
        # One guard for all six ratios: a zero, negative or NaN denominator
        # carries no information, and dividing by it would push ~1e8
        # outliers, negative values or NaN into the per-team mean.
        if denominator > 0:
            samples.append(goals / (denominator + epsilon))

    # Home pass: ratio samples plus goals scored at home.
    for idx, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        if home_team not in team_stats or away_team not in team_stats:
            continue
        home = team_stats[home_team]
        scored = row['home_team_goal_count']
        opponent_discipline = (
            row['away_team_yellow_cards'] + row['away_team_red_cards'] + row['away_team_fouls'])
        home_shots = row['home_team_shots_on_target'] + row['home_team_shots_off_target']

        record_ratio(home['off_ratio1_list'], scored, row['Pre-Match PPG (Home)'])
        record_ratio(home['off_ratio2_list'], scored, row['home_team_corner_count'])
        record_ratio(home['off_ratio3_list'], scored, opponent_discipline)
        record_ratio(home['off_ratio4_list'], scored, row['team_a_xg'])
        record_ratio(home['off_ratio5_list'], scored, home_shots)
        record_ratio(home['off_ratio6_list'], scored, row['home_team_possession'])

        home['total_goals_scored'] += scored
        home['num_matches'] += 1

    # Away pass: only goal totals and match counts are accumulated.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            continue
        team_stats[away_team]['total_goals_scored'] += row['away_team_goal_count']
        team_stats[away_team]['num_matches'] += 1

    # Collapse the per-match samples into one summary row per team.
    data = []
    for team, stats in team_stats.items():
        num_matches = stats['num_matches']
        entry = {'team_name': team}
        for i in range(1, 7):
            samples = stats[f'off_ratio{i}_list']
            entry[f'off_ratio{i}'] = np.mean(samples) if samples else 0
        entry['total_goals_scored'] = stats['total_goals_scored']
        entry['average_goals_scored'] = (
            stats['total_goals_scored'] / num_matches if num_matches > 0 else 0)
        data.append(entry)

    return pd.DataFrame(data)

# ====================== ELO评分算法实现 ======================
def initialize_elo_scores(team_positions_df):
    """Seed every team with an ELO rating derived from its points per game.

    Each team starts at 1500 and receives a bonus of ``20 * (n_teams - rank)``,
    where ``rank`` is the team's position when all teams are ordered by their
    mean ``points_per_game`` (rank 1 = highest mean). The ranking is done
    across teams; ranking inside a single team's own rows would not compare
    it with anyone else.

    Args:
        team_positions_df: Frame with ``team_name`` and ``points_per_game``
            columns. A team may appear in several rows; its rows are averaged.

    Returns:
        dict mapping team name to its initial ELO rating (float), in order of
        first appearance. Teams whose ``points_per_game`` is entirely NaN are
        ranked last.
    """
    mean_ppg = team_positions_df.groupby('team_name', sort=False)['points_per_game'].mean()
    # Descending rank: the strongest team gets rank 1 and the largest bonus.
    ranks = mean_ppg.rank(ascending=False, na_option='bottom')
    n_teams = len(mean_ppg)
    return {team: 1500 + 20 * (n_teams - float(rank)) for team, rank in ranks.items()}

def update_elo_scores(elo_scores, home_team, away_team, home_score, away_score, K=30):
    """Apply one match result to the ELO table.

    Args:
        elo_scores: dict mapping team name to rating; updated in place.
        home_team: Key of the home side in ``elo_scores``.
        away_team: Key of the away side in ``elo_scores``.
        home_score: Goals scored by the home side.
        away_score: Goals scored by the away side.
        K: Step size applied to the gap between actual and expected result.

    Returns:
        The same ``elo_scores`` dict, after the update.
    """
    rating_home = elo_scores[home_team]
    rating_away = elo_scores[away_team]
    # Logistic expectation on a 400-point scale, evaluated for each side.
    win_prob_home = 1 / (1 + 10 ** ((rating_away - rating_home) / 400))
    win_prob_away = 1 / (1 + 10 ** ((rating_home - rating_away) / 400))

    # Actual outcome per side: 1 for a win, 0 for a loss, 0.5 each for a draw.
    if home_score > away_score:
        outcome_home, outcome_away = 1, 0
    elif home_score < away_score:
        outcome_home, outcome_away = 0, 1
    else:
        outcome_home, outcome_away = 0.5, 0.5

    elo_scores[home_team] += K * (outcome_home - win_prob_home)
    elo_scores[away_team] += K * (outcome_away - win_prob_away)
    return elo_scores

# ====================== 数据加载函数（含交叉验证） ======================
def _pca_project_2d(features):
    """Standardize ``features`` and project every row onto its first two principal components.

    Returns an ``(n_rows, 2)`` array whose i-th row belongs to the i-th input
    row. Components that cannot be computed (fewer than two rows, or fewer
    than two columns) are left at zero.
    """
    values = np.asarray(features, dtype=float)
    projected = np.zeros((values.shape[0], 2))
    n_components = min(2, *values.shape)
    if values.shape[0] < 2 or n_components < 1:
        return projected
    scaled = StandardScaler().fit_transform(values)
    projected[:, :n_components] = PCA(n_components=n_components, random_state=42).fit_transform(scaled)
    return projected


def load_all_league_data(base_path, leagues, seasons):
    """Load and preprocess the team and match CSVs of every league/season.

    For each ``(country, league)`` pair and season, the files
    ``{country}-{league}-teams-{season}-stats.csv`` and
    ``{country}-{league}-matches-{season}-stats.csv`` are read from
    ``base_path``. Combinations with a missing file or no usable match are
    skipped with a printed warning.

    Team rows are enriched with the defensive and offensive statistics, a
    min-max ``normalized_defense_score`` (fewest goals conceded -> 1) and a
    2-D PCA projection of the defensive features. Match rows get a 2-D PCA
    projection of all their numeric columns. Each PCA is fitted once on all
    rows of the season, so every coordinate pair belongs to the row it is
    stored with; no cross-validation is performed.

    Args:
        base_path: Directory that holds the CSV files.
        leagues: Iterable of ``(country_name, league_name)`` pairs.
        seasons: Iterable of season labels as used in the file names.

    Returns:
        Tuple ``(team_positions, match_positions)`` of concatenated DataFrames.

    Raises:
        ValueError: If no league/season combination could be loaded.
    """
    # NOTE(review): the `league` column and `team_season` key use league_name
    # only, so two countries that share a league name get the same label.
    stat_columns = [
        'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'total_goals_conceded', 'average_goals_conceded',
        'off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6', 'total_goals_scored',
        'average_goals_scored',
    ]
    # The team projection is built from the defensive features only.
    defensive_columns = ['total_goals_conceded', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                         'average_goals_conceded']
    ignored_match_columns = ['timestamp', 'date_GMT', 'status', 'attendance', 'referee', 'stadium_name', 'Game Week']

    all_team_positions, all_match_positions = [], []
    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not os.path.exists(team_file) or not os.path.exists(match_file):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Normalize team names so both files can be joined on them.
            team_df['team_name'] = team_df.get('common_name', team_df.get('team_name', None)).str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            # Keep only matches between teams present in the team file. The
            # copy makes the later column assignments hit an independent
            # frame instead of a slice of the raw CSV data.
            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)].copy()
            if len(match_df) < original_match_count:
                print(f"警告: 过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 无有效比赛数据")
                continue

            # Defensive and offensive statistics per team.
            defensive_stats_df = compute_defensive_stats(match_df, team_df)
            offensive_stats_df = compute_offensive_stats(match_df, team_df)
            if defensive_stats_df.empty or offensive_stats_df.empty:
                print("警告: 未计算出防守或进攻统计")
                continue

            team_df = team_df.merge(defensive_stats_df, on='team_name', how='left')
            team_df = team_df.merge(offensive_stats_df, on='team_name', how='left')

            for col in stat_columns:
                if col not in team_df.columns:
                    print(f"警告: {col} 列缺失，设为0")
                    team_df[col] = 0

            # Min-max normalized defense score: fewest goals conceded maps to 1.
            conceded = team_df['total_goals_conceded']
            if conceded.max() != conceded.min():
                team_df['normalized_defense_score'] = (conceded.max() - conceded) / (
                    conceded.max() - conceded.min() + 1e-8)
            else:
                team_df['normalized_defense_score'] = 0

            # Team projection: one PCA fit over all teams of the season.
            team_pca = _pca_project_2d(team_df[defensive_columns].fillna(0))

            team_positions = pd.DataFrame({
                'team_name': team_df['team_name'], 'PC1': team_pca[:, 0], 'PC2': team_pca[:, 1],
                'points_per_game': team_df['points_per_game'], 'league': league_name, 'season': season,
                'ratio1': team_df['ratio1'], 'ratio2': team_df['ratio2'], 'ratio3': team_df['ratio3'],
                'ratio4': team_df['ratio4'], 'ratio5': team_df['ratio5'], 'ratio6': team_df['ratio6'],
                'off_ratio1': team_df['off_ratio1'], 'off_ratio2': team_df['off_ratio2'], 'off_ratio3': team_df['off_ratio3'],
                'off_ratio4': team_df['off_ratio4'], 'off_ratio5': team_df['off_ratio5'], 'off_ratio6': team_df['off_ratio6'],
                'normalized_defense_score': team_df['normalized_defense_score'],
                'total_goals_conceded': team_df['total_goals_conceded'],
                'average_goals_conceded': team_df['average_goals_conceded'],
                'total_goals_scored': team_df['total_goals_scored'],
                'average_goals_scored': team_df['average_goals_scored']
            })
            team_positions['team_season'] = league_name + '_' + team_positions['team_name'] + '_' + team_positions['season']
            all_team_positions.append(team_positions)

            # Match projection over every numeric match column.
            match_df = match_df.drop(columns=ignored_match_columns, errors='ignore')
            numeric_cols = match_df.select_dtypes(include=[np.number]).columns
            match_df[numeric_cols] = match_df[numeric_cols].fillna(match_df[numeric_cols].mean())
            # A column that is entirely NaN has a NaN mean; zero it so the
            # scaler and PCA never see NaN.
            match_pca = _pca_project_2d(match_df[numeric_cols].fillna(0))

            match_positions = pd.DataFrame({
                'home_team_name': match_df['home_team_name'], 'away_team_name': match_df['away_team_name'],
                'PC1': match_pca[:, 0], 'PC2': match_pca[:, 1], 'league': league_name, 'season': season
            })
            all_match_positions.append(match_positions)

    if not all_team_positions:
        # Same exception type pd.concat raises on an empty list, with a
        # message that points at the actual cause.
        raise ValueError(f"未加载到任何联赛数据，请检查数据目录: {base_path}")

    return pd.concat(all_team_positions, ignore_index=True), pd.concat(all_match_positions, ignore_index=True)

# ====================== 损失函数 ======================
def compute_total_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, points_per_game, rank_scale,
                       def_ratios, w_def, off_ratios, w_off, normalized_defense_score, lambda_defense, lambda_offense,
                       lambda_supervision, lambda_reg, elo_scores):
    """Return the scalar training loss for the current team positions.

    The loss is the sum of:

    * match term: ELO-weighted, min-max normalized distances between each
      match point ``(match_PC1, match_PC2)`` and the positions of its home
      and away team;
    * defense term: squared gap between ``positions[:, 1]`` and the negated
      ``|w_def|``-weighted sum of ``def_ratios``;
    * offense term: squared gap between ``positions[:, 0]`` and the
      ``|w_off|``-weighted sum of ``off_ratios``;
    * supervision term: squared gap between the defense target and
      ``normalized_defense_score``;
    * L2 penalty on ``|w_def|`` and ``|w_off|`` scaled by ``lambda_reg``.

    ``points_per_game`` is accepted but not used. With no matches the loss
    is 0.0; a NaN or infinite total is also replaced by 0.0.
    """
    epsilon = 1e-8
    if tf.shape(match_home_idx)[0] == 0:
        return tf.constant(0.0, dtype=tf.float32)

    # ---- match term ----
    match_points = tf.stack([match_PC1, match_PC2], axis=1)
    home_offset = tf.gather(positions, match_home_idx) - match_points
    away_offset = tf.gather(positions, match_away_idx) - match_points
    # epsilon is added to the offset vector itself, before the norm is taken
    dist_home = tf.norm(home_offset + epsilon, axis=1)
    dist_away = tf.norm(away_offset + epsilon, axis=1)
    all_distances = tf.concat([dist_home, dist_away], axis=0)
    dist_range = tf.reduce_max(all_distances) - tf.reduce_min(all_distances) + epsilon
    dist_home_norm = (dist_home - tf.reduce_min(all_distances)) / dist_range
    dist_away_norm = (dist_away - tf.reduce_min(all_distances)) / dist_range

    # Matches between teams with similar ELO get the larger weight.
    elo_gap = tf.abs(tf.gather(elo_scores, match_home_idx) - tf.gather(elo_scores, match_away_idx))
    weight = 1.0 / (1.0 + elo_gap * rank_scale + epsilon)
    match_loss = tf.reduce_mean(weight * (dist_home_norm + dist_away_norm))

    # ---- defense term ----
    abs_w_def = tf.abs(w_def)
    defense_target = tf.negative(tf.reduce_sum(abs_w_def * def_ratios, axis=1))
    defense_loss = tf.reduce_mean(tf.square(positions[:, 1] - defense_target))

    # ---- offense term ----
    abs_w_off = tf.abs(w_off)
    offense_target = tf.reduce_sum(abs_w_off * off_ratios, axis=1)
    offense_loss = tf.reduce_mean(tf.square(positions[:, 0] - offense_target))

    # ---- supervision term ----
    supervision_loss = tf.reduce_mean(tf.square(defense_target - normalized_defense_score))

    # ---- weight regularization ----
    regularization_loss = lambda_reg * (tf.reduce_sum(tf.square(abs_w_def)) + tf.reduce_sum(tf.square(abs_w_off)))

    total_loss = (match_loss + lambda_defense * defense_loss + lambda_offense * offense_loss +
                  lambda_supervision * supervision_loss + regularization_loss)
    not_finite = tf.math.logical_or(tf.math.is_nan(total_loss), tf.math.is_inf(total_loss))
    return tf.where(not_finite, 0.0, total_loss)

# ====================== Adam优化函数 ======================
def adam_optimize_positions(team_positions_df, match_positions_df, initial_lr=0.005, decay_steps=20000, decay_rate=0.9,
                            clipnorm=0.5, iterations=30000, verbose_interval=1000, random_seed=42, lambda_defense=0.1,
                            lambda_offense=0.1, lambda_supervision=0.1, lambda_reg=0.01, w_def=None, w_off=None):
    """Optimize team positions with Adam for a fixed number of iterations.

    There is no early stopping: all ``iterations`` steps are run, and the
    state with the lowest observed loss is restored at the end.

    Args:
        team_positions_df: One row per team-season with ``team_season``,
            ``team_name``, ``PC1``, ``PC2``, ``points_per_game``,
            ``normalized_defense_score`` and the sixteen statistic columns
            read below.
        match_positions_df: One row per match with ``league``, ``season``,
            ``home_team_name``, ``away_team_name``, ``PC1`` and ``PC2``.
        initial_lr, decay_steps, decay_rate: Exponential learning-rate schedule.
        clipnorm: Gradient-norm clipping passed to Adam.
        iterations: Number of optimization steps.
        verbose_interval: Print progress every this many steps.
        random_seed: Seed handed to ``tf.random.set_seed``.
        lambda_defense, lambda_offense, lambda_supervision, lambda_reg:
            Weights of the loss terms.
        w_def, w_off: Optional initial weights (8 values each); uniform
            ``1/8`` weights are used when omitted.

    Returns:
        Tuple ``(losses, team_positions_df, best_rank_scale, best_w_def,
        best_w_off)``. ``team_positions_df`` carries the optimized ``PC1`` and
        ``PC2`` in their original scale. When no match can be mapped to known
        team-seasons the result is ``([], team_positions_df, None, None, None)``.
    """
    team_seasons = team_positions_df['team_season'].unique()
    team_season_to_idx = {t: i for i, t in enumerate(team_seasons)}

    elo_scores_dict = initialize_elo_scores(team_positions_df)
    # Take the team name from the frame instead of parsing it out of the
    # composite key; splitting on '_' breaks as soon as a league or team
    # name contains an underscore.
    team_name_of = dict(zip(team_positions_df['team_season'], team_positions_df['team_name']))
    elo_scores = tf.convert_to_tensor(
        [elo_scores_dict[team_name_of[team_season]] for team_season in team_seasons], dtype=tf.float32)

    team_positions_df = team_positions_df.set_index('team_season')
    init_positions = team_positions_df[['PC1', 'PC2']].values
    def_ratios = team_positions_df[['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'total_goals_conceded', 'average_goals_conceded']].values
    off_ratios = team_positions_df[['off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6', 'total_goals_scored', 'average_goals_scored']].values

    # Standardize positions and features; the position scaler is kept so the
    # optimized coordinates can be mapped back to the original scale.
    scaler_pos = StandardScaler()
    scaler_def = StandardScaler()
    scaler_off = StandardScaler()
    positions_scaled = scaler_pos.fit_transform(init_positions)
    def_ratios_scaled = scaler_def.fit_transform(def_ratios)
    off_ratios_scaled = scaler_off.fit_transform(off_ratios)

    positions = tf.Variable(positions_scaled, dtype=tf.float32)
    def_ratios = tf.constant(def_ratios_scaled, dtype=tf.float32)
    off_ratios = tf.constant(off_ratios_scaled, dtype=tf.float32)
    normalized_defense_score = tf.constant(team_positions_df['normalized_defense_score'].values, dtype=tf.float32)
    points_per_game = tf.constant(team_positions_df['points_per_game'].values, dtype=tf.float32)

    # Map every match to the indices of its two team-seasons. Indices stay
    # integers throughout rather than passing through a float32 array.
    home_indices, away_indices, match_pc1, match_pc2 = [], [], [], []
    for _, row in match_positions_df.iterrows():
        home_idx = team_season_to_idx.get(f"{row['league']}_{row['home_team_name']}_{row['season']}")
        away_idx = team_season_to_idx.get(f"{row['league']}_{row['away_team_name']}_{row['season']}")
        if home_idx is None or away_idx is None:
            continue  # the match references a team-season without a position
        home_indices.append(home_idx)
        away_indices.append(away_idx)
        match_pc1.append(row['PC1'])
        match_pc2.append(row['PC2'])
    if not home_indices:
        print("警告: 无有效比赛数据")
        return [], team_positions_df.reset_index(), None, None, None

    match_home_idx = tf.constant(home_indices, dtype=tf.int32)
    match_away_idx = tf.constant(away_indices, dtype=tf.int32)
    match_PC1 = tf.constant(match_pc1, dtype=tf.float32)
    match_PC2 = tf.constant(match_pc2, dtype=tf.float32)

    tf.random.set_seed(random_seed)
    rank_scale = tf.Variable(1.0, dtype=tf.float32)
    w_def = tf.Variable(np.abs(w_def) if w_def is not None else np.ones(8, dtype=np.float32) / 8, dtype=tf.float32)
    w_off = tf.Variable(np.abs(w_off) if w_off is not None else np.ones(8, dtype=np.float32) / 8, dtype=tf.float32)
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_lr, decay_steps, decay_rate)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=clipnorm)

    losses, best_loss = [], float('inf')
    best_positions, best_rank_scale, best_w_def, best_w_off = positions.numpy().copy(), 1.0, w_def.numpy().copy(), w_off.numpy().copy()

    for i in range(iterations):
        with tf.GradientTape() as tape:
            loss = compute_total_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, points_per_game,
                                      rank_scale, def_ratios, w_def, off_ratios, w_off, normalized_defense_score,
                                      lambda_defense, lambda_offense, lambda_supervision, lambda_reg, elo_scores)
        grads = tape.gradient(loss, [positions, rank_scale, w_def, w_off])
        if any(g is None for g in grads):
            print(f"警告: 迭代 {i + 1} 梯度为None")
            continue
        optimizer.apply_gradients(zip(grads, [positions, rank_scale, w_def, w_off]))
        loss_val = float(loss.numpy())
        losses.append(loss_val)

        # Remember the best state seen so far without interrupting training.
        # NOTE(review): compute_total_loss maps a NaN/inf loss to 0.0, so a
        # diverged step would register here as the best loss — confirm.
        if loss_val < best_loss:
            best_loss = loss_val
            best_positions, best_rank_scale, best_w_def, best_w_off = (positions.numpy().copy(), float(rank_scale.numpy()),
                                                                       w_def.numpy().copy(), w_off.numpy().copy())

        if (i + 1) % verbose_interval == 0:
            print(f"迭代 {i + 1}/{iterations}, 损失 = {loss_val:.4f}, rank_scale = {rank_scale.numpy():.4f}, 最佳损失 = {best_loss:.4f}")

    # Restore the best state and write its positions back in original scale.
    positions.assign(best_positions)
    rank_scale.assign(best_rank_scale)
    w_def.assign(best_w_def)
    w_off.assign(best_w_off)
    final_pos = scaler_pos.inverse_transform(positions.numpy())
    for idx, team_season in enumerate(team_seasons):
        team_positions_df.loc[team_season, 'PC1'] = final_pos[idx, 0]
        team_positions_df.loc[team_season, 'PC2'] = final_pos[idx, 1]
    team_positions_df = team_positions_df.reset_index()
    return losses, team_positions_df, best_rank_scale, best_w_def, best_w_off

# ====================== 随机搜索超参数调整 ======================
def random_search_hyperparameters(team_positions, match_positions, w_def, w_off, n_iter=10, random_state=42):
    """Randomly sample loss weights and return the best-scoring set.

    Each of the ``n_iter`` trials draws ``lambda_defense``,
    ``lambda_offense`` and ``lambda_supervision`` uniformly from
    ``[0.01, 0.2)`` and ``lambda_reg`` from ``[0.001, 0.02)``, then runs
    ``adam_optimize_positions`` on copies of the inputs. A trial is scored by
    the lowest loss it reached, because that is the state the optimizer
    restores and returns.

    Args:
        team_positions: Team-season frame passed to the optimizer.
        match_positions: Match frame passed to the optimizer.
        w_def, w_off: Initial defensive and offensive weights.
        n_iter: Number of random trials.
        random_state: Seed for NumPy's global random generator.

    Returns:
        dict with the four sampled values of the best trial, or ``None`` when
        no trial produced a loss.
    """
    # NOTE(review): the lambdas scale the loss being compared, so trials with
    # smaller lambdas tend to score lower regardless of fit — confirm this is
    # the intended selection criterion.
    np.random.seed(random_state)
    best_loss, best_params = float('inf'), None

    for _ in range(n_iter):
        params = {
            'lambda_defense': np.random.uniform(0.01, 0.2),
            'lambda_offense': np.random.uniform(0.01, 0.2),
            'lambda_supervision': np.random.uniform(0.01, 0.2),
            'lambda_reg': np.random.uniform(0.001, 0.02)
        }
        print(f"\n随机搜索: {params}")

        losses, _, _, _, _ = adam_optimize_positions(
            team_positions.copy(), match_positions.copy(), w_def=w_def, w_off=w_off, **params
        )
        if not losses:
            continue  # the trial had no usable matches
        trial_loss = min(losses)
        if trial_loss < best_loss:
            best_loss, best_params = trial_loss, params

    print(f"\n最佳超参数: {best_params}, 最佳损失: {best_loss:.4f}")
    return best_params

# ====================== 可视化函数 ======================
def visualize_team_evolution_by_league_static(team_positions_df, seasons_order,
                                              output_dir="/Users/peixuanma/Downloads/Output_Graphs"):
    """Save one PNG per league that traces each team's position across seasons.

    Only teams present in all ``len(seasons_order)`` seasons are drawn; if a
    league has no such team, every team of the league is drawn. Each team is
    one line through its ``(PC1, PC2)`` points, ordered by season label, with
    the season written next to each point.

    Args:
        team_positions_df: Frame with ``league``, ``team_name``, ``season``,
            ``PC1`` and ``PC2`` columns.
        seasons_order: Sequence of season labels; only its length is used.
        output_dir: Directory for the ``{league}_evolution.png`` files;
            created if it does not exist.
    """
    # NOTE(review): title and axis labels are Chinese; a matplotlib font
    # without CJK glyphs would emit warnings at layout/save time — confirm
    # the font setup if those warnings appear.
    os.makedirs(output_dir, exist_ok=True)

    for league in team_positions_df['league'].unique():
        league_df = team_positions_df[team_positions_df['league'] == league].copy()
        valid_teams = league_df.groupby("team_name").filter(
            lambda x: x['season'].nunique() == len(seasons_order))['team_name'].unique()
        if not valid_teams.size:
            valid_teams = league_df['team_name'].unique()
        valid_df = league_df[league_df['team_name'].isin(valid_teams)].sort_values(['team_name', 'season'])

        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            ax.set_title(f"{league} - 球队演变 (进攻-防守)")
            ax.set_xlabel("PC1 (进攻)")
            ax.set_ylabel("PC2 (防守)")
            ax.grid(True)

            for team in valid_df['team_name'].unique():
                sub = valid_df[valid_df['team_name'] == team]
                ax.plot(sub['PC1'], sub['PC2'], marker='o', label=team)
                for _, row in sub.iterrows():
                    ax.text(row['PC1'], row['PC2'], row['season'], fontsize=8, ha='right')

            ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            fig.tight_layout()
            filename = os.path.join(output_dir, f"{league}_evolution.png")
            fig.savefig(filename, dpi=150)
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)
        print(f"保存 {league} 图 -> {filename}")

# ====================== 主函数 ======================
# Script entry point: load data, search loss weights, train, export, plot.
if __name__ == "__main__":
    base_path = '/Users/peixuanma/Downloads/data1'
    leagues = [
        ("england", "premier-league"), ("germany", "bundesliga"), ("spain", "la-liga"),
        ("france", "ligue-1"), ("france", "ligue-2"), ("italy", "serie-a"), ("netherlands", "eredivisie"),
        ("portugal", "ligapro"), ("denmark", "superliga"), ("england", "championship"), ("spain", "segunda-division"),
        ("switzerland", "super-league"), ("portugal", "liga-nos"), ("italy", "serie-b"), ("germany", "2-bundesliga"),
        ("scotland", "premiership"), ("belgium", "pro-league"), ("austria", "bundesliga"),
    ]
    seasons = [
        "2013-to-2014", "2014-to-2015", "2015-to-2016", "2016-to-2017", "2017-to-2018",
        "2018-to-2019", "2019-to-2020", "2020-to-2021", "2021-to-2022", "2022-to-2023", "2023-to-2024"
    ]

    # Manually supplied defensive and offensive weights (example values).
    w_def = np.array([
        0.12714323,  # ratio1
        0.11415731,  # ratio2
        0.04532705,  # ratio3
        0.23075414,  # ratio4
        0.03240472,  # ratio5
        0.14205644,  # ratio6
        0.13323908,  # total_goals_conceded
        0.17491804   # average_goals_conceded
    ], dtype=np.float32)

    w_off = np.array([
        0.07438711,  # off_ratio1
        0.07787083,  # off_ratio2
        0.08495551,  # off_ratio3
        0.22353454,  # off_ratio4
        0.08152269,  # off_ratio5
        0.08466186,  # off_ratio6
        0.16022788,  # total_goals_scored
        0.21283953   # average_goals_scored
    ], dtype=np.float32)

    # Load data
    print("===== 加载数据 =====")
    all_team_positions, all_match_positions = load_all_league_data(base_path, leagues, seasons)

    # Random search over the loss weights
    print("\n===== 随机搜索超参数 =====")
    best_params = random_search_hyperparameters(all_team_positions, all_match_positions, w_def, w_off)

    # Final training. The search returns None when no trial produced a loss;
    # fall back to the optimizer defaults instead of failing on `**None`.
    print("\n===== 最终训练 =====")
    final_losses, final_team_positions, final_rank_scale, final_w_def, final_w_off = adam_optimize_positions(
        all_team_positions.copy(), all_match_positions.copy(), w_def=w_def, w_off=w_off, **(best_params or {})
    )
    if final_losses:
        final_team_positions.to_csv("trained_team_positions.csv", index=False)
        print(f"最终损失: {final_losses[-1]:.4f}")
        print(f"最终防守权重: {final_w_def}")
        print(f"最终进攻权重: {final_w_off}")

    # Visualization
    print("\n===== 可视化 =====")
    visualize_team_evolution_by_league_static(final_team_positions, seasons)

    print("\n完成！")

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

scikit-learn 版本: 1.4.2
===== 加载数据 =====
加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2019-to-2020
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2019-to-2020
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesliga - 2021-to-2022
加载数据: germany - bundesliga - 2022-to-2023
加载数据: germany - bundesliga - 2023-

  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 premier-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/premier-league_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 bundesliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/bundesliga_evolution.png
保存 la-liga 图 -> /Users/peixuanma/Downloads/Output_Graphs/la-liga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligue-1 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligue-1_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligue-2 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligue-2_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-a 图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-a_evolution.png
保存 eredivisie 图 -> /Users/peixuanma/Downloads/Output_Graphs/eredivisie_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligapro 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligapro_evolution.png
保存 superliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/superliga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 championship 图 -> /Users/peixuanma/Downloads/Output_Graphs/championship_evolution.png
保存 segunda-division 图 -> /Users/peixuanma/Downloads/Output_Graphs/segunda-division_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 super-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/super-league_evolution.png
保存 liga-nos 图 -> /Users/peixuanma/Downloads/Output_Graphs/liga-nos_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-b 图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-b_evolution.png
保存 2-bundesliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/2-bundesliga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 premiership 图 -> /Users/peixuanma/Downloads/Output_Graphs/premiership_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 pro-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/pro-league_evolution.png

完成！


In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
import sklearn

print(f"scikit-learn 版本: {sklearn.__version__}")

# ====================== 计算防守统计数据 ======================
def compute_defensive_stats(match_df, team_positions_df):
    """Aggregate per-team defensive figures from a table of matches.

    Six "goals conceded per X" ratios are collected from each team's HOME
    matches only; the conceded-goal total and match count also include the
    team's away matches.

    Args:
        match_df: One row per match. Must provide the team-name, goal-count,
            pre-match PPG, corner, card, foul, xG, shot and possession columns
            read below.
        team_positions_df: Frame whose ``team_name`` column lists the teams to
            report on.

    Returns:
        DataFrame with one row per team and the columns ``team_name``,
        ``ratio1`` .. ``ratio6``, ``total_goals_conceded`` and
        ``average_goals_conceded``. A ratio with no samples is reported as 0.
    """
    epsilon = 1e-8  # keeps every denominator strictly positive

    def mean_or_zero(samples):
        return np.mean(samples) if samples else 0

    team_stats = {}
    for team in team_positions_df['team_name'].unique():
        record = {'total_goals_conceded': 0, 'num_matches': 0}
        for k in range(1, 7):
            record[f'ratio{k}_list'] = []
        team_stats[team] = record

    # Pass 1: home matches feed the ratios and the home share of the totals.
    for idx, row in match_df.iterrows():
        home_team = row['home_team_name']
        away_team = row['away_team_name']
        if not (home_team in team_stats and away_team in team_stats):
            print(f"警告: 主队 {home_team} 或客队 {away_team} 未找到，跳过比赛 {idx}")
            continue

        record = team_stats[home_team]
        conceded = row['away_team_goal_count']

        opponent_ppg = row['Pre-Match PPG (Away)']
        if opponent_ppg > 0:
            record['ratio1_list'].append(conceded / (opponent_ppg + epsilon))

        opponent_corners = row['away_team_corner_count']
        if opponent_corners > 0:
            record['ratio2_list'].append(conceded / (opponent_corners + epsilon))

        # Cards and fouls committed by the defending (home) side; always recorded.
        discipline = (row['home_team_yellow_cards'] + row['home_team_red_cards']
                      + row['home_team_fouls'] + epsilon)
        record['ratio3_list'].append(conceded / discipline)

        opponent_xg = row['team_b_xg']
        if opponent_xg > 0:
            record['ratio4_list'].append(conceded / (opponent_xg + epsilon))

        # Shots faced; always recorded.
        opponent_shots = (row['away_team_shots_on_target']
                          + row['away_team_shots_off_target'] + epsilon)
        record['ratio5_list'].append(conceded / opponent_shots)

        opponent_possession = row['away_team_possession']
        if opponent_possession > 0:
            record['ratio6_list'].append(conceded / (opponent_possession + epsilon))

        record['total_goals_conceded'] += conceded
        record['num_matches'] += 1  # one more home match

    # Pass 2: away matches only add to the totals; only the away side is checked here.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            print(f"警告: 客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        record = team_stats[away_team]
        record['total_goals_conceded'] += row['home_team_goal_count']
        record['num_matches'] += 1  # one more away match

    # Collapse the per-match samples into one summary row per team.
    summary_rows = []
    for team, record in team_stats.items():
        played = record['num_matches']
        summary = {'team_name': team}
        for k in range(1, 7):
            summary[f'ratio{k}'] = mean_or_zero(record[f'ratio{k}_list'])
        summary['total_goals_conceded'] = record['total_goals_conceded']
        summary['average_goals_conceded'] = (
            record['total_goals_conceded'] / played if played > 0 else 0)
        summary_rows.append(summary)

    return pd.DataFrame(summary_rows)

# ====================== 计算进攻统计数据 ======================
def compute_offensive_stats(match_df, team_positions_df):
    """Aggregate per-team attacking figures from a table of matches.

    Six "goals scored per X" ratios are collected from each team's HOME
    matches only; the scored-goal total and match count also include the
    team's away matches.

    Args:
        match_df: One row per match. Must provide the team-name, goal-count,
            pre-match PPG, corner, card, foul, xG, shot and possession columns
            read below.
        team_positions_df: Frame whose ``team_name`` column lists the teams to
            report on.

    Returns:
        DataFrame with one row per team and the columns ``team_name``,
        ``off_ratio1`` .. ``off_ratio6``, ``total_goals_scored`` and
        ``average_goals_scored``. A ratio with no samples is reported as 0.
    """
    epsilon = 1e-8  # keeps every denominator strictly positive

    def mean_or_zero(samples):
        return np.mean(samples) if samples else 0

    team_stats = {}
    for team in team_positions_df['team_name'].unique():
        record = {'total_goals_scored': 0, 'num_matches': 0}
        for k in range(1, 7):
            record[f'off_ratio{k}_list'] = []
        team_stats[team] = record

    # Pass 1: home matches feed the ratios and the home share of the totals.
    for idx, row in match_df.iterrows():
        home_team = row['home_team_name']
        away_team = row['away_team_name']
        if not (home_team in team_stats and away_team in team_stats):
            continue

        record = team_stats[home_team]
        scored = row['home_team_goal_count']

        own_ppg = row['Pre-Match PPG (Home)']
        if own_ppg > 0:
            record['off_ratio1_list'].append(scored / (own_ppg + epsilon))

        own_corners = row['home_team_corner_count']
        if own_corners > 0:
            record['off_ratio2_list'].append(scored / (own_corners + epsilon))

        # Cards and fouls committed by the opposing (away) side; always recorded.
        opponent_discipline = (row['away_team_yellow_cards'] + row['away_team_red_cards']
                               + row['away_team_fouls'] + epsilon)
        record['off_ratio3_list'].append(scored / opponent_discipline)

        own_xg = row['team_a_xg']
        if own_xg > 0:
            record['off_ratio4_list'].append(scored / (own_xg + epsilon))

        # Shots taken; always recorded.
        own_shots = (row['home_team_shots_on_target']
                     + row['home_team_shots_off_target'] + epsilon)
        record['off_ratio5_list'].append(scored / own_shots)

        own_possession = row['home_team_possession']
        if own_possession > 0:
            record['off_ratio6_list'].append(scored / (own_possession + epsilon))

        record['total_goals_scored'] += scored
        record['num_matches'] += 1

    # Pass 2: away matches only add to the totals; only the away side is checked here.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            continue
        record = team_stats[away_team]
        record['total_goals_scored'] += row['away_team_goal_count']
        record['num_matches'] += 1

    # Collapse the per-match samples into one summary row per team.
    summary_rows = []
    for team, record in team_stats.items():
        played = record['num_matches']
        summary = {'team_name': team}
        for k in range(1, 7):
            summary[f'off_ratio{k}'] = mean_or_zero(record[f'off_ratio{k}_list'])
        summary['total_goals_scored'] = record['total_goals_scored']
        summary['average_goals_scored'] = (
            record['total_goals_scored'] / played if played > 0 else 0)
        summary_rows.append(summary)

    return pd.DataFrame(summary_rows)

# ====================== ELO评分算法实现 ======================
def initialize_elo_scores(team_positions_df):
    """Seed every team with an Elo rating derived from its points per game.

    Each team starts at 1500 and receives a bonus of 20 points for every rank
    it sits above last place, so the team with the best points-per-game gets
    the highest starting rating and the worst team stays at 1500.

    The rank is taken ACROSS teams on each team's mean ``points_per_game``
    (a team may appear once per season). Ranking inside a single team's own
    rows cannot compare teams with one another, so it is not used.

    Args:
        team_positions_df: Frame with ``team_name`` and ``points_per_game``
            columns.

    Returns:
        dict mapping team name to its initial rating (float). Tied teams share
        the average of their ranks; a team with no usable points-per-game
        value is ranked last.
    """
    base_rating = 1500
    points_per_rank = 20

    teams = team_positions_df['team_name'].unique().tolist()
    n_teams = len(teams)

    # One strength figure per team, then rank 1 = strongest.
    mean_ppg = team_positions_df.groupby('team_name')['points_per_game'].mean()
    ranks = mean_ppg.rank(ascending=False, na_option='bottom')

    return {
        team: float(base_rating + points_per_rank * (n_teams - ranks.get(team, n_teams)))
        for team in teams
    }

def update_elo_scores(elo_scores, home_team, away_team, home_score, away_score, K=30):
    """Apply one match result to the rating table and return the table.

    Uses the standard logistic Elo expectation on a 400-point scale. A win
    counts as 1, a loss as 0 and anything else (including a draw) as 0.5.

    Args:
        elo_scores: dict of team -> rating; modified in place.
        home_team: Key of the home side in ``elo_scores``.
        away_team: Key of the away side in ``elo_scores``.
        home_score: Goals scored by the home side.
        away_score: Goals scored by the away side.
        K: Maximum rating change per match.

    Returns:
        The same ``elo_scores`` dict, after the update.
    """
    rating_home = elo_scores[home_team]
    rating_away = elo_scores[away_team]

    # Expected results are based on the ratings before this match.
    expected_home = 1 / (1 + 10 ** ((rating_away - rating_home) / 400))
    expected_away = 1 / (1 + 10 ** ((rating_home - rating_away) / 400))

    if home_score > away_score:
        actual_home, actual_away = 1, 0
    elif home_score < away_score:
        actual_home, actual_away = 0, 1
    else:
        actual_home, actual_away = 0.5, 0.5

    elo_scores[home_team] += K * (actual_home - expected_home)
    elo_scores[away_team] += K * (actual_away - expected_away)
    return elo_scores

# ====================== 数据加载函数（含交叉验证） ======================
def _fold_averaged_pca(features, n_components=2, n_splits=5, random_state=42):
    """Return a fold-averaged PCA projection with one output row per input row.

    For every K-fold split a scaler and a PCA are fitted on the training rows
    only, and then ALL rows are projected with that fitted model. Row ``i`` of
    each fold's result therefore always belongs to row ``i`` of ``features``,
    which makes the fold results safe to average position by position.

    The sign of a principal axis is arbitrary, so each later fold is flipped
    per component to agree with the first fold before averaging. Folds whose
    components come out in a different ORDER are not re-matched.

    Args:
        features: 2-D numeric table (DataFrame or array) without NaN values.
        n_components: Number of principal components to keep.
        n_splits: Number of folds.
        random_state: Seed for the shuffled fold assignment.

    Returns:
        ndarray of shape ``(len(features), n_components)``.
    """
    values = np.asarray(features, dtype=float)
    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    projections = []
    reference_axes = None
    for train_idx, _ in splitter.split(values):
        train_values = values[train_idx]
        scaler = StandardScaler().fit(train_values)
        pca = PCA(n_components=n_components).fit(scaler.transform(train_values))
        projected = pca.transform(scaler.transform(values))

        if reference_axes is None:
            reference_axes = pca.components_
        else:
            # +1 where this fold's axis points the same way as the reference, -1 otherwise.
            orientation = np.sign(np.sum(pca.components_ * reference_axes, axis=1))
            orientation[orientation == 0] = 1.0
            projected = projected * orientation
        projections.append(projected)

    return np.mean(projections, axis=0)


def load_all_league_data(base_path, leagues, seasons):
    """Load, clean and embed the team and match tables of every league/season.

    For each ``(country, league)`` pair and season the function reads
    ``{country}-{league}-teams-{season}-stats.csv`` and
    ``{country}-{league}-matches-{season}-stats.csv`` from ``base_path``,
    normalises team names, drops matches involving unknown teams, attaches the
    defensive/offensive statistics and computes a 2-D fold-averaged PCA
    embedding for the teams (from the defensive columns) and for the matches
    (from all numeric match columns). Combinations with missing files or no
    usable matches are skipped with a printed warning.

    Args:
        base_path: Directory containing the CSV files.
        leagues: Iterable of ``(country_name, league_name)`` pairs.
        seasons: Iterable of season labels as used in the file names; they are
            concatenated into ``team_season`` so they are expected to be str.

    Returns:
        ``(team_positions, match_positions)``: two DataFrames concatenated
        over all loaded league/season combinations.

    Raises:
        ValueError: If no league/season combination could be loaded.
    """
    stat_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                    'total_goals_conceded', 'average_goals_conceded',
                    'off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6',
                    'total_goals_scored', 'average_goals_scored']
    # Only the defensive columns feed the team embedding.
    defensive_columns = ['total_goals_conceded', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                         'average_goals_conceded']
    unused_match_columns = ['timestamp', 'date_GMT', 'status', 'attendance', 'referee', 'stadium_name',
                            'Game Week']

    all_team_positions, all_match_positions = [], []
    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not os.path.exists(team_file) or not os.path.exists(match_file):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Normalise team names so the two files can be joined on them.
            team_df['team_name'] = team_df.get('common_name', team_df.get('team_name', None)).str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            # Keep only matches where both sides appear in the team table.
            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)]
            if len(match_df) < original_match_count:
                print(f"警告: 过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 无有效比赛数据")
                continue

            # Defensive and offensive statistics per team.
            defensive_stats_df = compute_defensive_stats(match_df, team_df)
            offensive_stats_df = compute_offensive_stats(match_df, team_df)
            if defensive_stats_df.empty or offensive_stats_df.empty:
                print("警告: 未计算出防守或进攻统计")
                continue

            team_df = team_df.merge(defensive_stats_df, on='team_name', how='left')
            team_df = team_df.merge(offensive_stats_df, on='team_name', how='left')

            for col in stat_columns:
                if col not in team_df.columns:
                    print(f"警告: {col} 列缺失，设为0")
                    team_df[col] = 0

            # Min-max score in [0, 1]: fewest goals conceded -> 1, most -> 0.
            conceded = team_df['total_goals_conceded']
            if conceded.max() != conceded.min():
                team_df['normalized_defense_score'] = (conceded.max() - conceded) / (
                    conceded.max() - conceded.min() + 1e-8)
            else:
                team_df['normalized_defense_score'] = 0

            # Team embedding.
            X_pca_avg = _fold_averaged_pca(team_df[defensive_columns].fillna(0))

            team_positions = pd.DataFrame({
                'team_name': team_df['team_name'], 'PC1': X_pca_avg[:, 0], 'PC2': X_pca_avg[:, 1],
                'points_per_game': team_df['points_per_game'], 'league': league_name, 'season': season,
                'ratio1': team_df['ratio1'], 'ratio2': team_df['ratio2'], 'ratio3': team_df['ratio3'],
                'ratio4': team_df['ratio4'], 'ratio5': team_df['ratio5'], 'ratio6': team_df['ratio6'],
                'off_ratio1': team_df['off_ratio1'], 'off_ratio2': team_df['off_ratio2'], 'off_ratio3': team_df['off_ratio3'],
                'off_ratio4': team_df['off_ratio4'], 'off_ratio5': team_df['off_ratio5'], 'off_ratio6': team_df['off_ratio6'],
                'normalized_defense_score': team_df['normalized_defense_score'],
                'total_goals_conceded': team_df['total_goals_conceded'],
                'average_goals_conceded': team_df['average_goals_conceded'],
                'total_goals_scored': team_df['total_goals_scored'],
                'average_goals_scored': team_df['average_goals_scored']
            })
            team_positions['team_season'] = league_name + '_' + team_positions['team_name'] + '_' + team_positions['season']
            all_team_positions.append(team_positions)

            # Match embedding from every numeric match column.
            match_df = match_df.drop(columns=unused_match_columns, errors='ignore')
            numeric_cols = match_df.select_dtypes(include=[np.number]).columns
            # Column mean for gaps; a column that is entirely NaN has no mean, so it falls back to 0.
            match_df[numeric_cols] = match_df[numeric_cols].fillna(match_df[numeric_cols].mean()).fillna(0)
            X_pca_match_avg = _fold_averaged_pca(match_df[numeric_cols])

            match_positions = pd.DataFrame({
                'home_team_name': match_df['home_team_name'], 'away_team_name': match_df['away_team_name'],
                'PC1': X_pca_match_avg[:, 0], 'PC2': X_pca_match_avg[:, 1], 'league': league_name, 'season': season
            })
            all_match_positions.append(match_positions)

    if not all_team_positions:
        # pd.concat would raise a generic "No objects to concatenate" here.
        raise ValueError(f"未加载到任何联赛数据，请检查路径和文件名: {base_path}")

    return pd.concat(all_team_positions, ignore_index=True), pd.concat(all_match_positions, ignore_index=True)

# ====================== 计算主客场差异比值 ======================
def compute_home_away_ratios(team_positions_df, match_positions_df, w_def, w_off):
    """Compute an away/home strength ratio for every (team, opponent) pairing.

    For each league and season, a team's "home" offence and defence are the
    weighted sums of its eight offensive and eight defensive statistics. The
    "away" values are a fixed adjustment of those (offence x 0.8, defence
    x 1.2), and the ratio is away-composite / home-composite. The ratio
    therefore depends only on the team; it is stored once per opponent the
    team actually met.

    Args:
        team_positions_df: One row per team and season with ``league``,
            ``season``, ``team_name`` and the sixteen statistic columns.
        match_positions_df: One row per match with ``league``, ``season``,
            ``home_team_name`` and ``away_team_name``.
        w_def: Eight defensive weights, indexable by position. ``None`` means
            uniform weights of 1/8.
        w_off: Eight offensive weights, indexable by position. ``None`` means
            uniform weights of 1/8.

    Returns:
        dict keyed by ``"{league}_{team}_{season}_{opponent}"`` with the ratio
        as value.
    """
    epsilon = 1e-8
    off_columns = ['off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6',
                   'total_goals_scored', 'average_goals_scored']
    def_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                   'total_goals_conceded', 'average_goals_conceded']

    # The optimiser's default is "no weights given"; treat that as uniform weights.
    if w_def is None:
        w_def = np.ones(8, dtype=np.float32) / 8
    if w_off is None:
        w_off = np.ones(8, dtype=np.float32) / 8

    ratios_dict = {}

    for (league, season), team_group in team_positions_df.groupby(['league', 'season']):
        match_group = match_positions_df[(match_positions_df['league'] == league) & (match_positions_df['season'] == season)]

        # One pass over the matches: who did each team face, in match order.
        opponents_of = {}
        for home, away in zip(match_group['home_team_name'], match_group['away_team_name']):
            opponents_of.setdefault(home, []).append(away)
            if away != home:
                opponents_of.setdefault(away, []).append(home)

        for team in team_group['team_name'].unique():
            opponents = opponents_of.get(team)
            if not opponents:
                continue

            team_data = team_group[team_group['team_name'] == team].iloc[0]

            home_offense = sum(team_data[col] * w_off[i] for i, col in enumerate(off_columns))
            home_defense = sum(team_data[col] * w_def[i] for i, col in enumerate(def_columns))
            home_comprehensive = 0.5 * home_offense + 0.5 * home_defense

            # Placeholder away model: 20% less offence, 20% more defensive load.
            away_offense = home_offense * 0.8
            away_defense = home_defense * 1.2
            away_comprehensive = 0.5 * away_offense + 0.5 * away_defense

            ratio = away_comprehensive / (home_comprehensive + epsilon)
            for opponent in opponents:
                ratios_dict[f"{league}_{team}_{season}_{opponent}"] = ratio

    return ratios_dict

# ====================== 损失函数 ======================
def compute_total_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, points_per_game, rank_scale,
                       def_ratios, w_def, off_ratios, w_off, normalized_defense_score, lambda_defense, lambda_offense,
                       lambda_supervision, lambda_reg, elo_scores, home_away_ratios, match_positions_df):
    """Compute the scalar training loss for the team-position optimisation.

    The loss is the sum of:
      * a match term: Elo-weighted, min-max normalised distances between each
        match point and its two teams, with the away distance rescaled by the
        team's away/home ratio and the Elo gap;
      * a defence term tying the second position coordinate to the negated
        weighted defensive statistics;
      * an offence term tying the first position coordinate to the weighted
        offensive statistics;
      * a supervision term between the defence target and
        ``normalized_defense_score``;
      * an L2 penalty on both weight vectors.

    Args:
        positions: Tensor of team coordinates, one row per team-season.
        match_home_idx, match_away_idx: Integer tensors indexing ``positions``
            (and ``elo_scores``) per match.
        match_PC1, match_PC2: Per-match coordinates.
        points_per_game: Accepted for interface compatibility; not used.
        rank_scale: Scalar scaling the Elo gap inside the match weight.
        def_ratios, off_ratios: Per-team statistic tensors, one column per weight.
        w_def, w_off: Weight vectors; their absolute values are used.
        normalized_defense_score: Per-team supervision target.
        lambda_defense, lambda_offense, lambda_supervision, lambda_reg: Term weights.
        elo_scores: Per-team Elo ratings, indexed like ``positions``.
        home_away_ratios: dict keyed ``"{league}_{team}_{season}_{opponent}"``;
            missing keys count as 1.0.
        match_positions_df: Match table whose i-th row must describe the i-th
            entry of the match tensors.

    Returns:
        Scalar float tensor. A NaN or infinite total is replaced by 0.0, and
        an empty match list yields 0.0.

    Raises:
        IndexError: If ``match_positions_df`` has fewer rows than there are
            match indices.
    """
    epsilon = 1e-8
    if tf.shape(match_home_idx)[0] == 0:
        return tf.constant(0.0, dtype=tf.float32)

    # Match term: distance of each team to its match point, min-max normalised jointly.
    home_pos = tf.gather(positions, match_home_idx)
    away_pos = tf.gather(positions, match_away_idx)
    match_points = tf.stack([match_PC1, match_PC2], axis=1)
    dist_home = tf.norm(home_pos - match_points + epsilon, axis=1)
    dist_away = tf.norm(away_pos - match_points + epsilon, axis=1)
    all_distances = tf.concat([dist_home, dist_away], axis=0)
    min_distance = tf.reduce_min(all_distances)
    dist_range = tf.reduce_max(all_distances) - min_distance + epsilon
    dist_home_norm = (dist_home - min_distance) / dist_range
    dist_away_norm = (dist_away - min_distance) / dist_range

    # Matches between similarly rated teams get the largest weight.
    home_elo = tf.gather(elo_scores, match_home_idx)
    away_elo = tf.gather(elo_scores, match_away_idx)
    elo_diff = tf.abs(home_elo - away_elo)
    weight = 1.0 / (1.0 + elo_diff * rank_scale + epsilon)

    # Per-match away/home ratio of the away team, looked up once as a vector.
    n_matches = len(match_home_idx)
    rows = match_positions_df.iloc[:n_matches]
    if len(rows) != n_matches:
        raise IndexError("match_positions_df has fewer rows than match indices")
    away_ratio = tf.constant(
        [home_away_ratios.get(f"{league}_{away_team}_{season}_{home_team}", 1.0)
         for home_team, away_team, league, season in zip(
             rows['home_team_name'], rows['away_team_name'], rows['league'], rows['season'])],
        dtype=tf.float32)

    # The home distance is used as is; the away distance is stretched by the Elo gap
    # when the home side is rated higher and shrunk by it otherwise.
    adjusted_dist_home = dist_home_norm
    adjusted_dist_away = tf.where(
        home_elo > away_elo,
        dist_away_norm * away_ratio * elo_diff,
        dist_away_norm * away_ratio / (elo_diff + epsilon))
    match_loss = tf.reduce_mean(weight * (adjusted_dist_home + adjusted_dist_away))

    # Defence term.
    w_def = tf.abs(w_def)
    defense_target = -tf.reduce_sum(w_def * def_ratios, axis=1)
    defense_loss = tf.reduce_mean(tf.square(positions[:, 1] - defense_target))

    # Offence term.
    w_off = tf.abs(w_off)
    offense_target = tf.reduce_sum(w_off * off_ratios, axis=1)
    offense_loss = tf.reduce_mean(tf.square(positions[:, 0] - offense_target))

    # Supervision term.
    supervision_loss = tf.reduce_mean(tf.square(defense_target - normalized_defense_score))

    # L2 regularisation of both weight vectors.
    regularization_loss = lambda_reg * (tf.reduce_sum(tf.square(w_def)) + tf.reduce_sum(tf.square(w_off)))

    total_loss = (match_loss + lambda_defense * defense_loss + lambda_offense * offense_loss +
                  lambda_supervision * supervision_loss + regularization_loss)
    return tf.where(tf.math.is_nan(total_loss) | tf.math.is_inf(total_loss), 0.0, total_loss)

# ====================== Adam优化函数 ======================
def adam_optimize_positions(team_positions_df, match_positions_df, initial_lr=0.005, decay_steps=20000, decay_rate=0.9,
                            clipnorm=0.5, iterations=30000, verbose_interval=1000, random_seed=42, lambda_defense=0.1,
                            lambda_offense=0.1, lambda_supervision=0.1, lambda_reg=0.01, w_def=None, w_off=None):
    """Optimise team coordinates and statistic weights with Adam.

    Team positions (standardised ``PC1``/``PC2``), ``rank_scale`` and the two
    weight vectors are trained jointly against ``compute_total_loss``. The
    parameters with the lowest loss seen are kept and the positions are
    written back, un-standardised, into the returned frame.

    Args:
        team_positions_df: One row per team-season with ``team_season``,
            ``team_name``, ``PC1``, ``PC2``, ``points_per_game``,
            ``normalized_defense_score`` and the sixteen statistic columns.
            ``team_season`` is assumed to be unique per row.
        match_positions_df: One row per match with ``league``, ``season``,
            ``home_team_name``, ``away_team_name``, ``PC1`` and ``PC2``.
        initial_lr, decay_steps, decay_rate: Exponential learning-rate schedule.
        clipnorm: Gradient-norm clip passed to Adam.
        iterations: Number of optimisation steps.
        verbose_interval: Print progress every this many steps.
        random_seed: Seed passed to ``tf.random.set_seed``.
        lambda_defense, lambda_offense, lambda_supervision, lambda_reg: Loss
            term weights.
        w_def, w_off: Initial eight-element weight vectors; ``None`` means
            uniform weights of 1/8.

    Returns:
        ``(losses, team_positions_df, best_rank_scale, best_w_def,
        best_w_off)``. When no match refers to known team-seasons the result
        is ``([], team_positions_df, None, None, None)``.
    """
    def_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                   'total_goals_conceded', 'average_goals_conceded']
    off_columns = ['off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6',
                   'total_goals_scored', 'average_goals_scored']

    team_seasons = team_positions_df['team_season'].unique()
    team_season_to_idx = {t: i for i, t in enumerate(team_seasons)}

    # Read the team name from the frame instead of splitting the team_season key,
    # which breaks as soon as a league or team name contains an underscore.
    elo_scores_dict = initialize_elo_scores(team_positions_df)
    team_name_of = dict(zip(team_positions_df['team_season'], team_positions_df['team_name']))
    elo_scores = tf.convert_to_tensor([elo_scores_dict[team_name_of[ts]] for ts in team_seasons], dtype=tf.float32)

    team_positions_df = team_positions_df.set_index('team_season')
    init_positions = team_positions_df[['PC1', 'PC2']].values
    def_ratios = team_positions_df[def_columns].values
    off_ratios = team_positions_df[off_columns].values

    # Positions and statistics are optimised in standardised units.
    scaler_pos = StandardScaler()
    scaler_def = StandardScaler()
    scaler_off = StandardScaler()
    positions_scaled = scaler_pos.fit_transform(init_positions)
    def_ratios_scaled = scaler_def.fit_transform(def_ratios)
    off_ratios_scaled = scaler_off.fit_transform(off_ratios)

    positions = tf.Variable(positions_scaled, dtype=tf.float32)
    def_ratios = tf.constant(def_ratios_scaled, dtype=tf.float32)
    off_ratios = tf.constant(off_ratios_scaled, dtype=tf.float32)
    normalized_defense_score = tf.constant(team_positions_df['normalized_defense_score'].values, dtype=tf.float32)
    points_per_game = tf.constant(team_positions_df['points_per_game'].values, dtype=tf.float32)

    # Keep only matches whose two team-seasons are known, and keep the frame rows in
    # step with the index tensors so the loss reads the right row for each match.
    home_keys = [f"{league}_{home}_{season}" for league, home, season in zip(
        match_positions_df['league'], match_positions_df['home_team_name'], match_positions_df['season'])]
    away_keys = [f"{league}_{away}_{season}" for league, away, season in zip(
        match_positions_df['league'], match_positions_df['away_team_name'], match_positions_df['season'])]
    keep = np.array([h in team_season_to_idx and a in team_season_to_idx
                     for h, a in zip(home_keys, away_keys)], dtype=bool)
    if not keep.any():
        print("警告: 无有效比赛数据")
        return [], team_positions_df.reset_index(), None, None, None

    valid_matches = match_positions_df[keep].reset_index(drop=True)
    match_home_idx = tf.constant([team_season_to_idx[h] for h, ok in zip(home_keys, keep) if ok], dtype=tf.int32)
    match_away_idx = tf.constant([team_season_to_idx[a] for a, ok in zip(away_keys, keep) if ok], dtype=tf.int32)
    match_PC1 = tf.constant(valid_matches['PC1'].values, dtype=tf.float32)
    match_PC2 = tf.constant(valid_matches['PC2'].values, dtype=tf.float32)

    # Resolve the default weights before anything indexes into them.
    uniform_weights = np.ones(8, dtype=np.float32) / 8
    w_def_init = uniform_weights if w_def is None else w_def
    w_off_init = uniform_weights if w_off is None else w_off

    # Away/home ratio per (team, opponent) pairing.
    home_away_ratios = compute_home_away_ratios(team_positions_df.reset_index(), match_positions_df,
                                                w_def_init, w_off_init)

    tf.random.set_seed(random_seed)
    rank_scale = tf.Variable(1.0, dtype=tf.float32)
    w_def = tf.Variable(np.abs(w_def_init), dtype=tf.float32)
    w_off = tf.Variable(np.abs(w_off_init), dtype=tf.float32)
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_lr, decay_steps, decay_rate)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=clipnorm)

    trainable = [positions, rank_scale, w_def, w_off]
    losses, best_loss = [], float('inf')
    best_positions, best_rank_scale, best_w_def, best_w_off = positions.numpy().copy(), 1.0, w_def.numpy().copy(), w_off.numpy().copy()

    for i in range(iterations):
        with tf.GradientTape() as tape:
            loss = compute_total_loss(positions, match_home_idx, match_away_idx, match_PC1, match_PC2, points_per_game,
                                      rank_scale, def_ratios, w_def, off_ratios, w_off, normalized_defense_score,
                                      lambda_defense, lambda_offense, lambda_supervision, lambda_reg, elo_scores,
                                      home_away_ratios, valid_matches)
        grads = tape.gradient(loss, trainable)
        if any(g is None for g in grads):
            print(f"警告: 迭代 {i + 1} 梯度为None")
            continue
        optimizer.apply_gradients(zip(grads, trainable))
        loss_val = float(loss.numpy())
        losses.append(loss_val)

        # The loss was evaluated before this step; the snapshot is taken after it.
        if loss_val < best_loss:
            best_loss = loss_val
            best_positions, best_rank_scale, best_w_def, best_w_off = (positions.numpy().copy(), float(rank_scale.numpy()),
                                                                       w_def.numpy().copy(), w_off.numpy().copy())

        if (i + 1) % verbose_interval == 0:
            print(f"迭代 {i + 1}/{iterations}, 损失 = {loss_val:.4f}, rank_scale = {rank_scale.numpy():.4f}, 最佳损失 = {best_loss:.4f}")

    # Restore the best snapshot and map positions back to the original units.
    positions.assign(best_positions)
    rank_scale.assign(best_rank_scale)
    w_def.assign(best_w_def)
    w_off.assign(best_w_off)
    final_pos = scaler_pos.inverse_transform(positions.numpy())
    for idx, team_season in enumerate(team_seasons):
        team_positions_df.loc[team_season, 'PC1'] = final_pos[idx, 0]
        team_positions_df.loc[team_season, 'PC2'] = final_pos[idx, 1]
    team_positions_df = team_positions_df.reset_index()
    return losses, team_positions_df, best_rank_scale, best_w_def, best_w_off

# ====================== 随机搜索超参数调整 ======================
def random_search_hyperparameters(team_positions, match_positions, w_def, w_off, n_iter=10, random_state=42):
    """Randomly sample loss-weight hyperparameters and keep the best trial.

    Each trial draws ``lambda_defense``, ``lambda_offense`` and
    ``lambda_supervision`` uniformly from [0.01, 0.2) and ``lambda_reg`` from
    [0.001, 0.02), then runs ``adam_optimize_positions`` on copies of the
    input frames.

    Args:
        team_positions: Team DataFrame; a copy is handed to the optimizer.
        match_positions: Match DataFrame; a copy is handed to the optimizer.
        w_def: Initial defensive weights forwarded to the optimizer.
        w_off: Initial offensive weights forwarded to the optimizer.
        n_iter: Number of random trials.
        random_state: Seed applied to NumPy's global random generator.

    Returns:
        The parameter dict of the best trial, or ``None`` when no trial
        produced any loss value.
    """
    np.random.seed(random_state)
    best_loss, best_params = float('inf'), None

    for _ in range(n_iter):
        params = {
            'lambda_defense': np.random.uniform(0.01, 0.2),
            'lambda_offense': np.random.uniform(0.01, 0.2),
            'lambda_supervision': np.random.uniform(0.01, 0.2),
            'lambda_reg': np.random.uniform(0.001, 0.02)
        }
        print(f"\n随机搜索: {params}")

        losses, _, _, _, _ = adam_optimize_positions(
            team_positions.copy(), match_positions.copy(), w_def=w_def, w_off=w_off, **params
        )
        if not losses:
            continue
        # The optimizer hands back its best iterate, not its last one, so a
        # trial is scored by the lowest loss it reached.
        # NOTE(review): totals obtained with different lambda weights are not
        # strictly comparable (smaller lambdas shrink the total) -- confirm
        # this selection criterion is what is wanted.
        trial_loss = min(losses)
        if trial_loss < best_loss:
            best_loss = trial_loss
            best_params = params

    print(f"\n最佳超参数: {best_params}, 最佳损失: {best_loss:.4f}")
    return best_params

# ====================== 可视化函数 ======================
def visualize_team_evolution_by_league_static(team_positions_df, seasons_order,
                                              output_dir="/Users/peixuanma/Downloads/Output_Graphs"):
    """Save one PC1/PC2 trajectory plot per league as a PNG file.

    For each league, teams that appear in as many distinct seasons as
    ``seasons_order`` has entries are plotted; if no team qualifies, every
    team of the league is plotted instead. Each team is drawn as a line over
    its seasons (sorted by the season string) with the season written next to
    each point.

    Args:
        team_positions_df: DataFrame with ``league``, ``team_name``,
            ``season``, ``PC1`` and ``PC2`` columns.
        seasons_order: Sequence of seasons; only its length is used.
        output_dir: Directory that receives ``<league>_evolution.png``.
            Created if missing. Defaults to the path the notebook used.
    """
    os.makedirs(output_dir, exist_ok=True)

    for league in team_positions_df['league'].unique():
        league_df = team_positions_df[team_positions_df['league'] == league].copy()
        valid_teams = league_df.groupby("team_name").filter(
            lambda x: x['season'].nunique() == len(seasons_order))['team_name'].unique()
        if not valid_teams.size:
            # No team spans every season: fall back to all teams.
            valid_teams = league_df['team_name'].unique()
        valid_df = league_df[league_df['team_name'].isin(valid_teams)].sort_values(['team_name', 'season'])

        # An explicit figure (instead of global pyplot state) guarantees the
        # figure is released even if drawing or saving fails.
        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            ax.set_title(f"{league} - 球队演变 (进攻-防守)")
            ax.set_xlabel("PC1 (进攻)")
            ax.set_ylabel("PC2 (防守)")
            ax.grid(True)

            for team in valid_df['team_name'].unique():
                sub = valid_df[valid_df['team_name'] == team]
                ax.plot(sub['PC1'], sub['PC2'], marker='o', label=team)
                for x, y, season in zip(sub['PC1'], sub['PC2'], sub['season']):
                    ax.text(x, y, season, fontsize=8, ha='right')

            ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            fig.tight_layout()
            filename = os.path.join(output_dir, f"{league}_evolution.png")
            fig.savefig(filename, dpi=150)
        finally:
            plt.close(fig)
        print(f"保存 {league} 图 -> {filename}")

# ====================== 主函数 ======================
if __name__ == "__main__":
    # Script entry point: load data, search hyperparameters, train, export, plot.
    base_path = '/Users/peixuanma/Downloads/data1'
    leagues = [
        ("england", "premier-league"), ("germany", "bundesliga"), ("spain", "la-liga"),
        ("france", "ligue-1"), ("france", "ligue-2"), ("italy", "serie-a"), ("netherlands", "eredivisie"),
        ("portugal", "ligapro"), ("denmark", "superliga"), ("england", "championship"), ("spain", "segunda-division"),
        ("switzerland", "super-league"), ("portugal", "liga-nos"), ("italy", "serie-b"), ("germany", "2-bundesliga"),
        ("scotland", "premiership"), ("belgium", "pro-league"), ("austria", "bundesliga"),
    ]
    seasons = [
        "2013-to-2014", "2014-to-2015", "2015-to-2016", "2016-to-2017", "2017-to-2018",
        "2018-to-2019", "2019-to-2020", "2020-to-2021", "2021-to-2022", "2022-to-2023", "2023-to-2024"
    ]

    # Manually supplied initial defensive and offensive weights (example values).
    w_def = np.array([
        0.12714323,  # ratio1
        0.11415731,  # ratio2
        0.04532705,  # ratio3
        0.23075414,  # ratio4
        0.03240472,  # ratio5
        0.14205644,  # ratio6
        0.13323908,  # total_goals_conceded
        0.17491804   # average_goals_conceded
    ], dtype=np.float32)

    w_off = np.array([
        0.07438711,  # off_ratio1
        0.07787083,  # off_ratio2
        0.08495551,  # off_ratio3
        0.22353454,  # off_ratio4
        0.08152269,  # off_ratio5
        0.08466186,  # off_ratio6
        0.16022788,  # total_goals_scored
        0.21283953   # average_goals_scored
    ], dtype=np.float32)

    # Load data.
    print("===== 加载数据 =====")
    all_team_positions, all_match_positions = load_all_league_data(base_path, leagues, seasons)

    # Random search over the loss-weight hyperparameters.
    print("\n===== 随机搜索超参数 =====")
    best_params = random_search_hyperparameters(all_team_positions, all_match_positions, w_def, w_off)

    # Final training run. The search returns None when no trial produced a
    # loss; fall back to the optimizer's default lambdas instead of failing
    # on `**None`.
    print("\n===== 最终训练 =====")
    final_losses, final_team_positions, final_rank_scale, final_w_def, final_w_off = adam_optimize_positions(
        all_team_positions.copy(), all_match_positions.copy(), w_def=w_def, w_off=w_off, **(best_params or {})
    )
    if final_losses:
        final_team_positions.to_csv("trained_team_positions.csv", index=False)
        print(f"最终损失: {final_losses[-1]:.4f}")
        print(f"最终防守权重: {final_w_def}")
        print(f"最终进攻权重: {final_w_off}")

    # Visualization.
    print("\n===== 可视化 =====")
    visualize_team_evolution_by_league_static(final_team_positions, seasons)

    print("\n完成！")

scikit-learn 版本: 1.4.2
===== 加载数据 =====
加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2019-to-2020
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2019-to-2020
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesliga - 2021-to-2022
加载数据: germany - bundesliga - 2022-to-2023
加载数据: germany - bundesliga - 2023-

KeyboardInterrupt: 

In [7]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.model_selection import KFold
import sklearn

print(f"scikit-learn 版本: {sklearn.__version__}")

# ====================== 计算防守统计数据 ======================
def compute_defensive_stats(match_df, team_positions_df):
    """Aggregate per-team defensive statistics from match rows.

    For every home match of a team, six "goals conceded per X" ratios are
    collected, where X is the away side's pre-match PPG, corners, xG
    (``team_b_xg``), total shots and possession, and the home side's
    cards-plus-fouls. A ratio is recorded only when its denominator is
    strictly positive, so zero or missing (NaN) denominators never inject
    ``goals / epsilon`` outliers into the mean. Goals conceded and match
    counts are accumulated over both home and away matches.

    Args:
        match_df: Match DataFrame with the team-name, goal-count and
            per-side statistic columns read below.
        team_positions_df: DataFrame whose ``team_name`` column lists the
            teams to report on.

    Returns:
        DataFrame with one row per team: ``team_name``, ``ratio1`` ..
        ``ratio6`` (mean of the recorded ratios, 0 when none),
        ``total_goals_conceded`` and ``average_goals_conceded``.
    """
    epsilon = 1e-8  # keeps the division finite for very small denominators
    ratio_names = [f'ratio{i}' for i in range(1, 7)]
    team_stats = {}
    for team in team_positions_df['team_name'].unique():
        team_stats[team] = {'total_goals_conceded': 0, 'num_matches': 0}
        team_stats[team].update({f'{name}_list': [] for name in ratio_names})

    def _record_ratio(bucket, goals, denominator):
        # Skip non-positive (or NaN) denominators instead of dividing by epsilon.
        if denominator > 0:
            bucket.append(goals / (denominator + epsilon))

    for idx, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        away_known = away_team in team_stats
        both_known = home_team in team_stats and away_known
        if not both_known:
            print(f"警告: 主队 {home_team} 或客队 {away_team} 未找到，跳过比赛 {idx}")

        # Away side: only totals are tracked, and only the away team must be known.
        if away_known:
            team_stats[away_team]['total_goals_conceded'] += row['home_team_goal_count']
            team_stats[away_team]['num_matches'] += 1
        else:
            print(f"警告: 客队 {away_team} 未找到，跳过比赛 {idx}")

        # Home side: ratios and totals require both teams to be known.
        if not both_known:
            continue
        home = team_stats[home_team]
        conceded = row['away_team_goal_count']
        _record_ratio(home['ratio1_list'], conceded, row['Pre-Match PPG (Away)'])
        _record_ratio(home['ratio2_list'], conceded, row['away_team_corner_count'])
        _record_ratio(home['ratio3_list'], conceded,
                      row['home_team_yellow_cards'] + row['home_team_red_cards'] + row['home_team_fouls'])
        _record_ratio(home['ratio4_list'], conceded, row['team_b_xg'])
        _record_ratio(home['ratio5_list'], conceded,
                      row['away_team_shots_on_target'] + row['away_team_shots_off_target'])
        _record_ratio(home['ratio6_list'], conceded, row['away_team_possession'])
        home['total_goals_conceded'] += conceded
        home['num_matches'] += 1

    # Collapse the per-match lists into one summary row per team.
    data = []
    for team, stats in team_stats.items():
        num_matches = stats['num_matches']
        summary = {'team_name': team}
        for name in ratio_names:
            values = stats[f'{name}_list']
            summary[name] = np.mean(values) if values else 0
        summary['total_goals_conceded'] = stats['total_goals_conceded']
        summary['average_goals_conceded'] = (
            stats['total_goals_conceded'] / num_matches if num_matches > 0 else 0)
        data.append(summary)

    return pd.DataFrame(data)

# ====================== 计算进攻统计数据 ======================
def compute_offensive_stats(match_df, team_positions_df):
    """Aggregate per-team offensive statistics from match rows.

    For every home match of a team, six "goals scored per X" ratios are
    collected, where X is the home side's pre-match PPG, corners, xG
    (``team_a_xg``), total shots and possession, and the away side's
    cards-plus-fouls. A ratio is recorded only when its denominator is
    strictly positive, so zero or missing (NaN) denominators never inject
    ``goals / epsilon`` outliers into the mean. Goals scored and match
    counts are accumulated over both home and away matches.

    Args:
        match_df: Match DataFrame with the team-name, goal-count and
            per-side statistic columns read below.
        team_positions_df: DataFrame whose ``team_name`` column lists the
            teams to report on.

    Returns:
        DataFrame with one row per team: ``team_name``, ``off_ratio1`` ..
        ``off_ratio6`` (mean of the recorded ratios, 0 when none),
        ``total_goals_scored`` and ``average_goals_scored``.
    """
    epsilon = 1e-8  # keeps the division finite for very small denominators
    ratio_names = [f'off_ratio{i}' for i in range(1, 7)]
    team_stats = {}
    for team in team_positions_df['team_name'].unique():
        team_stats[team] = {'total_goals_scored': 0, 'num_matches': 0}
        team_stats[team].update({f'{name}_list': [] for name in ratio_names})

    def _record_ratio(bucket, goals, denominator):
        # Skip non-positive (or NaN) denominators instead of dividing by epsilon.
        if denominator > 0:
            bucket.append(goals / (denominator + epsilon))

    for _, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        away_known = away_team in team_stats

        # Away side: only totals are tracked, and only the away team must be known.
        if away_known:
            team_stats[away_team]['total_goals_scored'] += row['away_team_goal_count']
            team_stats[away_team]['num_matches'] += 1

        # Home side: ratios and totals require both teams to be known.
        if not (away_known and home_team in team_stats):
            continue
        home = team_stats[home_team]
        scored = row['home_team_goal_count']
        _record_ratio(home['off_ratio1_list'], scored, row['Pre-Match PPG (Home)'])
        _record_ratio(home['off_ratio2_list'], scored, row['home_team_corner_count'])
        _record_ratio(home['off_ratio3_list'], scored,
                      row['away_team_yellow_cards'] + row['away_team_red_cards'] + row['away_team_fouls'])
        _record_ratio(home['off_ratio4_list'], scored, row['team_a_xg'])
        _record_ratio(home['off_ratio5_list'], scored,
                      row['home_team_shots_on_target'] + row['home_team_shots_off_target'])
        _record_ratio(home['off_ratio6_list'], scored, row['home_team_possession'])
        home['total_goals_scored'] += scored
        home['num_matches'] += 1

    # Collapse the per-match lists into one summary row per team.
    data = []
    for team, stats in team_stats.items():
        num_matches = stats['num_matches']
        summary = {'team_name': team}
        for name in ratio_names:
            values = stats[f'{name}_list']
            summary[name] = np.mean(values) if values else 0
        summary['total_goals_scored'] = stats['total_goals_scored']
        summary['average_goals_scored'] = (
            stats['total_goals_scored'] / num_matches if num_matches > 0 else 0)
        data.append(summary)

    return pd.DataFrame(data)

# ====================== ELO评分算法实现 ======================
def initialize_elo_scores(team_positions_df):
    """Seed every team's Elo rating from its points-per-game standing.

    Teams are ranked across the whole frame by their mean
    ``points_per_game`` (rank 1 = highest PPG, ties share the average rank).
    Each team starts at 1500 plus ``20 * (number_of_teams - rank)``, so the
    strongest team receives the largest head start and the weakest team
    starts at 1500. Teams without a usable PPG value start at 1500.

    Args:
        team_positions_df: DataFrame with ``team_name`` and
            ``points_per_game`` columns; a team may occupy several rows.

    Returns:
        dict mapping team name to its initial Elo rating.
    """
    teams = team_positions_df['team_name'].unique().tolist()
    # Rank teams against each other; ranking a team's own rows would say
    # nothing about its strength relative to the other teams.
    mean_ppg = team_positions_df.groupby('team_name')['points_per_game'].mean()
    standing = mean_ppg.rank(ascending=False, method='average')

    team_elo = {}
    for team in teams:
        rank = standing.get(team)
        bonus = 20 * (len(teams) - rank) if pd.notna(rank) else 0
        team_elo[team] = 1500 + bonus
    return team_elo

def update_elo_scores(elo_scores, home_team, away_team, home_score, away_score, K=30):
    """Apply one match result to the Elo table and return the table.

    Expected scores follow the logistic Elo formula with a 400-point scale.
    The winner is credited an actual score of 1, the loser 0, and both sides
    0.5 when neither score is greater than the other. ``elo_scores`` is
    modified in place and also returned.

    Args:
        elo_scores: dict mapping team name to its current rating.
        home_team: Key of the home team in ``elo_scores``.
        away_team: Key of the away team in ``elo_scores``.
        home_score: Goals scored by the home team.
        away_score: Goals scored by the away team.
        K: Update step size.

    Returns:
        The same ``elo_scores`` dict, after the update.
    """
    rating_home = elo_scores[home_team]
    rating_away = elo_scores[away_team]
    expected_home = 1 / (1 + 10 ** ((rating_away - rating_home) / 400))
    expected_away = 1 / (1 + 10 ** ((rating_home - rating_away) / 400))

    # Translate the scoreline into the (home, away) actual-score pair.
    if home_score > away_score:
        actual_home, actual_away = 1, 0
    elif home_score < away_score:
        actual_home, actual_away = 0, 1
    else:
        actual_home, actual_away = 0.5, 0.5

    elo_scores[home_team] += K * (actual_home - expected_home)
    elo_scores[away_team] += K * (actual_away - expected_away)
    return elo_scores

# ====================== 数据加载函数（含聚类） ======================
def load_all_league_data(base_path, leagues, seasons):
    """Load every league/season CSV pair, derive team features and cluster matches.

    For each ``(country, league)`` pair and season the function reads
    ``<country>-<league>-teams-<season>-stats.csv`` and
    ``<country>-<league>-matches-<season>-stats.csv`` from ``base_path``.
    Combinations with a missing file, no usable matches or no computed
    statistics are skipped with a printed warning.

    Per combination it:
      * lower-cases/strips team names and drops matches whose teams are not
        in the team file;
      * merges the defensive and offensive statistics into the team frame;
      * projects the defensive features onto two principal components
        (averaged over 5 folds);
      * clusters the numeric match columns with K-means (k chosen by
        silhouette score) and reduces the cluster centroids to 2-D with PCA.

    Args:
        base_path: Directory containing the CSV files.
        leagues: Iterable of ``(country_name, league_name)`` tuples.
        seasons: Iterable of season strings used in the file names.

    Returns:
        Tuple ``(team_positions, match_positions, centroid_positions)`` where
        the first two are concatenated DataFrames and the third maps
        ``"<league>_<season>"`` to ``{cluster_id: 2-D centroid}``.

    Raises:
        ValueError: If no league/season combination produced any data.
    """
    all_team_positions, all_match_positions = [], []
    global_cluster_centroid_positions = {}

    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not os.path.exists(team_file) or not os.path.exists(match_file):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Normalize team-name formatting so team and match files join.
            team_df['team_name'] = team_df.get('common_name', team_df.get('team_name', None)).str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            # .copy() so the later column assignments act on an independent
            # frame rather than on a slice of the file's DataFrame.
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)].copy()
            if len(match_df) < original_match_count:
                print(f"警告: 过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 无有效比赛数据")
                continue

            # Defensive and offensive statistics.
            defensive_stats_df = compute_defensive_stats(match_df, team_df)
            offensive_stats_df = compute_offensive_stats(match_df, team_df)
            if defensive_stats_df.empty or offensive_stats_df.empty:
                print("警告: 未计算出防守或进攻统计")
                continue

            team_df = team_df.merge(defensive_stats_df, on='team_name', how='left')
            team_df = team_df.merge(offensive_stats_df, on='team_name', how='left')

            for col in ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'total_goals_conceded', 'average_goals_conceded',
                        'off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6', 'total_goals_scored', 'average_goals_scored']:
                if col not in team_df.columns:
                    print(f"警告: {col} 列缺失，设为0")
                    team_df[col] = 0

            # Min-max normalized defense score: fewest goals conceded -> 1.
            if 'total_goals_conceded' in team_df.columns and team_df['total_goals_conceded'].max() != team_df['total_goals_conceded'].min():
                team_df['normalized_defense_score'] = (team_df['total_goals_conceded'].max() - team_df['total_goals_conceded']) / (
                    team_df['total_goals_conceded'].max() - team_df['total_goals_conceded'].min() + 1e-8)
            else:
                team_df['normalized_defense_score'] = 0

            # PCA on the defensive features, averaged over 5 folds.
            defensive_columns = ['total_goals_conceded', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'average_goals_conceded']
            defensive_matrix = team_df[defensive_columns].fillna(0).to_numpy(dtype=float)
            kf = KFold(n_splits=5, shuffle=True, random_state=42)
            fold_projections = []
            for train_idx, _ in kf.split(defensive_matrix):
                # Fit on the fold's training teams but project EVERY team, so
                # row i of each projection always refers to team i.
                scaler = StandardScaler().fit(defensive_matrix[train_idx])
                pca = PCA(n_components=2).fit(scaler.transform(defensive_matrix[train_idx]))
                projection = pca.transform(scaler.transform(defensive_matrix))
                if fold_projections:
                    # Principal axes are defined only up to sign; align each
                    # fold with the first one before averaging.
                    signs = np.sign(np.sum(projection * fold_projections[0], axis=0))
                    signs[signs == 0] = 1.0
                    projection = projection * signs
                fold_projections.append(projection)
            X_pca_avg = np.mean(fold_projections, axis=0)

            team_positions = pd.DataFrame({
                'team_name': team_df['team_name'], 'PC1': X_pca_avg[:, 0], 'PC2': X_pca_avg[:, 1],
                'points_per_game': team_df['points_per_game'], 'league': league_name, 'season': season,
                'ratio1': team_df['ratio1'], 'ratio2': team_df['ratio2'], 'ratio3': team_df['ratio3'],
                'ratio4': team_df['ratio4'], 'ratio5': team_df['ratio5'], 'ratio6': team_df['ratio6'],
                'off_ratio1': team_df['off_ratio1'], 'off_ratio2': team_df['off_ratio2'], 'off_ratio3': team_df['off_ratio3'],
                'off_ratio4': team_df['off_ratio4'], 'off_ratio5': team_df['off_ratio5'], 'off_ratio6': team_df['off_ratio6'],
                'normalized_defense_score': team_df['normalized_defense_score'],
                'total_goals_conceded': team_df['total_goals_conceded'],
                'average_goals_conceded': team_df['average_goals_conceded'],
                'total_goals_scored': team_df['total_goals_scored'],
                'average_goals_scored': team_df['average_goals_scored']
            })
            team_positions['team_season'] = league_name + '_' + team_positions['team_name'] + '_' + team_positions['season']
            all_team_positions.append(team_positions)

            # Cluster the matches on their standardized numeric columns.
            match_df = match_df.drop(columns=['timestamp', 'date_GMT', 'status', 'attendance', 'referee', 'stadium_name', 'Game Week'], errors='ignore')
            numeric_cols = match_df.select_dtypes(include=[np.number]).columns
            match_df[numeric_cols] = match_df[numeric_cols].fillna(match_df[numeric_cols].mean())
            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(match_df[numeric_cols])

            # Choose the number of clusters k by silhouette score.
            silhouette_scores = []
            max_k = min(10, len(X_scaled) // 2)
            for k in range(2, max_k + 1):
                kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
                cluster_labels = kmeans.fit_predict(X_scaled)
                silhouette_scores.append(silhouette_score(X_scaled, cluster_labels))
            # Candidate k values start at 2, hence the offset.
            best_k = int(np.argmax(silhouette_scores)) + 2 if silhouette_scores else 2

            # Final clustering with the selected k.
            kmeans = KMeans(n_clusters=best_k, random_state=42, n_init=10)
            cluster_labels = kmeans.fit_predict(X_scaled)
            match_df['cluster_id'] = cluster_labels

            # Reduce the cluster centroids to two dimensions.
            centroids = kmeans.cluster_centers_
            if len(centroids) > 0:
                pca_centroids = PCA(n_components=2).fit_transform(centroids)
                cluster_centroid_positions = {i: pca_centroids[i] for i in range(best_k)}
                global_cluster_centroid_positions[f"{league_name}_{season}"] = cluster_centroid_positions

            # Keep only what the optimizer needs from the match data.
            match_positions = pd.DataFrame({
                'home_team_name': match_df['home_team_name'], 'away_team_name': match_df['away_team_name'],
                'cluster_id': match_df['cluster_id'], 'league': league_name, 'season': season
            })
            all_match_positions.append(match_positions)

    if not all_team_positions or not all_match_positions:
        # pd.concat([]) would raise an unhelpful "No objects to concatenate".
        raise ValueError(f"no league/season data could be loaded from {base_path!r}")

    return pd.concat(all_team_positions, ignore_index=True), pd.concat(all_match_positions, ignore_index=True), global_cluster_centroid_positions

# ====================== 损失函数 ======================
def compute_total_loss(positions, match_home_idx, match_away_idx, match_cluster_ids, cluster_centroid_positions, points_per_game, rank_scale,
                       def_ratios, w_def, off_ratios, w_off, normalized_defense_score, lambda_defense, lambda_offense,
                       lambda_supervision, lambda_reg, elo_scores):
    """Compute the total training loss.

    The total is the sum of:
      * match loss: min-max normalized distances of the home and away team
        positions to their match's cluster centroid, weighted by
        ``1 / (1 + |elo_home - elo_away| * rank_scale)``;
      * ``lambda_defense`` times the squared error between ``positions[:, 1]``
        and the negated weighted sum of ``def_ratios``;
      * ``lambda_offense`` times the squared error between ``positions[:, 0]``
        and the weighted sum of ``off_ratios``;
      * ``lambda_supervision`` times the squared error between the defense
        target and ``normalized_defense_score``;
      * ``lambda_reg`` times the squared L2 norm of both weight vectors.

    ``w_def`` and ``w_off`` are used through their absolute values.
    ``points_per_game`` is accepted for interface compatibility but is not
    used in the computation.

    Returns:
        Scalar float32 tensor. It is 0.0 when there are no matches and also
        when the total is NaN or infinite.
    """
    epsilon = 1e-8
    if tf.shape(match_home_idx)[0] == 0:
        return tf.constant(0.0, dtype=tf.float32)

    # Match loss
    home_pos = tf.gather(positions, match_home_idx)
    away_pos = tf.gather(positions, match_away_idx)
    match_target_pos = tf.gather(cluster_centroid_positions, match_cluster_ids)
    # epsilon goes under the square root: that keeps the gradient finite at
    # zero distance without shifting the difference vector itself.
    dist_home = tf.sqrt(tf.reduce_sum(tf.square(home_pos - match_target_pos), axis=1) + epsilon)
    dist_away = tf.sqrt(tf.reduce_sum(tf.square(away_pos - match_target_pos), axis=1) + epsilon)
    all_distances = tf.concat([dist_home, dist_away], axis=0)
    dist_min = tf.reduce_min(all_distances)
    dist_range = tf.reduce_max(all_distances) - dist_min + epsilon
    dist_home_norm = (dist_home - dist_min) / dist_range
    dist_away_norm = (dist_away - dist_min) / dist_range

    # Matches between teams with similar Elo ratings get the largest weight.
    home_elo = tf.gather(elo_scores, match_home_idx)
    away_elo = tf.gather(elo_scores, match_away_idx)
    elo_diff = tf.abs(home_elo - away_elo)
    weight = 1.0 / (1.0 + elo_diff * rank_scale + epsilon)
    match_loss = tf.reduce_mean(weight * (dist_home_norm + dist_away_norm))

    # Defense loss
    w_def = tf.abs(w_def)
    defense_target = -tf.reduce_sum(w_def * def_ratios, axis=1)
    defense_loss = tf.reduce_mean(tf.square(positions[:, 1] - defense_target))

    # Offense loss
    w_off = tf.abs(w_off)
    offense_target = tf.reduce_sum(w_off * off_ratios, axis=1)
    offense_loss = tf.reduce_mean(tf.square(positions[:, 0] - offense_target))

    # Supervision loss
    supervision_loss = tf.reduce_mean(tf.square(defense_target - normalized_defense_score))

    # Regularization
    regularization_loss = lambda_reg * (tf.reduce_sum(tf.square(w_def)) + tf.reduce_sum(tf.square(w_off)))

    total_loss = (match_loss + lambda_defense * defense_loss + lambda_offense * offense_loss +
                  lambda_supervision * supervision_loss + regularization_loss)
    # NOTE(review): a non-finite total is reported as 0.0, which a caller
    # cannot tell apart from a perfect fit -- confirm this is intended.
    return tf.where(tf.math.is_nan(total_loss) | tf.math.is_inf(total_loss), 0.0, total_loss)

# ====================== Adam优化函数 ======================
def adam_optimize_positions(team_positions_df, match_positions_df, cluster_centroid_positions, initial_lr=0.001, decay_steps=20000, decay_rate=0.9,
                            clipnorm=0.5, iterations=100000, verbose_interval=1000, random_seed=42, lambda_defense=0.1,
                            lambda_offense=0.1, lambda_supervision=0.1, lambda_reg=0.01, w_def=None, w_off=None):
    """Optimize team positions with Adam and return the best iterate.

    Team positions (``PC1``/``PC2``) and the defensive/offensive feature
    matrices are standardized, then positions, ``rank_scale`` and both weight
    vectors are trained against ``compute_total_loss``. The variable values
    belonging to the lowest observed loss are kept and the positions are
    written back to the frame in their original scale.

    Args:
        team_positions_df: Team DataFrame with ``team_season``, ``team_name``,
            ``PC1``, ``PC2``, ``points_per_game``, ``normalized_defense_score``
            and the eight defensive and eight offensive feature columns.
        match_positions_df: Match DataFrame with ``league``, ``season``,
            ``home_team_name``, ``away_team_name`` and ``cluster_id``.
        cluster_centroid_positions: dict mapping ``"<league>_<season>"`` to
            ``{cluster_id: 2-D centroid}``.
        initial_lr, decay_steps, decay_rate: Exponential learning-rate decay
            settings.
        clipnorm: Gradient-norm clipping value passed to Adam.
        iterations: Maximum number of optimization steps.
        verbose_interval: Print progress every this many steps.
        random_seed: Seed passed to ``tf.random.set_seed``.
        lambda_defense, lambda_offense, lambda_supervision, lambda_reg: Loss
            term weights forwarded to ``compute_total_loss``.
        w_def, w_off: Initial weight vectors (absolute values are used);
            uniform ``1/8`` weights when ``None``.

    Returns:
        Tuple ``(losses, team_positions_df, best_rank_scale, best_w_def,
        best_w_off)``. When no match can be used, ``losses`` is empty and the
        last three entries are ``None``.
    """
    team_seasons = team_positions_df['team_season'].unique()
    team_season_to_idx = {t: i for i, t in enumerate(team_seasons)}

    elo_scores_dict = initialize_elo_scores(team_positions_df)
    # Read the team name from the frame instead of splitting the
    # "<league>_<team>_<season>" key, which breaks if a part contains "_".
    name_by_team_season = dict(zip(team_positions_df['team_season'], team_positions_df['team_name']))
    elo_scores = tf.convert_to_tensor(
        [elo_scores_dict[name_by_team_season[ts]] for ts in team_seasons], dtype=tf.float32)

    team_positions_df = team_positions_df.set_index('team_season')
    init_positions = team_positions_df[['PC1', 'PC2']].values
    def_ratios = team_positions_df[['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'total_goals_conceded', 'average_goals_conceded']].values
    off_ratios = team_positions_df[['off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6', 'total_goals_scored', 'average_goals_scored']].values

    # Standardize positions and features; the position scaler is kept to map
    # the optimized positions back to the original scale.
    scaler_pos = StandardScaler()
    positions_scaled = scaler_pos.fit_transform(init_positions)
    def_ratios_scaled = StandardScaler().fit_transform(def_ratios)
    off_ratios_scaled = StandardScaler().fit_transform(off_ratios)

    positions = tf.Variable(positions_scaled, dtype=tf.float32)
    def_ratios = tf.constant(def_ratios_scaled, dtype=tf.float32)
    off_ratios = tf.constant(off_ratios_scaled, dtype=tf.float32)
    normalized_defense_score = tf.constant(team_positions_df['normalized_defense_score'].values, dtype=tf.float32)
    points_per_game = tf.constant(team_positions_df['points_per_game'].values, dtype=tf.float32)

    # Flatten the per-league/season centroid dicts into one table and record
    # the global row of every (league_season, local cluster id) pair.
    all_centroids = []
    cluster_id_map = {}
    for league_season, centroids_dict in cluster_centroid_positions.items():
        for cid, pos in centroids_dict.items():
            cluster_id_map[(league_season, cid)] = len(all_centroids)
            all_centroids.append(pos)

    # Build home index, away index and global cluster id in ONE pass with ONE
    # filter, so the three arrays always stay aligned row by row.
    match_rows = []
    for _, row in match_positions_df.iterrows():
        home_key = f"{row['league']}_{row['home_team_name']}_{row['season']}"
        away_key = f"{row['league']}_{row['away_team_name']}_{row['season']}"
        centroid_key = (f"{row['league']}_{row['season']}", row['cluster_id'])
        if home_key in team_season_to_idx and away_key in team_season_to_idx and centroid_key in cluster_id_map:
            match_rows.append(
                [team_season_to_idx[home_key], team_season_to_idx[away_key], cluster_id_map[centroid_key]])
    if not match_rows:
        print("警告: 无有效比赛数据")
        return [], team_positions_df.reset_index(), None, None, None

    match_array = np.array(match_rows, dtype=np.int32)
    match_home_idx = tf.constant(match_array[:, 0], dtype=tf.int32)
    match_away_idx = tf.constant(match_array[:, 1], dtype=tf.int32)
    match_cluster_ids = tf.constant(match_array[:, 2], dtype=tf.int32)
    cluster_centroid_positions_tf = tf.constant(np.array(all_centroids), dtype=tf.float32)

    tf.random.set_seed(random_seed)
    rank_scale = tf.Variable(1.0, dtype=tf.float32)
    w_def_var = tf.Variable(np.abs(w_def) if w_def is not None else np.ones(8, dtype=np.float32) / 8, dtype=tf.float32)
    w_off_var = tf.Variable(np.abs(w_off) if w_off is not None else np.ones(8, dtype=np.float32) / 8, dtype=tf.float32)
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_lr, decay_steps, decay_rate)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=clipnorm)
    variables = [positions, rank_scale, w_def_var, w_off_var]

    losses, best_loss = [], float('inf')
    best_positions, best_rank_scale, best_w_def, best_w_off = (
        positions.numpy().copy(), 1.0, w_def_var.numpy().copy(), w_off_var.numpy().copy())

    for i in range(iterations):
        with tf.GradientTape() as tape:
            loss = compute_total_loss(positions, match_home_idx, match_away_idx, match_cluster_ids, cluster_centroid_positions_tf, points_per_game,
                                      rank_scale, def_ratios, w_def_var, off_ratios, w_off_var, normalized_defense_score,
                                      lambda_defense, lambda_offense, lambda_supervision, lambda_reg, elo_scores)
        grads = tape.gradient(loss, variables)
        if any(g is None for g in grads):
            # Without an update nothing changes between iterations, so every
            # further step would fail the same way: warn once and stop.
            print(f"警告: 迭代 {i + 1} 梯度为None")
            break

        loss_val = float(loss.numpy())
        losses.append(loss_val)

        # Snapshot BEFORE applying the gradients: `loss` was evaluated at the
        # current variable values, not at the post-update ones.
        if loss_val < best_loss:
            best_loss = loss_val
            best_positions, best_rank_scale, best_w_def, best_w_off = (
                positions.numpy().copy(), float(rank_scale.numpy()),
                w_def_var.numpy().copy(), w_off_var.numpy().copy())

        optimizer.apply_gradients(zip(grads, variables))

        if (i + 1) % verbose_interval == 0:
            print(f"迭代 {i + 1}/{iterations}, 损失 = {loss_val:.4f}, rank_scale = {rank_scale.numpy():.4f}, 最佳损失 = {best_loss:.4f}")

    # Map the best positions back to the original PC1/PC2 scale.
    final_pos = scaler_pos.inverse_transform(best_positions)
    for idx, team_season in enumerate(team_seasons):
        team_positions_df.loc[team_season, 'PC1'] = final_pos[idx, 0]
        team_positions_df.loc[team_season, 'PC2'] = final_pos[idx, 1]
    team_positions_df = team_positions_df.reset_index()
    return losses, team_positions_df, best_rank_scale, best_w_def, best_w_off

# ====================== 随机搜索超参数函数 ======================
def random_search_hyperparameters(team_positions, match_positions, cluster_centroid_positions, w_def, w_off, n_iter=10, random_state=42):
    """Randomly sample loss-weight hyperparameters and keep the best trial.

    Each trial draws ``lambda_defense``, ``lambda_offense`` and
    ``lambda_supervision`` uniformly from [0.01, 0.2) and ``lambda_reg`` from
    [0.001, 0.02), then runs ``adam_optimize_positions`` on copies of the
    input frames.

    Args:
        team_positions: Team DataFrame; a copy is handed to the optimizer.
        match_positions: Match DataFrame; a copy is handed to the optimizer.
        cluster_centroid_positions: Centroid mapping forwarded unchanged to
            the optimizer.
        w_def: Initial defensive weights forwarded to the optimizer.
        w_off: Initial offensive weights forwarded to the optimizer.
        n_iter: Number of random trials.
        random_state: Seed applied to NumPy's global random generator.

    Returns:
        The parameter dict of the best trial, or ``None`` when no trial
        produced any loss value.
    """
    np.random.seed(random_state)
    best_loss, best_params = float('inf'), None

    for _ in range(n_iter):
        params = {
            'lambda_defense': np.random.uniform(0.01, 0.2),
            'lambda_offense': np.random.uniform(0.01, 0.2),
            'lambda_supervision': np.random.uniform(0.01, 0.2),
            'lambda_reg': np.random.uniform(0.001, 0.02)
        }
        print(f"\n随机搜索: {params}")

        losses, _, _, _, _ = adam_optimize_positions(
            team_positions.copy(), match_positions.copy(), cluster_centroid_positions, w_def=w_def, w_off=w_off, **params
        )
        if not losses:
            continue
        # The optimizer returns the variables of its lowest-loss step, not of
        # its last step, so a trial is scored by the minimum loss it reached.
        # NOTE(review): totals obtained with different lambda weights are not
        # strictly comparable (smaller lambdas shrink the total) -- confirm
        # this selection criterion is what is wanted.
        trial_loss = min(losses)
        if trial_loss < best_loss:
            best_loss = trial_loss
            best_params = params

    print(f"\n最佳超参数: {best_params}, 最佳损失: {best_loss:.4f}")
    return best_params

# ====================== 可视化函数 ======================
def visualize_team_evolution_by_league_static(team_positions_df, seasons_order,
                                              output_dir="/Users/peixuanma/Downloads/Output_Graphs"):
    """Save one static PNG per league showing each team's PC1/PC2 trajectory.

    For every league, teams that appear in as many distinct seasons as
    ``seasons_order`` has entries are plotted; if no team qualifies, all teams
    of that league are plotted instead. Points of a team are connected in
    ascending order of the ``season`` string and labelled with the season.

    Args:
        team_positions_df: DataFrame with 'league', 'team_name', 'season' and
            (optionally) 'PC1' / 'PC2' columns.
        seasons_order: Sequence of season labels; only its length is used.
        output_dir: Directory the PNG files are written to (created if
            missing). Defaults to the path previously hard-coded here.
    """
    os.makedirs(output_dir, exist_ok=True)

    expected_seasons = len(seasons_order)
    # Column presence is the same for every team, so check it once.
    has_coordinates = 'PC1' in team_positions_df.columns and 'PC2' in team_positions_df.columns

    for league in team_positions_df['league'].unique():
        league_df = team_positions_df[team_positions_df['league'] == league].copy()
        ever_present = league_df.groupby("team_name").filter(
            lambda group: group['season'].nunique() == expected_seasons)['team_name'].unique()
        # Fall back to every team when none is present in all seasons.
        teams_to_plot = ever_present if ever_present.size else league_df['team_name'].unique()
        valid_df = league_df[league_df['team_name'].isin(teams_to_plot)].sort_values(['team_name', 'season'])

        # Explicit figure/axes objects avoid depending on pyplot's global
        # "current figure" state.
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.set_title(f"{league} - 球队演变 (进攻-防守)")
        ax.set_xlabel("PC1 (进攻)")
        ax.set_ylabel("PC2 (防守)")
        ax.grid(True)

        if has_coordinates:
            for team in valid_df['team_name'].unique():
                sub = valid_df[valid_df['team_name'] == team]
                ax.plot(sub['PC1'], sub['PC2'], marker='o', label=team)
                for x, y, season in zip(sub['PC1'], sub['PC2'], sub['season']):
                    ax.text(x, y, season, fontsize=8, ha='right')

        # Legend is placed outside the axes so it does not cover trajectories.
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()
        filename = os.path.join(output_dir, f"{league}_evolution.png")
        fig.savefig(filename, dpi=150)
        plt.close(fig)
        print(f"保存 {league} 图 -> {filename}")

# ====================== 主函数 ======================
if __name__ == "__main__":
    # Script entry point: load all league data, random-search the loss
    # weights, run the final optimisation, export the result and plot it.
    base_path = '/Users/peixuanma/Downloads/data1'
    # (country, league) pairs; together with a season they form the CSV file names.
    leagues = [
        ("england", "premier-league"), ("germany", "bundesliga"), ("spain", "la-liga"),
        ("france", "ligue-1"), ("france", "ligue-2"), ("italy", "serie-a"), ("netherlands", "eredivisie"),
        ("portugal", "ligapro"), ("denmark", "superliga"), ("england", "championship"), ("spain", "segunda-division"),
        ("switzerland", "super-league"), ("portugal", "liga-nos"), ("italy", "serie-b"), ("germany", "2-bundesliga"),
        ("scotland", "premiership"), ("belgium", "pro-league"), ("austria", "bundesliga"),
    ]
    seasons = [
        "2013-to-2014", "2014-to-2015", "2015-to-2016", "2016-to-2017", "2017-to-2018",
        "2018-to-2019", "2019-to-2020", "2020-to-2021", "2021-to-2022", "2022-to-2023", "2023-to-2024"
    ]

    # Manually supplied defensive and offensive feature weights (example values).
    w_def = np.array([
        0.12714323, 0.11415731, 0.04532705, 0.23075414, 0.03240472, 0.14205644, 0.13323908, 0.17491804
    ], dtype=np.float32)

    w_off = np.array([
        0.07438711, 0.07787083, 0.08495551, 0.22353454, 0.08152269, 0.08466186, 0.16022788, 0.21283953
    ], dtype=np.float32)

    # Load data
    print("===== 加载数据 =====")
    all_team_positions, all_match_positions, cluster_centroid_positions = load_all_league_data(base_path, leagues, seasons)

    # Random hyperparameter search
    print("\n===== 随机搜索超参数 =====")
    best_params = random_search_hyperparameters(all_team_positions, all_match_positions, cluster_centroid_positions, w_def, w_off)
    if best_params is None:
        # The search returns None when no trial produced a loss; unpacking
        # None with ** would raise TypeError, so use the optimizer defaults.
        print("警告: 随机搜索未找到有效超参数，使用默认超参数")
        best_params = {}

    # Final training with the selected hyperparameters
    print("\n===== 最终训练 =====")
    final_losses, final_team_positions, final_rank_scale, final_w_def, final_w_off = adam_optimize_positions(
        all_team_positions.copy(), all_match_positions.copy(), cluster_centroid_positions, w_def=w_def, w_off=w_off, **best_params
    )
    if final_losses:
        final_team_positions.to_csv("trained_team_positions.csv", index=False)
        print(f"最终损失: {final_losses[-1]:.4f}")
        print(f"最终防守权重: {final_w_def}")
        print(f"最终进攻权重: {final_w_off}")

    # Visualisation
    print("\n===== 可视化 =====")
    visualize_team_evolution_by_league_static(final_team_positions, seasons)

    print("\n完成！")

scikit-learn 版本: 1.4.2
===== 加载数据 =====
加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2019-to-2020
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2019-to-2020
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesliga - 2021-to-2022
加载数据: germany - bundesliga - 2022-to-2023
加载数据: germany - bundesliga - 2023-

  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 premier-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/premier-league_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 bundesliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/bundesliga_evolution.png
保存 la-liga 图 -> /Users/peixuanma/Downloads/Output_Graphs/la-liga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligue-1 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligue-1_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligue-2 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligue-2_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-a 图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-a_evolution.png
保存 eredivisie 图 -> /Users/peixuanma/Downloads/Output_Graphs/eredivisie_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligapro 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligapro_evolution.png
保存 superliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/superliga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 championship 图 -> /Users/peixuanma/Downloads/Output_Graphs/championship_evolution.png
保存 segunda-division 图 -> /Users/peixuanma/Downloads/Output_Graphs/segunda-division_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tigh

保存 super-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/super-league_evolution.png
保存 liga-nos 图 -> /Users/peixuanma/Downloads/Output_Graphs/liga-nos_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-b 图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-b_evolution.png
保存 2-bundesliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/2-bundesliga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 premiership 图 -> /Users/peixuanma/Downloads/Output_Graphs/premiership_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 pro-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/pro-league_evolution.png

完成！


In [10]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.model_selection import KFold
import sklearn

# Print the installed scikit-learn version when this cell runs.
print(f"scikit-learn 版本: {sklearn.__version__}")

# ====================== 计算防守统计数据 ======================
def compute_defensive_stats(match_df, team_positions_df):
    """Aggregate per-team defensive statistics from match rows.

    For each match whose home and away teams are both known, six
    "goals conceded per X" ratios are recorded for the HOME team, using the
    away side's goals divided by: away pre-match PPG (ratio1), away corners
    (ratio2), home yellow + red cards + fouls (ratio3), away xG ``team_b_xg``
    (ratio4), away shots on + off target (ratio5) and away possession
    (ratio6). Goals conceded and match counts are accumulated over both home
    and away appearances.

    A ratio sample is recorded only when its denominator is strictly
    positive. Previously ratio3 and ratio5 were always recorded, so a zero
    denominator with goals > 0 produced a sample of about goals * 1e8 that
    dominated the team's mean.

    Args:
        match_df: Match DataFrame with the team-name, goal-count and
            statistic columns read below.
        team_positions_df: DataFrame whose 'team_name' column lists the teams
            to report on.

    Returns:
        DataFrame with one row per team and columns team_name, ratio1..ratio6,
        total_goals_conceded, average_goals_conceded. A ratio without any
        valid sample is 0.
    """
    epsilon = 1e-8  # kept in the divisor so accepted samples keep their previous values

    team_stats = {
        team: {
            'total_goals_conceded': 0, 'ratio1_list': [], 'ratio2_list': [], 'ratio3_list': [],
            'ratio4_list': [], 'ratio5_list': [], 'ratio6_list': [], 'num_matches': 0
        }
        for team in team_positions_df['team_name'].unique()
    }

    def add_ratio(samples, goals, denominator):
        """Append goals / denominator when the denominator is strictly positive."""
        # NaN compares False here, so missing values are skipped as well.
        if denominator > 0:
            samples.append(goals / (denominator + epsilon))

    # Home appearances: ratios, goals conceded and match count for the home team.
    for idx, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        if home_team not in team_stats or away_team not in team_stats:
            print(f"警告: 主队 {home_team} 或客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        stats = team_stats[home_team]
        conceded = row['away_team_goal_count']
        add_ratio(stats['ratio1_list'], conceded, row['Pre-Match PPG (Away)'])
        add_ratio(stats['ratio2_list'], conceded, row['away_team_corner_count'])
        add_ratio(stats['ratio3_list'], conceded,
                  row['home_team_yellow_cards'] + row['home_team_red_cards'] + row['home_team_fouls'])
        add_ratio(stats['ratio4_list'], conceded, row['team_b_xg'])
        add_ratio(stats['ratio5_list'], conceded,
                  row['away_team_shots_on_target'] + row['away_team_shots_off_target'])
        add_ratio(stats['ratio6_list'], conceded, row['away_team_possession'])
        stats['total_goals_conceded'] += conceded
        stats['num_matches'] += 1

    # Away appearances: only goals conceded and match count are accumulated.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            print(f"警告: 客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        team_stats[away_team]['total_goals_conceded'] += row['home_team_goal_count']
        team_stats[away_team]['num_matches'] += 1

    # Summarise: mean of each ratio list (0 when empty) plus goal totals.
    data = []
    for team, stats in team_stats.items():
        num_matches = stats['num_matches']
        summary = {'team_name': team}
        for i in range(1, 7):
            samples = stats[f'ratio{i}_list']
            summary[f'ratio{i}'] = np.mean(samples) if samples else 0
        summary['total_goals_conceded'] = stats['total_goals_conceded']
        summary['average_goals_conceded'] = stats['total_goals_conceded'] / num_matches if num_matches > 0 else 0
        data.append(summary)

    return pd.DataFrame(data)

# ====================== 计算进攻统计数据 ======================
def compute_offensive_stats(match_df, team_positions_df):
    """Aggregate per-team offensive statistics from match rows.

    For each match whose home and away teams are both known, six
    "goals scored per X" ratios are recorded for the HOME team, using the
    home side's goals divided by: home pre-match PPG (off_ratio1), home
    corners (off_ratio2), away yellow + red cards + fouls (off_ratio3), home
    xG ``team_a_xg`` (off_ratio4), home shots on + off target (off_ratio5)
    and home possession (off_ratio6). Goals scored and match counts are
    accumulated over both home and away appearances.

    A ratio sample is recorded only when its denominator is strictly
    positive. Previously off_ratio3 and off_ratio5 were always recorded, so a
    zero denominator with goals > 0 produced a sample of about goals * 1e8
    that dominated the team's mean.

    Args:
        match_df: Match DataFrame with the team-name, goal-count and
            statistic columns read below.
        team_positions_df: DataFrame whose 'team_name' column lists the teams
            to report on.

    Returns:
        DataFrame with one row per team and columns team_name,
        off_ratio1..off_ratio6, total_goals_scored, average_goals_scored.
        A ratio without any valid sample is 0.
    """
    epsilon = 1e-8  # kept in the divisor so accepted samples keep their previous values

    team_stats = {
        team: {
            'total_goals_scored': 0, 'off_ratio1_list': [], 'off_ratio2_list': [], 'off_ratio3_list': [],
            'off_ratio4_list': [], 'off_ratio5_list': [], 'off_ratio6_list': [], 'num_matches': 0
        }
        for team in team_positions_df['team_name'].unique()
    }

    def add_ratio(samples, goals, denominator):
        """Append goals / denominator when the denominator is strictly positive."""
        # NaN compares False here, so missing values are skipped as well.
        if denominator > 0:
            samples.append(goals / (denominator + epsilon))

    # Home appearances: ratios, goals scored and match count for the home team.
    for idx, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        if home_team not in team_stats or away_team not in team_stats:
            continue
        stats = team_stats[home_team]
        scored = row['home_team_goal_count']
        add_ratio(stats['off_ratio1_list'], scored, row['Pre-Match PPG (Home)'])
        add_ratio(stats['off_ratio2_list'], scored, row['home_team_corner_count'])
        add_ratio(stats['off_ratio3_list'], scored,
                  row['away_team_yellow_cards'] + row['away_team_red_cards'] + row['away_team_fouls'])
        add_ratio(stats['off_ratio4_list'], scored, row['team_a_xg'])
        add_ratio(stats['off_ratio5_list'], scored,
                  row['home_team_shots_on_target'] + row['home_team_shots_off_target'])
        add_ratio(stats['off_ratio6_list'], scored, row['home_team_possession'])
        stats['total_goals_scored'] += scored
        stats['num_matches'] += 1

    # Away appearances: only goals scored and match count are accumulated.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            continue
        team_stats[away_team]['total_goals_scored'] += row['away_team_goal_count']
        team_stats[away_team]['num_matches'] += 1

    # Summarise: mean of each ratio list (0 when empty) plus goal totals.
    data = []
    for team, stats in team_stats.items():
        num_matches = stats['num_matches']
        summary = {'team_name': team}
        for i in range(1, 7):
            samples = stats[f'off_ratio{i}_list']
            summary[f'off_ratio{i}'] = np.mean(samples) if samples else 0
        summary['total_goals_scored'] = stats['total_goals_scored']
        summary['average_goals_scored'] = stats['total_goals_scored'] / num_matches if num_matches > 0 else 0
        data.append(summary)

    return pd.DataFrame(data)

# ====================== ELO评分算法实现 ======================
def initialize_elo_scores(team_positions_df):
    """Build initial Elo ratings from each team's points per game.

    Every team starts at 1500 and receives a bonus of 20 points per place it
    sits above the bottom of a ranking of all teams by mean
    ``points_per_game`` (rank 1 = highest mean). The best team therefore gets
    ``1500 + 20 * (n_teams - 1)`` and the worst ``1500``.

    The previous implementation ranked a team's own rows against each other,
    so the bonus did not depend on how the team compared with other teams.

    Args:
        team_positions_df: DataFrame with 'team_name' and 'points_per_game'
            columns; a team may appear in several rows.

    Returns:
        dict mapping team name to its initial Elo rating (float).
    """
    teams = team_positions_df['team_name'].unique().tolist()
    n_teams = len(teams)

    # One strength value per team, averaged over all of its rows.
    mean_ppg = team_positions_df.groupby('team_name')['points_per_game'].mean()
    # rank 1 = strongest; teams without a usable value are placed last (bonus 0).
    strength_rank = mean_ppg.rank(ascending=False).fillna(n_teams)

    return {team: 1500 + 20 * (n_teams - strength_rank.loc[team]) for team in teams}

def update_elo_scores(elo_scores, home_team, away_team, home_score, away_score, K=30):
    """Apply one match result to the Elo table and return the table.

    Args:
        elo_scores: dict mapping team name to rating; updated in place.
        home_team: Key of the home team in ``elo_scores``.
        away_team: Key of the away team in ``elo_scores``.
        home_score: Goals scored by the home team.
        away_score: Goals scored by the away team.
        K: Update step size.

    Returns:
        The same ``elo_scores`` dict after the update.
    """
    rating_home = elo_scores[home_team]
    rating_away = elo_scores[away_team]

    # Logistic expectation on a 400-point scale for each side.
    expected_home = 1 / (1 + 10 ** ((rating_away - rating_home) / 400))
    expected_away = 1 / (1 + 10 ** ((rating_home - rating_away) / 400))

    # Actual outcome per side: 1 for a win, 0 for a loss, 0.5 each otherwise.
    if home_score > away_score:
        actual_home, actual_away = 1, 0
    elif home_score < away_score:
        actual_home, actual_away = 0, 1
    else:
        actual_home, actual_away = 0.5, 0.5

    elo_scores[home_team] += K * (actual_home - expected_home)
    elo_scores[away_team] += K * (actual_away - expected_away)
    return elo_scores

# ====================== 数据加载函数（含聚类） ======================
def _fold_averaged_pca(feature_df, n_components=2, n_splits=5, random_state=42):
    """Project every row to ``n_components`` dims, averaging PCA fits over K-fold training subsets."""
    values = np.asarray(feature_df, dtype=float)
    n_rows = len(values)

    if n_rows < n_splits:
        # Too few rows to split; fit once on everything.
        train_sets = [np.arange(n_rows)]
    else:
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        train_sets = [train_idx for train_idx, _ in kf.split(values)]

    projections = []
    reference_axes = None
    for train_idx in train_sets:
        scaler = StandardScaler().fit(values[train_idx])
        pca = PCA(n_components=n_components).fit(scaler.transform(values[train_idx]))
        # Transform ALL rows so row i of the output always belongs to row i
        # of the input, whichever rows the fold was fitted on.
        projected = pca.transform(scaler.transform(values))
        if reference_axes is None:
            reference_axes = pca.components_
        else:
            # A principal axis is only defined up to sign; flip axes that
            # point against the first fold so the average does not cancel.
            signs = np.sign(np.sum(pca.components_ * reference_axes, axis=1))
            signs[signs == 0] = 1.0
            projected = projected * signs
        projections.append(projected)

    return np.mean(projections, axis=0)


def load_all_league_data(base_path, leagues, seasons):
    """Load every league/season CSV pair and build team and match tables.

    For each (country, league) and season, the files
    ``{country}-{league}-teams-{season}-stats.csv`` and
    ``{country}-{league}-matches-{season}-stats.csv`` under ``base_path`` are
    read. Missing pairs are skipped with a warning. Per season the function
    computes defensive/offensive team statistics, an initial 2-D PCA position
    per team from the defensive features only, and a K-means clustering of
    the numeric match columns whose centroids are reduced to 2-D.

    Changes from the earlier version: the initial PCA positions are now
    row-aligned with the teams (fold results used to be zero-padded at the
    end and averaged, which mixed different teams' coordinates), numeric
    match columns that are entirely NaN are filled with 0 before scaling, and
    an empty load raises a descriptive ValueError.

    Args:
        base_path: Directory containing the CSV files.
        leagues: Iterable of (country_name, league_name) pairs.
        seasons: Iterable of season labels used in the file names.

    Returns:
        Tuple ``(team_positions, match_positions, centroid_positions)``:
        the concatenated team table, the concatenated match table (team
        names, cluster_id, league, season), and a dict keyed by
        ``"{league_name}_{season}"`` mapping cluster id to its 2-D centroid.

    Raises:
        ValueError: If no league/season produced any data.
    """
    all_team_positions, all_match_positions = [], []
    global_cluster_centroid_positions = {}

    defensive_columns = ['total_goals_conceded', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'average_goals_conceded']
    stat_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'total_goals_conceded', 'average_goals_conceded',
                    'off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6', 'total_goals_scored', 'average_goals_scored']

    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not os.path.exists(team_file) or not os.path.exists(match_file):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Normalise team names (trim + lower-case) so both files match.
            team_df['team_name'] = team_df.get('common_name', team_df.get('team_name', None)).str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            # Keep only matches where both sides appear in the team file.
            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)]
            if len(match_df) < original_match_count:
                print(f"警告: 过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 无有效比赛数据")
                continue

            # Defensive and offensive statistics per team.
            defensive_stats_df = compute_defensive_stats(match_df, team_df)
            offensive_stats_df = compute_offensive_stats(match_df, team_df)
            if defensive_stats_df.empty or offensive_stats_df.empty:
                print("警告: 未计算出防守或进攻统计")
                continue

            team_df = team_df.merge(defensive_stats_df, on='team_name', how='left')
            team_df = team_df.merge(offensive_stats_df, on='team_name', how='left')

            for col in stat_columns:
                if col not in team_df.columns:
                    print(f"警告: {col} 列缺失，设为0")
                    team_df[col] = 0

            # Min-max normalised defence score: fewer goals conceded -> higher score.
            if 'total_goals_conceded' in team_df.columns and team_df['total_goals_conceded'].max() != team_df['total_goals_conceded'].min():
                team_df['normalized_defense_score'] = (team_df['total_goals_conceded'].max() - team_df['total_goals_conceded']) / (
                    team_df['total_goals_conceded'].max() - team_df['total_goals_conceded'].min() + 1e-8)
            else:
                team_df['normalized_defense_score'] = 0

            # Initial 2-D positions from a PCA of the defensive features.
            # NOTE(review): offensive features are not part of this PCA even
            # though PC1 is later labelled as the offensive axis — confirm intent.
            team_df_defensive = team_df[defensive_columns].fillna(0)
            X_pca_avg = _fold_averaged_pca(team_df_defensive, n_components=2, n_splits=5, random_state=42)

            team_positions = pd.DataFrame({
                'team_name': team_df['team_name'], 'PC1': X_pca_avg[:, 0], 'PC2': X_pca_avg[:, 1],
                'points_per_game': team_df['points_per_game'], 'league': league_name, 'season': season,
                'ratio1': team_df['ratio1'], 'ratio2': team_df['ratio2'], 'ratio3': team_df['ratio3'],
                'ratio4': team_df['ratio4'], 'ratio5': team_df['ratio5'], 'ratio6': team_df['ratio6'],
                'off_ratio1': team_df['off_ratio1'], 'off_ratio2': team_df['off_ratio2'], 'off_ratio3': team_df['off_ratio3'],
                'off_ratio4': team_df['off_ratio4'], 'off_ratio5': team_df['off_ratio5'], 'off_ratio6': team_df['off_ratio6'],
                'normalized_defense_score': team_df['normalized_defense_score'],
                'total_goals_conceded': team_df['total_goals_conceded'],
                'average_goals_conceded': team_df['average_goals_conceded'],
                'total_goals_scored': team_df['total_goals_scored'],
                'average_goals_scored': team_df['average_goals_scored']
            })
            # NOTE(review): keys below use league_name without the country, so
            # two leagues sharing a name (e.g. "bundesliga" for two countries)
            # collide in 'league', 'team_season' and the centroid dict.
            team_positions['team_season'] = league_name + '_' + team_positions['team_name'] + '_' + team_positions['season']
            all_team_positions.append(team_positions)

            # Cluster matches on their numeric columns.
            match_df = match_df.drop(columns=['timestamp', 'date_GMT', 'status', 'attendance', 'referee', 'stadium_name', 'Game Week'], errors='ignore')
            numeric_cols = match_df.select_dtypes(include=[np.number]).columns
            # Column mean first; a column that is entirely NaN has a NaN mean,
            # so fall back to 0 to keep NaN out of the scaler and K-means.
            match_df[numeric_cols] = match_df[numeric_cols].fillna(match_df[numeric_cols].mean()).fillna(0)
            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(match_df[numeric_cols])

            # Pick the number of clusters k with the best silhouette score.
            silhouette_scores = []
            max_k = min(10, len(X_scaled) // 2)
            for k in range(2, max_k + 1):
                kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
                cluster_labels = kmeans.fit_predict(X_scaled)
                silhouette_avg = silhouette_score(X_scaled, cluster_labels)
                silhouette_scores.append(silhouette_avg)
            if silhouette_scores:
                best_k = np.argmax(silhouette_scores) + 2  # index 0 corresponds to k = 2
            else:
                best_k = 2

            # Final clustering with the selected k.
            kmeans = KMeans(n_clusters=best_k, random_state=42, n_init=10)
            cluster_labels = kmeans.fit_predict(X_scaled)
            match_df['cluster_id'] = cluster_labels

            # Reduce the cluster centroids to 2-D and store them per league/season.
            centroids = kmeans.cluster_centers_
            if len(centroids) > 0:
                pca_centroids = PCA(n_components=2).fit_transform(centroids)
                cluster_centroid_positions = {i: pca_centroids[i] for i in range(best_k)}
                global_cluster_centroid_positions[f"{league_name}_{season}"] = cluster_centroid_positions

            # Match table: teams, assigned cluster, league and season.
            match_positions = pd.DataFrame({
                'home_team_name': match_df['home_team_name'], 'away_team_name': match_df['away_team_name'],
                'cluster_id': match_df['cluster_id'], 'league': league_name, 'season': season
            })
            all_match_positions.append(match_positions)

    if not all_team_positions or not all_match_positions:
        # pd.concat would raise an uninformative ValueError on an empty list.
        raise ValueError(f"未能从 {base_path} 加载任何联赛数据")

    return pd.concat(all_team_positions, ignore_index=True), pd.concat(all_match_positions, ignore_index=True), global_cluster_centroid_positions

# ====================== 损失函数 ======================
def compute_total_loss(positions, match_home_idx, match_away_idx, match_cluster_ids, cluster_centroid_positions, points_per_game, rank_scale,
                       def_ratios, w_def, off_ratios, w_off, normalized_defense_score, lambda_defense, lambda_offense,
                       lambda_supervision, lambda_reg, elo_scores, home_advantage, match_results):
    """Return the scalar training loss for the current team positions.

    The loss is the sum of: an Elo-weighted match term (distance of both
    teams to their match's cluster centroid), weighted defence, offence and
    supervision terms, an L2 penalty on the feature weights, and a squared
    error between the Elo-expected home result and ``match_results``.
    Returns 0.0 when there are no matches, and also when the total is NaN or
    infinite. ``points_per_game`` is accepted but not used.
    """
    epsilon = 1e-8
    if tf.shape(match_home_idx)[0] == 0:
        return tf.constant(0.0, dtype=tf.float32)

    # --- Match term -------------------------------------------------------
    centroid_targets = tf.gather(cluster_centroid_positions, match_cluster_ids)

    def distance_to_target(team_idx):
        # epsilon is added to the difference vector before taking the norm.
        return tf.norm(tf.gather(positions, team_idx) - centroid_targets + epsilon, axis=1)

    dist_home = distance_to_target(match_home_idx)
    dist_away = distance_to_target(match_away_idx)
    pooled = tf.concat([dist_home, dist_away], axis=0)
    spread = tf.reduce_max(pooled) - tf.reduce_min(pooled) + epsilon

    def rescale(dist):
        # Min-max scaling over the pooled home and away distances.
        return (dist - tf.reduce_min(pooled)) / spread

    dist_home_norm = rescale(dist_home)
    dist_away_norm = rescale(dist_away)

    # Home advantage is added to the home side's Elo before comparing.
    home_elo = tf.gather(elo_scores, match_home_idx) + home_advantage
    away_elo = tf.gather(elo_scores, match_away_idx)
    elo_gap = tf.abs(home_elo - away_elo)
    match_weight = 1.0 / (1.0 + elo_gap * rank_scale + epsilon)
    match_loss = tf.reduce_mean(match_weight * (dist_home_norm + dist_away_norm))

    # --- Defence term: second coordinate vs. negated weighted ratios ------
    w_def = tf.abs(w_def)
    defense_target = -tf.reduce_sum(w_def * def_ratios, axis=1)
    defense_loss = tf.reduce_mean(tf.square(positions[:, 1] - defense_target))

    # --- Offence term: first coordinate vs. weighted ratios ---------------
    w_off = tf.abs(w_off)
    offense_target = tf.reduce_sum(w_off * off_ratios, axis=1)
    offense_loss = tf.reduce_mean(tf.square(positions[:, 0] - offense_target))

    # --- Supervision term -------------------------------------------------
    supervision_loss = tf.reduce_mean(tf.square(defense_target - normalized_defense_score))

    # --- L2 regularisation of the feature weights (already scaled) --------
    regularization_loss = lambda_reg * (tf.reduce_sum(tf.square(w_def)) + tf.reduce_sum(tf.square(w_off)))

    # --- Home-advantage term: expected home result vs. observed result ----
    expected_home = 1 / (1 + 10 ** ((away_elo - home_elo) / 400))
    actual_result = tf.cast(match_results, tf.float32)  # 1: home win, 0: draw or away win
    home_advantage_loss = tf.reduce_mean(tf.square(expected_home - actual_result))

    total_loss = (match_loss + lambda_defense * defense_loss + lambda_offense * offense_loss +
                  lambda_supervision * supervision_loss + regularization_loss + home_advantage_loss)
    # NOTE(review): a NaN/inf total is reported as 0.0, which a caller that
    # tracks the minimum loss would treat as the best value — confirm intent.
    is_invalid = tf.math.is_nan(total_loss) | tf.math.is_inf(total_loss)
    return tf.where(is_invalid, 0.0, total_loss)

# ====================== Adam优化函数 ======================
def adam_optimize_positions(team_positions_df, match_positions_df, cluster_centroid_positions, initial_lr=0.001, decay_steps=20000, decay_rate=0.9,
                            clipnorm=0.5, iterations=100000, verbose_interval=1000, random_seed=42, lambda_defense=0.1,
                            lambda_offense=0.1, lambda_supervision=0.1, lambda_reg=0.01, w_def=None, w_off=None):
    """Optimize team positions with Adam while learning a home-advantage offset.

    Trainable variables are the standardized (PC1, PC2) positions, ``rank_scale``,
    the defensive/offensive weight vectors and the scalar ``home_advantage``.
    The objective is ``compute_total_loss``.

    Args:
        team_positions_df: DataFrame with a ``team_season`` column, ``PC1``/``PC2``,
            the eight defensive and eight offensive feature columns read below,
            ``normalized_defense_score`` and ``points_per_game``.
        match_positions_df: DataFrame with ``league``, ``season``,
            ``home_team_name``, ``away_team_name`` and ``cluster_id`` columns.
            ``home_team_goal_count``/``away_team_goal_count`` are read when
            present to derive the home-win label (missing values count as 0).
        cluster_centroid_positions: Mapping ``"{league}_{season}"`` ->
            ``{cluster_id: position}``.
        initial_lr, decay_steps, decay_rate: Exponential learning-rate schedule.
        clipnorm: Gradient-norm clipping passed to Adam.
        iterations: Number of optimization steps.
        verbose_interval: Progress is printed every this many steps.
        random_seed: Seed passed to ``tf.random.set_seed``.
        lambda_defense, lambda_offense, lambda_supervision, lambda_reg:
            Loss-term weights forwarded to ``compute_total_loss``.
        w_def, w_off: Optional initial weight vectors (absolute value is taken);
            default is a uniform vector of length 8.

    Returns:
        ``(losses, team_positions_df, best_rank_scale, best_w_def, best_w_off,
        best_home_advantage)``. ``losses`` holds one value per applied step and
        the returned frame carries the best positions mapped back to the
        original PC scale. If no match survives filtering, returns
        ``([], team_positions_df, None, None, None, None)``.
    """
    team_seasons = team_positions_df['team_season'].unique()
    team_season_to_idx = {t: i for i, t in enumerate(team_seasons)}

    elo_scores_dict = initialize_elo_scores(team_positions_df)
    # The second '_'-separated field of team_season is used as the ELO lookup key
    # (presumably the team name, given the "{league}_{team}_{season}" keys built below).
    team_names = [t.split('_')[1] for t in team_seasons]
    elo_scores = tf.convert_to_tensor([elo_scores_dict[name] for name in team_names], dtype=tf.float32)

    team_positions_df = team_positions_df.set_index('team_season')
    init_positions = team_positions_df[['PC1', 'PC2']].values
    def_ratios = team_positions_df[['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'total_goals_conceded', 'average_goals_conceded']].values
    off_ratios = team_positions_df[['off_ratio1', 'off_ratio2', 'off_ratio3', 'off_ratio4', 'off_ratio5', 'off_ratio6', 'total_goals_scored', 'average_goals_scored']].values

    # Standardize positions and features; scaler_pos is kept to undo the scaling at the end.
    scaler_pos = StandardScaler()
    scaler_def = StandardScaler()
    scaler_off = StandardScaler()
    positions_scaled = scaler_pos.fit_transform(init_positions)
    def_ratios_scaled = scaler_def.fit_transform(def_ratios)
    off_ratios_scaled = scaler_off.fit_transform(off_ratios)

    positions = tf.Variable(positions_scaled, dtype=tf.float32)
    def_ratios = tf.constant(def_ratios_scaled, dtype=tf.float32)
    off_ratios = tf.constant(off_ratios_scaled, dtype=tf.float32)
    normalized_defense_score = tf.constant(team_positions_df['normalized_defense_score'].values, dtype=tf.float32)
    points_per_game = tf.constant(team_positions_df['points_per_game'].values, dtype=tf.float32)

    # Learnable home-advantage offset, initialized to 50.
    home_advantage = tf.Variable(50.0, dtype=tf.float32, trainable=True)

    # Flatten the per-league/season centroids into one list and remember the
    # global row of every (league_season, cluster_id) pair.
    all_centroids = []
    cluster_id_map = {}
    for league_season, centroids_dict in cluster_centroid_positions.items():
        for cid, pos in centroids_dict.items():
            cluster_id_map[(league_season, cid)] = len(all_centroids)
            all_centroids.append(pos)

    # Build every per-match list in ONE pass so that home index, away index,
    # centroid index and result always describe the same match. A match is
    # kept only when both teams and its cluster centroid are known; filtering
    # these conditions in separate passes can leave the tensors misaligned.
    home_indices, away_indices, cluster_indices, results = [], [], [], []
    for _, row in match_positions_df.iterrows():
        home_key = f"{row['league']}_{row['home_team_name']}_{row['season']}"
        away_key = f"{row['league']}_{row['away_team_name']}_{row['season']}"
        if home_key not in team_season_to_idx or away_key not in team_season_to_idx:
            continue
        cluster_key = (f"{row['league']}_{row['season']}", row['cluster_id'])
        if cluster_key not in cluster_id_map:
            continue
        home_indices.append(team_season_to_idx[home_key])
        away_indices.append(team_season_to_idx[away_key])
        cluster_indices.append(cluster_id_map[cluster_key])
        # Label: 1 for a home win, 0 for a draw or an away win.
        home_won = row.get('home_team_goal_count', 0) > row.get('away_team_goal_count', 0)
        results.append(1 if home_won else 0)

    if not home_indices:
        print("警告: 无有效比赛数据")
        return [], team_positions_df.reset_index(), None, None, None, None

    match_home_idx = tf.constant(home_indices, dtype=tf.int32)
    match_away_idx = tf.constant(away_indices, dtype=tf.int32)
    match_cluster_ids = tf.constant(cluster_indices, dtype=tf.int32)
    match_results = tf.constant(np.array(results, dtype=np.int32), dtype=tf.int32)
    cluster_centroid_positions_tf = tf.constant(np.array(all_centroids), dtype=tf.float32)

    tf.random.set_seed(random_seed)
    rank_scale = tf.Variable(1.0, dtype=tf.float32)
    w_def = tf.Variable(np.abs(w_def) if w_def is not None else np.ones(8, dtype=np.float32) / 8, dtype=tf.float32)
    w_off = tf.Variable(np.abs(w_off) if w_off is not None else np.ones(8, dtype=np.float32) / 8, dtype=tf.float32)
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_lr, decay_steps, decay_rate)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=clipnorm)

    trainable = [positions, rank_scale, w_def, w_off, home_advantage]
    losses, best_loss = [], float('inf')
    best_positions = positions.numpy().copy()
    best_rank_scale = 1.0
    best_w_def, best_w_off = w_def.numpy().copy(), w_off.numpy().copy()
    best_home_advantage = float(home_advantage.numpy())

    for i in range(iterations):
        with tf.GradientTape() as tape:
            loss = compute_total_loss(positions, match_home_idx, match_away_idx, match_cluster_ids, cluster_centroid_positions_tf, points_per_game,
                                      rank_scale, def_ratios, w_def, off_ratios, w_off, normalized_defense_score,
                                      lambda_defense, lambda_offense, lambda_supervision, lambda_reg, elo_scores,
                                      home_advantage, match_results)
        grads = tape.gradient(loss, trainable)
        if any(g is None for g in grads):
            print(f"警告: 迭代 {i + 1} 梯度为None")
            continue

        loss_val = float(loss.numpy())
        losses.append(loss_val)

        # Snapshot BEFORE the update: loss_val was evaluated at the current
        # variable values, so these are the parameters that achieved it.
        if loss_val < best_loss:
            best_loss = loss_val
            best_positions = positions.numpy().copy()
            best_rank_scale = float(rank_scale.numpy())
            best_w_def = w_def.numpy().copy()
            best_w_off = w_off.numpy().copy()
            best_home_advantage = float(home_advantage.numpy())

        optimizer.apply_gradients(zip(grads, trainable))

        if (i + 1) % verbose_interval == 0:
            print(f"迭代 {i + 1}/{iterations}, 损失 = {loss_val:.4f}, rank_scale = {rank_scale.numpy():.4f}, "
                  f"主场优势 = {home_advantage.numpy():.4f}, 最佳损失 = {best_loss:.4f}")

    # Restore the best parameters seen and map positions back to the original PC scale.
    positions.assign(best_positions)
    rank_scale.assign(best_rank_scale)
    w_def.assign(best_w_def)
    w_off.assign(best_w_off)
    home_advantage.assign(best_home_advantage)
    final_pos = scaler_pos.inverse_transform(positions.numpy())
    for idx, team_season in enumerate(team_seasons):
        team_positions_df.loc[team_season, 'PC1'] = final_pos[idx, 0]
        team_positions_df.loc[team_season, 'PC2'] = final_pos[idx, 1]
    team_positions_df = team_positions_df.reset_index()
    return losses, team_positions_df, best_rank_scale, best_w_def, best_w_off, best_home_advantage

# ====================== 随机搜索超参数函数 ======================
def random_search_hyperparameters(team_positions, match_positions, cluster_centroid_positions, w_def, w_off, n_iter=10, random_state=42):
    """Randomly sample loss-weight hyperparameters and keep the best trial.

    Each trial draws ``lambda_defense``, ``lambda_offense`` and
    ``lambda_supervision`` uniformly from [0.01, 0.2) and ``lambda_reg`` from
    [0.001, 0.02), then runs ``adam_optimize_positions`` on copies of the
    input frames.

    Args:
        team_positions: Team DataFrame forwarded (as a copy) to the optimizer.
        match_positions: Match DataFrame forwarded (as a copy) to the optimizer.
        cluster_centroid_positions: Centroid mapping forwarded to the optimizer.
        w_def, w_off: Initial defensive/offensive weight vectors.
        n_iter: Number of random trials.
        random_state: Seed for NumPy's global random generator.

    Returns:
        The parameter dict of the best trial, or ``None`` when no trial
        produced any loss value.
    """
    np.random.seed(random_state)
    best_loss, best_params = float('inf'), None

    for _ in range(n_iter):
        params = {
            'lambda_defense': np.random.uniform(0.01, 0.2),
            'lambda_offense': np.random.uniform(0.01, 0.2),
            'lambda_supervision': np.random.uniform(0.01, 0.2),
            'lambda_reg': np.random.uniform(0.001, 0.02)
        }
        print(f"\n随机搜索: {params}")

        losses, *_ = adam_optimize_positions(
            team_positions.copy(), match_positions.copy(), cluster_centroid_positions, w_def=w_def, w_off=w_off, **params
        )
        if not losses:
            # The optimizer found no usable matches; nothing to score.
            continue

        # The optimizer restores and returns the parameters of its lowest
        # recorded loss, so score the trial by that value rather than by the
        # last step, which may be worse than the best.
        # NOTE(review): the total loss itself depends on the sampled lambdas,
        # so trials are not strictly comparable - confirm this is intended.
        trial_loss = min(losses)
        if trial_loss < best_loss:
            best_loss = trial_loss
            best_params = params

    print(f"\n最佳超参数: {best_params}, 最佳损失: {best_loss:.4f}")
    return best_params

# ====================== 可视化函数 ======================
def visualize_team_evolution_by_league_static(team_positions_df, seasons_order,
                                              output_dir="/Users/peixuanma/Downloads/Output_Graphs"):
    """Save one static PC1/PC2 trajectory plot per league.

    For every league, teams present in all ``len(seasons_order)`` seasons are
    plotted; if no team qualifies, every team of the league is plotted. Each
    team is drawn as a line through its per-season positions, with the season
    written next to each point.

    Args:
        team_positions_df: DataFrame with ``league``, ``team_name``, ``season``
            columns and, for anything to be drawn, ``PC1`` and ``PC2``.
        seasons_order: Sequence of seasons; only its length is used, as the
            number of distinct seasons a team needs to count as complete.
        output_dir: Directory for the PNG files (created if missing). Defaults
            to the location this script has always written to.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Column presence does not vary per team, so check it once.
    has_coords = 'PC1' in team_positions_df.columns and 'PC2' in team_positions_df.columns
    required_seasons = len(seasons_order)

    for league in team_positions_df['league'].unique():
        league_df = team_positions_df[team_positions_df['league'] == league].copy()
        complete = league_df.groupby("team_name").filter(lambda g: g['season'].nunique() == required_seasons)
        valid_teams = complete['team_name'].unique()
        if not valid_teams.size:
            # No team covers every season: fall back to all teams in the league.
            valid_teams = league_df['team_name'].unique()
        valid_df = league_df[league_df['team_name'].isin(valid_teams)].sort_values(['team_name', 'season'])

        plt.figure(figsize=(10, 8))
        plt.title(f"{league} - 球队演变 (进攻-防守)")
        plt.xlabel("PC1 (进攻)")
        plt.ylabel("PC2 (防守)")
        plt.grid(True)

        if has_coords:
            for team in valid_df['team_name'].unique():
                sub = valid_df[valid_df['team_name'] == team]
                plt.plot(sub['PC1'], sub['PC2'], marker='o', label=team)
                for _, row in sub.iterrows():
                    plt.text(row['PC1'], row['PC2'], row['season'], fontsize=8, ha='right')

        # Legend goes outside the axes so it does not cover the trajectories.
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        filename = os.path.join(output_dir, f"{league}_evolution.png")
        plt.savefig(filename, dpi=150)
        plt.close()
        print(f"保存 {league} 图 -> {filename}")

# ====================== 主函数 ======================
if __name__ == "__main__":
    # Script entry point: load all league data, random-search the loss
    # weights, run the final training, save the positions and plot them.
    base_path = '/Users/peixuanma/Downloads/data1'
    leagues = [
        ("england", "premier-league"), ("germany", "bundesliga"), ("spain", "la-liga"),
        ("france", "ligue-1"), ("france", "ligue-2"), ("italy", "serie-a"), ("netherlands", "eredivisie"),
        ("portugal", "ligapro"), ("denmark", "superliga"), ("england", "championship"), ("spain", "segunda-division"),
        ("switzerland", "super-league"), ("portugal", "liga-nos"), ("italy", "serie-b"), ("germany", "2-bundesliga"),
        ("scotland", "premiership"), ("belgium", "pro-league"), ("austria", "bundesliga"),
    ]
    seasons = [
        "2013-to-2014", "2014-to-2015", "2015-to-2016", "2016-to-2017", "2017-to-2018",
        "2018-to-2019", "2019-to-2020", "2020-to-2021", "2021-to-2022", "2022-to-2023", "2023-to-2024"
    ]

    # Manually supplied initial defensive and offensive weights (example values).
    w_def = np.array([
        0.12714323, 0.11415731, 0.04532705, 0.23075414, 0.03240472, 0.14205644, 0.13323908, 0.17491804
    ], dtype=np.float32)

    w_off = np.array([
        0.07438711, 0.07787083, 0.08495551, 0.22353454, 0.08152269, 0.08466186, 0.16022788, 0.21283953
    ], dtype=np.float32)

    # Load data
    print("===== 加载数据 =====")
    all_team_positions, all_match_positions, cluster_centroid_positions = load_all_league_data(base_path, leagues, seasons)

    # Random hyperparameter search
    print("\n===== 随机搜索超参数 =====")
    best_params = random_search_hyperparameters(all_team_positions, all_match_positions, cluster_centroid_positions, w_def, w_off)

    # Final training. The search returns None when no trial produced a loss;
    # fall back to the optimizer's default lambdas instead of failing on `**None`.
    print("\n===== 最终训练 =====")
    final_losses, final_team_positions, final_rank_scale, final_w_def, final_w_off, final_home_advantage = adam_optimize_positions(
        all_team_positions.copy(), all_match_positions.copy(), cluster_centroid_positions, w_def=w_def, w_off=w_off,
        **(best_params or {})
    )
    if final_losses:
        final_team_positions.to_csv("trained_team_positions.csv", index=False)
        print(f"最终损失: {final_losses[-1]:.4f}")
        print(f"最终防守权重: {final_w_def}")
        print(f"最终进攻权重: {final_w_off}")
        print(f"最终主场优势值: {final_home_advantage}")

    # Visualization
    print("\n===== 可视化 =====")
    visualize_team_evolution_by_league_static(final_team_positions, seasons)

    print("\n完成！")

scikit-learn 版本: 1.4.2
===== 加载数据 =====
加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2019-to-2020
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2019-to-2020
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesliga - 2021-to-2022
加载数据: germany - bundesliga - 2022-to-2023
加载数据: germany - bundesliga - 2023-

  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 premier-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/premier-league_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 bundesliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/bundesliga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 la-liga 图 -> /Users/peixuanma/Downloads/Output_Graphs/la-liga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligue-1 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligue-1_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligue-2 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligue-2_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-a 图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-a_evolution.png
保存 eredivisie 图 -> /Users/peixuanma/Downloads/Output_Graphs/eredivisie_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 ligapro 图 -> /Users/peixuanma/Downloads/Output_Graphs/ligapro_evolution.png
保存 superliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/superliga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 championship 图 -> /Users/peixuanma/Downloads/Output_Graphs/championship_evolution.png
保存 segunda-division 图 -> /Users/peixuanma/Downloads/Output_Graphs/segunda-division_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 super-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/super-league_evolution.png
保存 liga-nos 图 -> /Users/peixuanma/Downloads/Output_Graphs/liga-nos_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 serie-b 图 -> /Users/peixuanma/Downloads/Output_Graphs/serie-b_evolution.png
保存 2-bundesliga 图 -> /Users/peixuanma/Downloads/Output_Graphs/2-bundesliga_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 premiership 图 -> /Users/peixuanma/Downloads/Output_Graphs/premiership_evolution.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)
  plt.savefig(filename, dpi=150)


保存 pro-league 图 -> /Users/peixuanma/Downloads/Output_Graphs/pro-league_evolution.png

完成！
