scikit-learn 版本: 1.4.2
===== 加载数据 =====
加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesliga - 2021-to-2022
加载数据: germany - bundesliga - 2022-to-2023
加载数据: germany - bundesliga - 2023-to-2024
加载数据: spain - la-liga - 2013-to-2014
加载数据: spain - la-liga - 2014-to-2015
加载数据: 

In [51]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
import sklearn

# Print the installed scikit-learn version at the top of the run log.
print(f"scikit-learn 版本: {sklearn.__version__}")

# ====================== 计算防守统计数据 ======================
def compute_defensive_stats(match_df, team_positions_df):
    """Build one row of defensive statistics per team.

    For each home fixture, the goals conceded by the home side are divided by
    six quantities read from the match row (ratio1..ratio6). The per-team mean
    of every ratio is reported together with the total and average number of
    goals conceded over all home and away fixtures.

    Args:
        match_df: Match-level DataFrame; the columns read are the ones indexed
            on ``row`` below (team names, goal counts, cards, fouls, shots,
            corners, possession, ``team_b_xg`` and ``Pre-Match PPG (Away)``).
        team_positions_df: DataFrame whose ``team_name`` column lists the
            teams to report on.

    Returns:
        DataFrame with columns ``team_name``, ``ratio1``..``ratio6``,
        ``total_goals_conceded``, ``average_goals_conceded`` and
        ``num_matches``; a ratio with no samples is reported as 0.
    """
    epsilon = 1e-8  # added to every denominator so no division hits zero
    ratio_keys = [f'ratio{i}_list' for i in range(1, 7)]

    team_stats = {}
    for team in team_positions_df['team_name'].unique():
        record = {key: [] for key in ratio_keys}
        record['total_goals_conceded'] = 0
        record['num_matches'] = 0
        team_stats[team] = record

    # Pass 1: home fixtures feed the six ratios of the home side.
    for idx, row in match_df.iterrows():
        home_team = row['home_team_name']
        away_team = row['away_team_name']
        if not (home_team in team_stats and away_team in team_stats):
            print(f"警告: 主队 {home_team} 或客队 {away_team} 未找到，跳过比赛 {idx}")
            continue

        home = team_stats[home_team]

        # ratio1: goals conceded relative to the away side's pre-match PPG.
        away_ppg = row['Pre-Match PPG (Away)']
        if away_ppg > 0:
            home['ratio1_list'].append(row['away_team_goal_count'] / (away_ppg + epsilon))

        # ratio2: goals conceded per away corner.
        away_corners = row['away_team_corner_count']
        if away_corners > 0:
            home['ratio2_list'].append(row['away_team_goal_count'] / (away_corners + epsilon))

        # ratio3: goals conceded per home card/foul (always recorded).
        cards_and_fouls = (row['home_team_yellow_cards'] + row['home_team_red_cards']
                           + row['home_team_fouls'] + epsilon)
        home['ratio3_list'].append(row['away_team_goal_count'] / cards_and_fouls)

        # ratio4: goals conceded relative to the away side's xG.
        away_xg = row['team_b_xg']
        if away_xg > 0:
            home['ratio4_list'].append(row['away_team_goal_count'] / (away_xg + epsilon))

        # ratio5: goals conceded per away shot (always recorded).
        away_shots = row['away_team_shots_on_target'] + row['away_team_shots_off_target'] + epsilon
        home['ratio5_list'].append(row['away_team_goal_count'] / away_shots)

        # ratio6: goals conceded relative to away possession.
        away_possession = row['away_team_possession']
        if away_possession > 0:
            home['ratio6_list'].append(row['away_team_goal_count'] / (away_possession + epsilon))

        home['total_goals_conceded'] += row['away_team_goal_count']
        home['num_matches'] += 1  # one more home fixture

    # Pass 2: away fixtures only contribute goals conceded and a match count.
    for idx, row in match_df.iterrows():
        visitor = row['away_team_name']
        if visitor not in team_stats:
            print(f"警告: 客队 {visitor} 未找到，跳过比赛 {idx}")
            continue
        visitor_stats = team_stats[visitor]
        visitor_stats['total_goals_conceded'] += row['home_team_goal_count']
        visitor_stats['num_matches'] += 1  # one more away fixture

    # Collapse the per-match samples into one summary row per team.
    summary = []
    for team, stats in team_stats.items():
        played = stats['num_matches']
        conceded = stats['total_goals_conceded']
        entry = {'team_name': team}
        for number, key in enumerate(ratio_keys, start=1):
            samples = stats[key]
            entry[f'ratio{number}'] = np.mean(samples) if samples else 0
        entry['total_goals_conceded'] = conceded
        entry['average_goals_conceded'] = conceded / played if played > 0 else 0
        entry['num_matches'] = played
        summary.append(entry)

    return pd.DataFrame(summary)

# ====================== 数据加载函数 ======================
def load_all_league_data(base_path, leagues, seasons):
    """Load every league/season CSV pair and stack the per-team defensive stats.

    For each (country, league) pair and season, the team file and the match
    file are read from ``base_path``, team names are normalised (stripped and
    lower-cased), matches involving unknown teams are dropped, and
    ``compute_defensive_stats`` is called on what remains. Combinations whose
    files are missing or unusable are skipped with a printed warning.

    Args:
        base_path: Directory containing the
            ``{country}-{league}-teams-{season}-stats.csv`` and
            ``{country}-{league}-matches-{season}-stats.csv`` files.
        leagues: Iterable of ``(country_name, league_name)`` pairs.
        seasons: Iterable of season strings used in the file names.

    Returns:
        DataFrame with one row per team and season, holding ``team_name``,
        ``ratio1``..``ratio6``, ``total_goals_conceded`` and
        ``average_goals_conceded``.

    Raises:
        ValueError: If no league/season combination produced any data.
    """
    stat_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                    'total_goals_conceded', 'average_goals_conceded']
    all_team_positions = []
    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not (os.path.exists(team_file) and os.path.exists(match_file)):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Prefer 'common_name', fall back to 'team_name'. Without either
            # column there is nothing to join on, so skip like the other
            # unusable inputs instead of failing on ``None.str``.
            name_column = next(
                (col for col in ('common_name', 'team_name') if col in team_df.columns), None)
            if name_column is None:
                print(f"警告: {country_name} - {league_name} - {season} 缺少球队名称列，跳过")
                continue

            # Normalise team names so both files use the same spelling.
            team_df['team_name'] = team_df[name_column].str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            # Keep only matches where both sides appear in the team file.
            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)]
            if len(match_df) < original_match_count:
                print(f"警告: 过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 无有效比赛数据")
                continue

            defensive_stats_df = compute_defensive_stats(match_df, team_df)
            if defensive_stats_df.empty:
                print("警告: 未计算出防守统计")
                continue

            # Merge on the name column only: if the team file already had a
            # column named like one of the statistics, merging the full frame
            # would rename both to *_x / *_y and the fallback below would
            # overwrite the real values with 0.
            merged = team_df[['team_name']].merge(defensive_stats_df, on='team_name', how='left')
            for col in stat_columns:
                if col not in merged.columns:
                    print(f"警告: {col} 列缺失，设为0")
                    merged[col] = 0

            all_team_positions.append(merged[['team_name'] + stat_columns].copy())

    if not all_team_positions:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # keep the exception type but say what actually went wrong.
        raise ValueError(f"未加载到任何联赛数据，请检查路径和文件名: {base_path}")

    return pd.concat(all_team_positions, ignore_index=True)

# ====================== 使用Adaboost优化权重 ======================
def compute_adaboost_weights(team_positions_df):
    """Derive normalised weights for the six defensive ratios with AdaBoost.

    The ratios are used as features and a two-class label is obtained by
    splitting ``average_goals_conceded`` at its median (``pd.qcut`` with
    ``q=2``). An AdaBoost classifier is tuned with a 5-fold grid search on an
    80 % training split, evaluated on the remaining 20 %, and its
    estimator-weighted feature importances are rescaled to sum to 1.

    Args:
        team_positions_df: DataFrame containing ``ratio1``..``ratio6`` and
            ``average_goals_conceded``.

    Returns:
        ``numpy.ndarray`` of dtype float32 with one weight per ratio.
    """
    ratio_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6']

    # Replace infinities and missing values with 0 before modelling.
    ratios = team_positions_df[ratio_columns].replace([np.inf, -np.inf], np.nan).fillna(0)

    # Show how strongly the features are correlated with each other.
    correlation_matrix = ratios.corr()
    print("特征相关性矩阵:\n", correlation_matrix)

    # Bin the average goals conceded into two classes; unassigned rows become class 0.
    labels = pd.qcut(team_positions_df['average_goals_conceded'], q=2, labels=[0, 1],
                     duplicates='drop').fillna(0).astype(int)

    # Split first, then fit the scaler on the training rows only, so the
    # held-out rows do not contribute to the scaling statistics.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        ratios, labels, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    # AdaBoost model and hyper-parameter grid (2 x 8 x 5 combinations).
    ada = AdaBoostClassifier(algorithm='SAMME', random_state=42)
    param_grid = {
        'estimator': [DecisionTreeClassifier(max_depth=5), DecisionTreeClassifier(max_depth=10)],
        'n_estimators': [500, 1000, 1500, 250, 50, 200, 150, 100],
        'learning_rate': [0.01, 0.1, 0.5, 0.05, 0.25]
    }

    # 5-fold cross-validated grid search, scored on accuracy.
    grid_search = GridSearchCV(ada, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    print("最佳参数:", best_params)

    # With the default refit=True the search has already retrained the best
    # configuration on the whole training split, so reuse that model instead
    # of fitting an identical one a second time.
    adaboost = grid_search.best_estimator_

    # Evaluate on the held-out split.
    y_pred = adaboost.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    roc_auc = roc_auc_score(y_test, adaboost.predict_proba(X_test)[:, 1])  # binary ROC-AUC
    conf_matrix = confusion_matrix(y_test, y_pred)

    print("测试集准确率:", accuracy)
    print("测试集F1分数:", f1)
    print("测试集ROC-AUC:", roc_auc)
    print("混淆矩阵:\n", conf_matrix)

    # Average the per-tree importances, weighting each tree by its boosting weight.
    feature_importances = np.zeros(len(ratio_columns))
    for est_weight, est in zip(adaboost.estimator_weights_, adaboost.estimators_):
        feature_importances += est_weight * est.feature_importances_
    feature_importances /= np.sum(adaboost.estimator_weights_)

    # Rescale so the weights sum to 1; no minimum-importance floor is applied.
    weights = feature_importances / np.sum(feature_importances)
    print(f"计算得到的权重: {weights}")

    return weights.astype(np.float32)

# ====================== 主函数 ======================
if __name__ == "__main__":
    # Directory that holds the per-league team/match CSV files.
    base_path = '/Users/peixuanma/Downloads/data1'

    # (country, league) pairs, processed in this order.
    leagues = [
        ("england", "premier-league"),
        ("germany", "bundesliga"),
        ("spain", "la-liga"),
        ("france", "ligue-1"),
        ("france", "ligue-2"),
        ("italy", "serie-a"),
        ("netherlands", "eredivisie"),
        ("portugal", "ligapro"),
        ("denmark", "superliga"),
        ("england", "championship"),
        ("spain", "segunda-division"),
        ("switzerland", "super-league"),
        ("portugal", "liga-nos"),
        ("italy", "serie-b"),
        ("germany", "2-bundesliga"),
        ("scotland", "premiership"),
        ("belgium", "pro-league"),
        ("austria", "bundesliga"),
    ]

    # "2013-to-2014" through "2023-to-2024", one entry per starting year.
    seasons = [f"{start}-to-{start + 1}" for start in range(2013, 2024)]

    # Step 1: load and aggregate the data.
    print("===== 加载数据 =====")
    all_team_positions = load_all_league_data(base_path, leagues, seasons)

    # Step 2: fit AdaBoost and derive the ratio weights.
    print("\n===== 计算Adaboost权重 =====")
    adaboost_weights = compute_adaboost_weights(all_team_positions)
    print("优化后的Adaboost权重:", adaboost_weights)

    print("\n完成！")

scikit-learn 版本: 1.4.2
===== 加载数据 =====
加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2019-to-2020
警告: england - premier-league - 2019-to-2020 文件缺失
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2019-to-2020
警告: germany - bundesliga - 2019-to-2020 文件缺失
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesli

KeyboardInterrupt: 

In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
import sklearn

# Print the installed scikit-learn version at the top of the run log.
print(f"scikit-learn 版本: {sklearn.__version__}")

# ====================== 计算防守统计数据 ======================
def _defense_label(goals_conceded, opponent_xg):
    """Map (goals conceded + 1) / (opponent xG + 1) to a class 0..4, or None if it cannot be computed."""
    if pd.isna(goals_conceded) or pd.isna(opponent_xg):
        # A NaN ratio fails every "<" test and would otherwise fall through
        # to the last class (4); treat the fixture as unlabelled instead.
        return None
    denominator = opponent_xg + 1  # the +1 keeps the ratio defined when xG is 0
    if denominator <= 0:
        return None
    ratio = (goals_conceded + 1) / denominator
    if ratio < 0.6:
        return 0
    if ratio < 0.8:
        return 1
    if ratio < 1.0:
        return 2
    if ratio < 1.2:
        return 3
    return 4  # ratio >= 1.2


def compute_defensive_stats(match_df, team_positions_df):
    """Compute per-team defensive statistics and a defensive class label.

    For each home fixture, the goals conceded by the home side are divided by
    six quantities read from the match row (ratio1..ratio6) and averaged per
    team. Every fixture also yields one defence label for each side, based on
    goals conceded versus the opponent's xG; the team's label is the most
    frequent one (the smallest label wins a tie).

    Args:
        match_df: Match-level DataFrame; the columns read are the ones indexed
            on ``row`` below (team names, goal counts, cards, fouls, shots,
            corners, possession, ``team_a_xg``, ``team_b_xg`` and
            ``Pre-Match PPG (Away)``).
        team_positions_df: DataFrame whose ``team_name`` column lists the
            teams to report on.

    Returns:
        DataFrame with columns ``team_name``, ``ratio1``..``ratio6``,
        ``total_goals_conceded``, ``average_goals_conceded`` and
        ``defense_label``. A ratio with no samples is 0 and a team without
        any usable label gets ``defense_label`` 0.
    """
    epsilon = 1e-8  # added to every denominator so no division hits zero
    team_stats = {}
    for team in team_positions_df['team_name'].unique():
        team_stats[team] = {
            'total_goals_conceded': 0, 'ratio1_list': [], 'ratio2_list': [], 'ratio3_list': [],
            'ratio4_list': [], 'ratio5_list': [], 'ratio6_list': [], 'num_matches': 0, 'defense_labels': []
        }

    # Pass 1: labels for both sides, ratios for the home side.
    for idx, row in match_df.iterrows():
        home_team, away_team = row['home_team_name'], row['away_team_name']
        if home_team not in team_stats or away_team not in team_stats:
            print(f"警告: 主队 {home_team} 或客队 {away_team} 未找到，跳过比赛 {idx}")
            continue

        # The home side defended against the away side, and vice versa.
        # Fixtures whose goals or xG are missing contribute no label.
        label_home_defense = _defense_label(row['away_team_goal_count'], row['team_b_xg'])
        if label_home_defense is not None:
            team_stats[home_team]['defense_labels'].append(label_home_defense)
        label_away_defense = _defense_label(row['home_team_goal_count'], row['team_a_xg'])
        if label_away_defense is not None:
            team_stats[away_team]['defense_labels'].append(label_away_defense)

        # Ratios 1, 2, 4 and 6 are recorded only when their denominator is
        # positive; ratios 3 and 5 are always recorded.
        if row['Pre-Match PPG (Away)'] > 0:
            team_stats[home_team]['ratio1_list'].append(
                row['away_team_goal_count'] / (row['Pre-Match PPG (Away)'] + epsilon))
        if row['away_team_corner_count'] > 0:
            team_stats[home_team]['ratio2_list'].append(
                row['away_team_goal_count'] / (row['away_team_corner_count'] + epsilon))
        denominator = row['home_team_yellow_cards'] + row['home_team_red_cards'] + row['home_team_fouls'] + epsilon
        team_stats[home_team]['ratio3_list'].append(row['away_team_goal_count'] / denominator)
        if row['team_b_xg'] > 0:
            team_stats[home_team]['ratio4_list'].append(row['away_team_goal_count'] / (row['team_b_xg'] + epsilon))
        shots_total = row['away_team_shots_on_target'] + row['away_team_shots_off_target'] + epsilon
        team_stats[home_team]['ratio5_list'].append(row['away_team_goal_count'] / shots_total)
        if row['away_team_possession'] > 0:
            team_stats[home_team]['ratio6_list'].append(
                row['away_team_goal_count'] / (row['away_team_possession'] + epsilon))
        team_stats[home_team]['total_goals_conceded'] += row['away_team_goal_count']
        team_stats[home_team]['num_matches'] += 1  # one more home fixture

    # Pass 2: away fixtures only update goals conceded and the match count.
    for idx, row in match_df.iterrows():
        away_team = row['away_team_name']
        if away_team not in team_stats:
            print(f"警告: 客队 {away_team} 未找到，跳过比赛 {idx}")
            continue
        team_stats[away_team]['total_goals_conceded'] += row['home_team_goal_count']
        team_stats[away_team]['num_matches'] += 1  # one more away fixture

    # Collapse the per-match samples into one summary row per team.
    data = []
    for team, stats in team_stats.items():
        num_matches = stats['num_matches']
        average_goals_conceded = stats['total_goals_conceded'] / num_matches if num_matches > 0 else 0
        # Most frequent label; mode() is sorted, so ties resolve to the smallest.
        if stats['defense_labels']:
            label_mode = pd.Series(stats['defense_labels']).mode()[0]
        else:
            label_mode = 0  # default when the team has no labelled fixture
        data.append({
            'team_name': team,
            'ratio1': np.mean(stats['ratio1_list']) if stats['ratio1_list'] else 0,
            'ratio2': np.mean(stats['ratio2_list']) if stats['ratio2_list'] else 0,
            'ratio3': np.mean(stats['ratio3_list']) if stats['ratio3_list'] else 0,
            'ratio4': np.mean(stats['ratio4_list']) if stats['ratio4_list'] else 0,
            'ratio5': np.mean(stats['ratio5_list']) if stats['ratio5_list'] else 0,
            'ratio6': np.mean(stats['ratio6_list']) if stats['ratio6_list'] else 0,
            'total_goals_conceded': stats['total_goals_conceded'],
            'average_goals_conceded': average_goals_conceded,
            'defense_label': label_mode
        })

    return pd.DataFrame(data)

# ====================== 数据加载函数 ======================
def load_all_league_data(base_path, leagues, seasons):
    """Load every league/season CSV pair and stack the per-team defensive stats.

    For each (country, league) pair and season, the team file and the match
    file are read from ``base_path``, team names are normalised (stripped and
    lower-cased), matches involving unknown teams are dropped, and
    ``compute_defensive_stats`` is called on what remains. Combinations whose
    files are missing or unusable are skipped with a printed warning.

    Args:
        base_path: Directory containing the
            ``{country}-{league}-teams-{season}-stats.csv`` and
            ``{country}-{league}-matches-{season}-stats.csv`` files.
        leagues: Iterable of ``(country_name, league_name)`` pairs.
        seasons: Iterable of season strings used in the file names.

    Returns:
        DataFrame with one row per team and season, holding ``team_name``,
        ``ratio1``..``ratio6``, ``total_goals_conceded``,
        ``average_goals_conceded`` and ``defense_label``.

    Raises:
        ValueError: If no league/season combination produced any data.
    """
    stat_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6',
                    'total_goals_conceded', 'average_goals_conceded', 'defense_label']
    all_team_positions = []
    for country_name, league_name in leagues:
        for season in seasons:
            print(f"加载数据: {country_name} - {league_name} - {season}")
            team_file = os.path.join(base_path, f"{country_name}-{league_name}-teams-{season}-stats.csv")
            match_file = os.path.join(base_path, f"{country_name}-{league_name}-matches-{season}-stats.csv")
            if not (os.path.exists(team_file) and os.path.exists(match_file)):
                print(f"警告: {country_name} - {league_name} - {season} 文件缺失")
                continue

            team_df = pd.read_csv(team_file)
            match_df = pd.read_csv(match_file)

            # Prefer 'common_name', fall back to 'team_name'. Without either
            # column there is nothing to join on, so skip like the other
            # unusable inputs instead of failing on ``None.str``.
            name_column = next(
                (col for col in ('common_name', 'team_name') if col in team_df.columns), None)
            if name_column is None:
                print(f"警告: {country_name} - {league_name} - {season} 缺少球队名称列，跳过")
                continue

            # Normalise team names so both files use the same spelling.
            team_df['team_name'] = team_df[name_column].str.strip().str.lower()
            match_df['home_team_name'] = match_df['home_team_name'].str.strip().str.lower()
            match_df['away_team_name'] = match_df['away_team_name'].str.strip().str.lower()

            # Keep only matches where both sides appear in the team file.
            team_names = team_df['team_name'].unique()
            original_match_count = len(match_df)
            match_df = match_df[
                match_df['home_team_name'].isin(team_names) & match_df['away_team_name'].isin(team_names)]
            if len(match_df) < original_match_count:
                print(f"警告: 过滤了 {original_match_count - len(match_df)} 场比赛")

            if match_df.empty:
                print("警告: 无有效比赛数据")
                continue

            defensive_stats_df = compute_defensive_stats(match_df, team_df)
            if defensive_stats_df.empty:
                print("警告: 未计算出防守统计")
                continue

            # Merge on the name column only: if the team file already had a
            # column named like one of the statistics, merging the full frame
            # would rename both to *_x / *_y and the fallback below would
            # overwrite the real values with 0.
            merged = team_df[['team_name']].merge(defensive_stats_df, on='team_name', how='left')
            for col in stat_columns:
                if col not in merged.columns:
                    print(f"警告: {col} 列缺失，设为0")
                    merged[col] = 0

            all_team_positions.append(merged[['team_name'] + stat_columns].copy())

    if not all_team_positions:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # keep the exception type but say what actually went wrong.
        raise ValueError(f"未加载到任何联赛数据，请检查路径和文件名: {base_path}")

    return pd.concat(all_team_positions, ignore_index=True)

# ====================== 使用Adaboost优化权重 ======================
def compute_adaboost_weights(team_positions_df):
    """Derive normalised weights for the six defensive ratios with AdaBoost.

    The ratios are used as features and ``defense_label`` as a multi-class
    target. An AdaBoost classifier is tuned with a 5-fold grid search on an
    80 % training split, evaluated on the remaining 20 %, and its
    estimator-weighted feature importances are rescaled to sum to 1.

    Args:
        team_positions_df: DataFrame containing ``ratio1``..``ratio6`` and
            an integer-convertible ``defense_label`` column.

    Returns:
        ``numpy.ndarray`` of dtype float32 with one weight per ratio.
    """
    ratio_columns = ['ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6']

    # Replace infinities and missing values with 0 before modelling.
    ratios = team_positions_df[ratio_columns].replace([np.inf, -np.inf], np.nan).fillna(0)

    # Show how strongly the features are correlated with each other.
    correlation_matrix = ratios.corr()
    print("特征相关性矩阵:\n", correlation_matrix)

    # The defence label is the classification target.
    labels = team_positions_df['defense_label'].astype(int)

    # Split first, then fit the scaler on the training rows only, so the
    # held-out rows do not contribute to the scaling statistics.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        ratios, labels, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    # AdaBoost model and hyper-parameter grid (5 x 8 x 5 combinations).
    ada = AdaBoostClassifier(algorithm='SAMME', random_state=42)
    param_grid = {
        'estimator': [DecisionTreeClassifier(max_depth=1), DecisionTreeClassifier(max_depth=3),
                      DecisionTreeClassifier(max_depth=5), DecisionTreeClassifier(max_depth=7),
                      DecisionTreeClassifier(max_depth=10)],
        'n_estimators': [500, 1000, 1500, 250, 50, 200, 150, 100],
        'learning_rate': [0.01, 0.1, 0.5, 0.05, 0.25]
    }

    # 5-fold cross-validated grid search, scored on accuracy.
    grid_search = GridSearchCV(ada, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    print("最佳参数:", best_params)

    # With the default refit=True the search has already retrained the best
    # configuration on the whole training split, so reuse that model instead
    # of fitting an identical one a second time.
    adaboost = grid_search.best_estimator_

    # Evaluate on the held-out split.
    y_pred = adaboost.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    try:
        roc_auc = roc_auc_score(y_test, adaboost.predict_proba(X_test), multi_class='ovr')  # multi-class ROC-AUC
    except ValueError as exc:
        # One-vs-rest AUC needs every class in the test split. A rare label
        # missing there must not discard the weights after the whole search.
        roc_auc = float('nan')
        print(f"警告: 无法计算ROC-AUC: {exc}")
    conf_matrix = confusion_matrix(y_test, y_pred)

    print("测试集准确率:", accuracy)
    print("测试集F1分数:", f1)
    print("测试集ROC-AUC:", roc_auc)
    print("混淆矩阵:\n", conf_matrix)

    # Average the per-tree importances, weighting each tree by its boosting weight.
    feature_importances = np.zeros(len(ratio_columns))
    for est_weight, est in zip(adaboost.estimator_weights_, adaboost.estimators_):
        feature_importances += est_weight * est.feature_importances_
    feature_importances /= np.sum(adaboost.estimator_weights_)

    # Rescale so the weights sum to 1.
    weights = feature_importances / np.sum(feature_importances)
    print(f"计算得到的权重: {weights}")

    return weights.astype(np.float32)

# ====================== 主函数 ======================
if __name__ == "__main__":
    # Directory that holds the per-league team/match CSV files.
    base_path = '/Users/peixuanma/Downloads/data1'

    # (country, league) pairs, processed in this order.
    leagues = [
        ("england", "premier-league"),
        ("germany", "bundesliga"),
        ("spain", "la-liga"),
        ("france", "ligue-1"),
        ("france", "ligue-2"),
        ("italy", "serie-a"),
        ("netherlands", "eredivisie"),
        ("portugal", "ligapro"),
        ("denmark", "superliga"),
        ("england", "championship"),
        ("spain", "segunda-division"),
        ("switzerland", "super-league"),
        ("portugal", "liga-nos"),
        ("italy", "serie-b"),
        ("germany", "2-bundesliga"),
        ("scotland", "premiership"),
        ("belgium", "pro-league"),
        ("austria", "bundesliga"),
    ]

    # "2013-to-2014" through "2023-to-2024", one entry per starting year.
    seasons = [f"{start}-to-{start + 1}" for start in range(2013, 2024)]

    # Step 1: load and aggregate the data.
    print("===== 加载数据 =====")
    all_team_positions = load_all_league_data(base_path, leagues, seasons)

    # Step 2: fit AdaBoost and derive the ratio weights.
    print("\n===== 计算Adaboost权重 =====")
    adaboost_weights = compute_adaboost_weights(all_team_positions)
    print("优化后的Adaboost权重:", adaboost_weights)

    print("\n完成！")

scikit-learn 版本: 1.4.2
===== 加载数据 =====
加载数据: england - premier-league - 2013-to-2014
加载数据: england - premier-league - 2014-to-2015
加载数据: england - premier-league - 2015-to-2016
加载数据: england - premier-league - 2016-to-2017
加载数据: england - premier-league - 2017-to-2018
加载数据: england - premier-league - 2018-to-2019
加载数据: england - premier-league - 2019-to-2020
加载数据: england - premier-league - 2020-to-2021
加载数据: england - premier-league - 2021-to-2022
加载数据: england - premier-league - 2022-to-2023
加载数据: england - premier-league - 2023-to-2024
加载数据: germany - bundesliga - 2013-to-2014
加载数据: germany - bundesliga - 2014-to-2015
加载数据: germany - bundesliga - 2015-to-2016
加载数据: germany - bundesliga - 2016-to-2017
加载数据: germany - bundesliga - 2017-to-2018
加载数据: germany - bundesliga - 2018-to-2019
加载数据: germany - bundesliga - 2019-to-2020
加载数据: germany - bundesliga - 2020-to-2021
加载数据: germany - bundesliga - 2021-to-2022
加载数据: germany - bundesliga - 2022-to-2023
加载数据: germany - bundesliga - 2023-