In [7]:
import pandas as pd
import numpy as np
df = pd.read_csv('../complete_data.csv')
total_medals_per_year = df.groupby('Year')['Total'].sum().reset_index()
total_medals_per_year.columns = ['Year', 'Total_Medals_AllCountries']
df = pd.merge(df, total_medals_per_year, on='Year', how='left')
df['medal_share'] = df['Total'] / df['Total_Medals_AllCountries']

In [9]:
def create_features(df_country):
    # 按年份升序
    df_country = df_country.sort_values('Year').reset_index(drop=True)
    # 构建一个列表，用来存放每一行对应的训练数据
    rows = []
    print(df_country)
    for i in range(len(df_country)):
        current_year = df_country.loc[i, 'Year']

        # 前三届的年份
        year_t12 = current_year - 12
        year_t8  = current_year - 8
        year_t4  = current_year - 4

        # 判断三届数据是否都存在
        # 我们用 df_country 查询一下
        if year_t12 in df_country['Year'].values \
           and year_t8 in df_country['Year'].values \
           and year_t4 in df_country['Year'].values:

            # 分别取出那三届的数据
            row_t12 = df_country[df_country['Year'] == year_t12].iloc[0]
            row_t8  = df_country[df_country['Year'] == year_t8].iloc[0]
            row_t4  = df_country[df_country['Year'] == year_t4].iloc[0]
            row_t   = df_country.loc[i]

            # 以三届前的数据作为特征
            features = {
                # 1) medal_share
                'medal_share_t12': row_t12['medal_share'],
                'medal_share_t8':  row_t8['medal_share'],
                'medal_share_t4':  row_t4['medal_share'],

                # 2) participants
                'participants_t12': row_t12['Participants'],
                'participants_t8':  row_t8['Participants'],
                'participants_t4':  row_t4['Participants'],

                # 3) events
                'events_t12': row_t12['Events'],
                'events_t8':  row_t8['Events'],
                'events_t4':  row_t4['Events'],

                # 4) is_host(前几届的是否主办也可做特征，但往往影响可能较弱，可按需决定)
                'is_host_t12': row_t12['is_host'],
                'is_host_t8':  row_t8['is_host'],
                'is_host_t4':  row_t4['is_host'],

                # 5) 当前届是否主办(这个在预测时是已知，可作为特征)
                'is_host_t': row_t['is_host']
            }

            # 目标：当年 (current_year) 的 medal_share
            target = row_t['medal_share']

            row_dict = {**features, 'target_year': current_year, 'medal_share': target}
            rows.append(row_dict)

    # 将收集到的所有样本行转化成 DataFrame
    df_features = pd.DataFrame(rows)
    return df_features


In [56]:
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split

# 按国家分组
all_countries = df['NOC'].unique()

# 设置一个字典，用来保存每个国家的模型
country_models = {}
cnt = 0

for country in all_countries:
    # country = "USA"
    df_country = df[df['NOC'] == country].copy()
    df_country = df_country[df_country['Participants'] != 0]
    # print(df_country)
    # 如果该国历史数据还是太少，直接跳过
    if df_country.shape[0] < 8:
        cnt += 1
        continue

    # 构造时序特征
    df_features = create_features(df_country)
    df_features.fillna(0, inplace=True)
    # 若总样本量过少，也跳过
    if df_features.shape[0] < 5:
        continue

    # ---------------------------------------------------
    # 关键改动：把 2024 年的数据留作验证 / 测试集
    # ---------------------------------------------------
    df_features_2024 = df_features[df_features['target_year'] == 2024].copy()
    df_features_train = df_features[df_features['target_year'] != 2024].copy()

    # 如果训练集都没什么数据，也就没法训练
    if df_features_train.shape[0] < 2:
        continue

    # 分别取出训练集和验证集的 X, y
    X_train = df_features_train.drop(columns=['target_year', 'medal_share'])
    y_train = df_features_train['medal_share']

    # 对 2024 的特征留作验证/测试
    X_test_2024 = df_features_2024.drop(columns=['target_year', 'medal_share'])
    y_test_2024 = df_features_2024['medal_share']

    # ---------------------------------------------------
    # 动态调整交叉验证折数
    # ---------------------------------------------------
    n_samples = X_train.shape[0]
    if n_samples < 5:
        cv = n_samples  # 使用留一法交叉验证
    else:
        cv = 5  # 使用 5 折交叉验证

    # 使用 LassoCV 进行训练
    model = LassoCV(alphas=[0.001, 0.01, 0.1, 1.0, 10.0], cv=cv, random_state=42)
    model.fit(X_train, y_train)

    # 如果 2024 年有样本，就进行一下评估
    if len(X_test_2024) > 0:
        score_2024 = model.score(X_test_2024, y_test_2024)
        print(f"Country: {country}, 2024 hold-out R^2: {score_2024:.4f}, #train: {X_train.shape[0]}, #2024: {X_test_2024.shape[0]}, Best alpha: {model.alpha_:.4f}")

    # 保存模型
    country_models[country] = model

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: AFG, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: ALB, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000
Country: ALG, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 0.0100
Country: AND, 2024 hold-out R^2: nan, #train: 9, #2024: 1, Best alpha: 10.0000
Country: ANG, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: ANT, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: ARG, 2024 hold-out R^2: nan, #train: 14, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: ARM, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: ARU, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: ASA, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: AUS, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 0.1000
Country: AUT, 2024 hold-out R^2: nan, #train: 19, #2024: 1, Best alpha: 0.1000
Country: AZE, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: BAH, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 0.0010


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: BAN, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: BAR, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: BDI, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: BEL, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 10.0000
Country: BEN, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: BER, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 0.0010
Country: BHU, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: BIH, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: BIZ, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: BOL, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: BOT, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: BRA, 2024 hold-out R^2: nan, #train: 16, #2024: 1, Best alpha: 0.0010
Country: BRN, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: BUL, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 0.0100
Country: BUR, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: CAF, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: CAM, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: CAN, 2024 hold-out R^2: nan, #train: 15, #2024: 1, Best alpha: 10.0000
Country: CAY, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: CGO, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: CHA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: CHI, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000
Country: CHN, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 1.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: CIV, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: CMR, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000
Country: COD, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: COK, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: COL, 2024 hold-out R^2: nan, #train: 14, #2024: 1, Best alpha: 0.0100
Country: COM, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: CPV, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: CRC, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000
Country: CRO, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 0.0010
Country: CUB, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 0.1000
Country: CYP, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: CZE, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: DEN, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 10.0000
Country: DMA, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: DOM, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: ECU, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 10.0000
Country: EGY, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: ESA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: ESP, 2024 hold-out R^2: nan, #train: 17, #2024: 1, Best alpha: 0.0010
Country: EST, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: ETH, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: FIJ, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 0.0010
Country: FIN, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 0.0100


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: FRA, 2024 hold-out R^2: nan, #train: 20, #2024: 1, Best alpha: 0.0100
Country: GAB, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: GAM, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: GBR, 2024 hold-out R^2: nan, #train: 20, #2024: 1, Best alpha: 10.0000
Country: GBS, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: GEO, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: GEQ, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: GER, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: GHA, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 0.0010
Country: GRE, 2024 hold-out R^2: nan, #train: 20, #2024: 1, Best alpha: 10.0000
Country: GRN, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: GUA, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 10.0000
Country: GUI, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: GUM, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: GUY, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: HAI, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: HKG, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 10.0000
Country: HON, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: HUN, 2024 hold-out R^2: nan, #train: 15, #2024: 1, Best alpha: 0.1000
Country: INA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 0.0100
Country: IND, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 10.0000
Country: IRI, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 0.0100


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: IRL, 2024 hold-out R^2: nan, #train: 16, #2024: 1, Best alpha: 10.0000
Country: IRQ, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: ISL, 2024 hold-out R^2: nan, #train: 16, #2024: 1, Best alpha: 10.0000
Country: ISR, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 0.0010
Country: ISV, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: ITA, 2024 hold-out R^2: nan, #train: 20, #2024: 1, Best alpha: 0.0010
Country: IVB, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: JAM, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 0.0010
Country: JOR, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: JPN, 2024 hold-out R^2: nan, #train: 13, #2024: 1, Best alpha: 0.1000
Country: KAZ, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: KEN, 2024 hold-out R^2: nan, #train: 9, #2024: 1, Best alpha: 0.0100
Country: KGZ, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: KOR, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 0.1000
Country: KSA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: LAO, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: LAT, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: LBA, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: LBR, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: LCA, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: LES, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gr

Country: LIE, 2024 hold-out R^2: nan, #train: 9, #2024: 1, Best alpha: 10.0000
Country: LTU, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000
Country: LUX, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 10.0000
Country: MAD, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000
Country: MAR, 2024 hold-out R^2: nan, #train: 9, #2024: 1, Best alpha: 0.0100
Country: MAS, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: MAW, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: MDA, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: MDV, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: MEX, 2024 hold-out R^2: nan, #train: 17, #2024: 1, Best alpha: 10.0000
Country: MGL, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: MKD, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: MLI, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: MLT, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: MON, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 10.0000
Country: MOZ, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 0.0010
Country: MRI, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: MTN, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: MYA, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000
Country: NAM, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000
Country: NCA, 2024 hold-out R^2: nan, #train: 7, #

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: NGR, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 10.0000
Country: NIG, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: NOR, 2024 hold-out R^2: nan, #train: 15, #2024: 1, Best alpha: 10.0000
Country: NRU, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: NZL, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 10.0000
Country: OMA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: PAK, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 0.0010
Country: PAN, 2024 hold-out R^2: nan, #train: 9, #2024: 1, Best alpha: 10.0000
Country: PAR, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: PER, 2024 hold-out R^2: nan, #train: 14, #2024: 1, Best alpha: 10.0000
Country: PHI, 2024 hold-out R^2: nan, #train: 13, #2024: 1, Best alpha: 0.0010
Country: PLE, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: PNG, 2024 hold-out R^2: nan, #train: 7,

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: ROU, 2024 hold-out R^2: nan, #train: 16, #2024: 1, Best alpha: 0.1000
Country: RSA, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: RWA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: SAM, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: SEN, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000
Country: SEY, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000
Country: SGP, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 0.0010
Country: SKN, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: SLE, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: SLO, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000
Country: SMR, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 10.0000
Country: SOL, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: SOM, 2024 hold-out R^2: nan, #train: 4, #2

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: SUR, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: SVK, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 0.0100
Country: SWE, 2024 hold-out R^2: nan, #train: 18, #2024: 1, Best alpha: 0.0010
Country: SWZ, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: SYR, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: TAN, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: TGA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: THA, 2024 hold-out R^2: nan, #train: 11, #2024: 1, Best alpha: 0.0010
Country: TJK, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: TKM, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
Country: TOG, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
Country: TPE, 2024 hold-out R^2: nan, #train: 9, #2024: 1, Best alpha: 10.0000
Country: TTO, 2024 hold-out R^2: nan, #train: 12, #20

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Country: URU, 2024 hold-out R^2: nan, #train: 13, #2024: 1, Best alpha: 10.0000
Country: USA, 2024 hold-out R^2: nan, #train: 16, #2024: 1, Best alpha: 10.0000
Country: UZB, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 0.0010
Country: VAN, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: VEN, 2024 hold-out R^2: nan, #train: 16, #2024: 1, Best alpha: 10.0000
Country: VIE, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: VIN, 2024 hold-out R^2: nan, #train: 6, #2024: 1, Best alpha: 10.0000
Country: YEM, 2024 hold-out R^2: nan, #train: 5, #2024: 1, Best alpha: 10.0000
Country: ZAM, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
Country: ZIM, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [57]:
host_2024 = "FRA"

# 构造一个空的字典，用来存放每个国家的预测结果
predictions_2024 = {}

# 遍历每一个国家
for country in all_countries:
    # 若该国没有在前面成功训练模型，则跳过
    if country not in country_models:
        continue
    print(country)
    # 取出该国数据
    df_country = df[df['NOC'] == country].copy()

    # 为了构造特征，需要查询 2012、2016、2020 这三届，若任意一届缺失，则无法做该国预测
    needed_years = [2012, 2016, 2020]
    # 判断这三届是否都齐全
    if not all(y in df_country['Year'].values for y in needed_years):
        # 若有缺失，跳过
        continue

    # 分别取 2012、2016、2020 年的数据行（这里假设每年只出现一次）
    row_2012 = df_country[df_country['Year'] == 2012].iloc[0]
    row_2016 = df_country[df_country['Year'] == 2016].iloc[0]
    row_2020 = df_country[df_country['Year'] == 2020].iloc[0]

    # 按我们之前 create_features() 的规则，构造特征
    # 假定 2024 年 is_host 信息：若该国为主办国，则 is_host=1，否则=0
    is_host_t = 1 if country == host_2024 else 0

    X_pred = {
        'medal_share_t12': row_2012['medal_share'],   # (2024 - 12 = 2012)
        'medal_share_t8':  row_2016['medal_share'],   # (2024 - 8  = 2016)
        'medal_share_t4':  row_2020['medal_share'],   # (2024 - 4  = 2020)

        'participants_t12': row_2012['Participants'],
        'participants_t8':  row_2016['Participants'],
        'participants_t4':  row_2020['Participants'],

        'events_t12': row_2012['Events'],
        'events_t8':  row_2016['Events'],
        'events_t4':  row_2020['Events'],

        'is_host_t12': row_2012['is_host'],
        'is_host_t8':  row_2016['is_host'],
        'is_host_t4':  row_2020['is_host'],

        'is_host_t': is_host_t
    }

    # 转成 DataFrame 后做预测
    df_x_pred = pd.DataFrame([X_pred])
    model = country_models[country]
    pred_medal_share = model.predict(df_x_pred)[0]  # 取第一个值

    # 将结果存储到字典中
    predictions_2024[country] = pred_medal_share

# 至此，predictions_2024 即可得到形如：
# {
#   'USA': 0.1823,
#   'CHN': 0.1567,
#   'RUS': 0.1045,
#   'FRA': 0.0602,
#   ...
# }
# 等等的字典，每个国家的 value 是模型预测的 2024 年 medal_share

# 如果有需求，可以进行简单的后处理，比如将负预测值设为 0，或者将总和重新归一化为 1
predictions_2024_fixed = {}
sum_pred = sum(max(0, v) for v in predictions_2024.values())
for country, val in predictions_2024.items():
    val_clipped = max(0, val)  # 若出现负值则置 0
    if sum_pred > 0:
        predictions_2024_fixed[country] = val_clipped / sum_pred
    else:
        # 若全部都 <=0，则平分
        predictions_2024_fixed[country] = 1.0 / len(predictions_2024)

# 输出预测值
print("Predicted medal share for 2024 (after简单归一化):")
for country, ms in predictions_2024_fixed.items():
    print(f"{country}: {ms*1091:.4f}")

AFG
AHO
ALB
ALG
AND
ANG
ANT
ARG
ARM
ARU
ASA
AUS
AUT
AZE
BAH
BAN
BAR
BDI
BEL
BEN
BER
BHU
BIH
BIZ
BOL
BOT
BRA
BRN
BUL
BUR
CAF
CAM
CAN
CAY
CGO
CHA
CHI
CHN
CIV
CMR
COD
COK
COL
COM
CPV
CRC
CRO
CUB
CYP
CZE
DEN
DMA
DOM
ECU
EGY
ESA
ESP
EST
ETH
FIJ
FIN
FRA
GAB
GAM
GBR
GBS
GEO
GEQ
GER
GHA
GRE
GRN
GUA
GUI
GUM
GUY
HAI
HKG
HON
HUN
INA
IND
IRI
IRL
IRQ
ISL
ISR
ISV
ITA
IVB
JAM
JOR
JPN
KAZ
KEN
KGZ
KOR
KSA
KUW
LAO
LAT
LBA
LBR
LCA
LES
LIB
LIE
LTU
LUX
MAD
MAR
MAS
MAW
MDA
MDV
MEX
MGL
MKD
MLI
MLT
MON
MOZ
MRI
MTN
MYA
NAM
NCA
NED
NEP
NGR
NIG
NOR
NRU
NZL
OMA
PAK
PAN
PAR
PER
PHI
PLE
PNG
POL
POR
PUR
QAT
ROU
RSA
RWA
SAM
SEN
SEY
SGP
SKN
SLE
SLO
SMR
SOL
SOM
SRI
STP
SUD
SUI
SUR
SVK
SWE
SWZ
SYR
TAN
TCH
TGA
THA
TJK
TKM
TOG
TPE
TTO
TUN
TUR
UAE
UGA
UKR
URU
USA
UZB
VAN
VEN
VIE
VIN
YEM
YUG
ZAM
ZIM
Predicted medal share for 2024 (after简单归一化):
AFG: 0.0000
AHO: 0.0000
ALB: 0.0000
ALG: 2.3650
AND: 0.0000
ANG: 0.0000
ANT: 0.0000
ARG: 5.8124
ARM: 4.2482
ARU: 0.0000
ASA: 0.0000
AUS: 56.6892
AUT: 6.0843
AZE: 11.5160
BAH: 1.5103
B