In [58]:
import pandas as pd
import numpy as np
df = pd.read_csv('../complete_data.csv')
total_medals_per_year = df.groupby('Year')['Gold'].sum().reset_index()
total_medals_per_year.columns = ['Year', 'Gold_Medals_AllCountries']
df = pd.merge(df, total_medals_per_year, on='Year', how='left')
df['medal_share'] = df['Gold'] / df['Gold_Medals_AllCountries']

In [59]:
def create_features(df_country):
    # 按年份升序
    df_country = df_country.sort_values('Year').reset_index(drop=True)
    # 构建一个列表，用来存放每一行对应的训练数据
    rows = []
    print(df_country)
    for i in range(len(df_country)):
        current_year = df_country.loc[i, 'Year']

        # 前三届的年份
        year_t12 = current_year - 12
        year_t8  = current_year - 8
        year_t4  = current_year - 4

        # 判断三届数据是否都存在
        # 我们用 df_country 查询一下
        if year_t12 in df_country['Year'].values \
           and year_t8 in df_country['Year'].values \
           and year_t4 in df_country['Year'].values:

            # 分别取出那三届的数据
            row_t12 = df_country[df_country['Year'] == year_t12].iloc[0]
            row_t8  = df_country[df_country['Year'] == year_t8].iloc[0]
            row_t4  = df_country[df_country['Year'] == year_t4].iloc[0]
            row_t   = df_country.loc[i]

            # 以三届前的数据作为特征
            features = {
                # 1) medal_share
                'medal_share_t12': row_t12['medal_share'],
                'medal_share_t8':  row_t8['medal_share'],
                'medal_share_t4':  row_t4['medal_share'],

                # 2) participants
                'participants_t12': row_t12['Participants'],
                'participants_t8':  row_t8['Participants'],
                'participants_t4':  row_t4['Participants'],

                # 3) events
                'events_t12': row_t12['Events'],
                'events_t8':  row_t8['Events'],
                'events_t4':  row_t4['Events'],

                # 4) is_host(前几届的是否主办也可做特征，但往往影响可能较弱，可按需决定)
                'is_host_t12': row_t12['is_host'],
                'is_host_t8':  row_t8['is_host'],
                'is_host_t4':  row_t4['is_host'],

                # 5) 当前届是否主办(这个在预测时是已知，可作为特征)
                'is_host_t': row_t['is_host']
            }

            # 目标：当年 (current_year) 的 medal_share
            target = row_t['medal_share']

            row_dict = {**features, 'target_year': current_year, 'medal_share': target}
            rows.append(row_dict)

    # 将收集到的所有样本行转化成 DataFrame
    df_features = pd.DataFrame(rows)
    return df_features


In [60]:
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split

# 按国家分组
all_countries = df['NOC'].unique()

# 设置一个字典，用来保存每个国家的模型
country_models = {}
cnt = 0

for country in all_countries:
    # country = "USA"
    df_country = df[df['NOC'] == country].copy()
    df_country = df_country[df_country['Participants'] != 0]
    # print(df_country)
    # 如果该国历史数据还是太少，直接跳过
    if df_country.shape[0] < 8:
        cnt += 1
        continue

    # 构造时序特征
    df_features = create_features(df_country)
    df_features.fillna(0, inplace=True)
    # 若总样本量过少，也跳过
    if df_features.shape[0] < 5:
        continue

    # ---------------------------------------------------
    # 关键改动：把 2024 年的数据留作验证 / 测试集
    # ---------------------------------------------------
    df_features_2024 = df_features[df_features['target_year'] == 2024].copy()
    df_features_train = df_features[df_features['target_year'] != 2024].copy()

    # 如果训练集都没什么数据，也就没法训练
    if df_features_train.shape[0] < 2:
        continue

    # 分别取出训练集和验证集的 X, y
    X_train = df_features_train.drop(columns=['target_year', 'medal_share'])
    y_train = df_features_train['medal_share']

    # 对 2024 的特征留作验证/测试
    X_test_2024 = df_features_2024.drop(columns=['target_year', 'medal_share'])
    y_test_2024 = df_features_2024['medal_share']

    # ---------------------------------------------------
    # 动态调整交叉验证折数
    # ---------------------------------------------------
    n_samples = X_train.shape[0]
    if n_samples < 5:
        cv = n_samples  # 使用留一法交叉验证
    else:
        cv = 5  # 使用 5 折交叉验证

    # 使用 LassoCV 进行训练
    model = LassoCV(alphas=[0.001, 0.01, 0.1, 1.0, 10.0], cv=cv, random_state=42)
    model.fit(X_train, y_train)

    # 如果 2024 年有样本，就进行一下评估
    if len(X_test_2024) > 0:
        score_2024 = model.score(X_test_2024, y_test_2024)
        print(f"Country: {country}, 2024 hold-out R^2: {score_2024:.4f}, #train: {X_train.shape[0]}, #2024: {X_test_2024.shape[0]}, Best alpha: {model.alpha_:.4f}")

    # 保存模型
    country_models[country] = model

    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   AFG  1936       0     0       0            16       2        0      0   
1   AFG  1948       0     0       0            25       2        0      0   
2   AFG  1956       0     0       0            12       1        0      0   
3   AFG  1960       0     0       0            16       2        0      0   
4   AFG  1964       0     0       0             8       1        0      0   
5   AFG  1968       0     0       0             5       1        0      0   
6   AFG  1972       0     0       0             8       1        0      0   
7   AFG  1980       0     0       0            11       2        0      0   
8   AFG  1988       0     0       0             5       1        0      0   
9   AFG  1996       0     0       0             2       1        0      0   
10  AFG  2004       0     0       0             5       4        0      0   
11  AFG  2008       1     0       0             4       2        0      1   

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: AUT, 2024 hold-out R^2: nan, #train: 19, #2024: 1, Best alpha: 0.1000
   NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0  AZE  1996       0     0       1            24       9        0      1   
1  AZE  2000       1     2       0            32       8        0      3   
2  AZE  2004       4     1       0            37      10        0      5   
3  AZE  2008       4     1       1            44      10        0      6   
4  AZE  2012       5     2       2            59      15        0      9   
5  AZE  2016      10     1       7            69      15        0     18   
6  AZE  2020       4     0       3            55      14        0      7   
7  AZE  2024       3     2       2            49      15        0      7   

   Gold_Medals_AllCountries  medal_share  
0                       238     0.000000  
1                       285     0.007018  
2                       274     0.003650  
3                       280     0.003571  
4                    

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: BOT, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   BRA  1900       0     0       0             3       1        0      0   
1   BRA  1920       1     1       1            38       5        0      3   
2   BRA  1924       0     0       0            18       4        0      0   
3   BRA  1932       0     0       0            67       5        0      0   
4   BRA  1936       0     0       0            94       9        0      0   
5   BRA  1948       1     0       0           105      11        0      1   
6   BRA  1952       2     1       0           119      14        0      3   
7   BRA  1956       0     1       0            63      12        0      1   
8   BRA  1960       2     0       0            86      14        0      2   
9   BRA  1964       1     0       0            65      11        0      1   
10  BRA  1968       2     0       1            89      13        0      3 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: CMR, 2024 hold-out R^2: nan, #train: 12, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   COD  1968       0     0       0             8       1        0      0   
1   COD  1984       0     0       0             9       2        0      0   
2   COD  1988       0     0       0            21       3        0      0   
3   COD  1992       0     0       0            19       4        0      0   
4   COD  1996       0     0       0            14       2        0      0   
5   COD  2000       0     0       0             3       1        0      0   
6   COD  2004       0     0       0             6       2        0      0   
7   COD  2008       0     0       0             5       4        0      0   
8   COD  2012       0     0       0             4       3        0      0   
9   COD  2016       0     0       0             4       3        0      0   
10  COD  2020       0     0       0             7       4        0      0

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: ESA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   ESP  1900       0     1       0            12       4        0      1   
1   ESP  1920       0     0       2           111       7        0      2   
2   ESP  1924       0     0       0           151      15        0      0   
3   ESP  1928       0     1       0           106      10        0      1   
4   ESP  1932       1     0       0             8       3        0      1   
5   ESP  1948       0     0       1            84      11        0      1   
6   ESP  1952       0     0       1            42       7        0      1   
7   ESP  1956       0     0       0            12       1        0      0   
8   ESP  1960       1     0       0           238      16        0      1   
9   ESP  1964       0     0       0            67       9        0      0   
10  ESP  1968       0     0       0           137      12        0      0 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: GHA, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   GRE  1896      19    10      18           148       9        0     47   
1   GRE  1900       0     0       0             4       2        0      0   
2   GRE  1904       1     1       0            17       3        0      2   
3   GRE  1908       1     0       3            49       3        0      4   
4   GRE  1912       1     1       0            77       5        0      2   
5   GRE  1920       0     0       1            98       8        0      1   
6   GRE  1924       0     1       0            79       9        0      1   
7   GRE  1928       0     0       0            47       4        0      0   
8   GRE  1932       0     0       0            16       3        0      0   
9   GRE  1936       0     0       0            64       7        0      0   
10  GRE  1948       0     0       0            97      10        0      0 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: IRQ, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   ISL  1908       0     0       0             1       1        0      0   
1   ISL  1912       0     0       0             2       2        0      0   
2   ISL  1936       0     0       0            13       2        0      0   
3   ISL  1948       0     0       0            27       3        0      0   
4   ISL  1952       0     0       0            18       1        0      0   
5   ISL  1956       0     0       1             2       1        0      1   
6   ISL  1960       0     0       0            11       2        0      0   
7   ISL  1964       0     0       0             5       2        0      0   
8   ISL  1968       0     0       0            16       3        0      0   
9   ISL  1972       0     0       0            30       4        0      0   
10  ISL  1976       0     0       0            22       4        0      0 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   LAO  1980       0     0       0            19       3        0      0   
1   LAO  1988       0     0       0             3       2        0      0   
2   LAO  1992       0     0       0             7       2        0      0   
3   LAO  1996       0     0       0             5       1        0      0   
4   LAO  2000       0     0       0             3       2        0      0   
5   LAO  2004       0     0       0             5       3        0      0   
6   LAO  2008       0     0       0             4       2        0      0   
7   LAO  2012       0     0       0             3       2        0      0   
8   LAO  2016       0     0       0             5       4        0      0   
9   LAO  2020       0     0       0             4       3        0      0   
10  LAO  2024       0     0       0             4       3        0      0   

    Gold_Medals_AllCountries  medal_share  
0                        172   

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: MDA, 2024 hold-out R^2: nan, #train: 4, #2024: 1, Best alpha: 10.0000
   NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0  MDV  1988       0     0       0             9       1        0      0   
1  MDV  1992       0     0       0            10       2        0      0   
2  MDV  1996       0     0       0             9       2        0      0   
3  MDV  2000       0     0       0             4       2        0      0   
4  MDV  2004       0     0       0             4       2        0      0   
5  MDV  2008       0     0       0             4       2        0      0   
6  MDV  2012       0     0       0             5       3        0      0   
7  MDV  2016       0     0       0             4       2        0      0   
8  MDV  2020       0     0       0             4       3        0      0   
9  MDV  2024       0     0       0             5       4        0      0   

   Gold_Medals_AllCountries  medal_share  
0                       237          0.0 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: NCA, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   NED  1900       3     0       2            73       7        0      5   
1   NED  1908       2     0       0           218      11        0      2   
2   NED  1912       3     0       0            45       7        0      3   
3   NED  1920       5     4       2           213      15        0     11   
4   NED  1924       5     4       1           246      17        0     10   
5   NED  1928       4     6       9           384      17        0     19   
6   NED  1932       1     2       5            80       9        0      8   
7   NED  1936       7     6       4           227      15        0     17   
8   NED  1948       9     5       2           187      18        0     16   
9   NED  1952       0     0       5           180      14        0      5   
10  NED  1956       0     0       0             1       1        0      0 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: PNG, 2024 hold-out R^2: nan, #train: 7, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   POL  1912       0     0       0             1       1        0      0   
1   POL  1924       1     0       1           100      10        0      2   
2   POL  1928       3     1       1           146      11        0      5   
3   POL  1932       4     3       2            71       4        0      9   
4   POL  1936       5     0       4           177      15        0      9   
5   POL  1948       1     0       0            52       5        0      1   
6   POL  1952       1     1       2           248      11        0      4   
7   POL  1956       4     1       4           118       9        0      9   
8   POL  1960      11     4       6           307      17        0     21   
9   POL  1964      10     7       6           240      12        0     23   
10  POL  1968      11     5       2           296      16        0     18 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: SLE, 2024 hold-out R^2: nan, #train: 8, #2024: 1, Best alpha: 10.0000
   NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0  SLO  1992       2     0       0            47      12        0      2   
1  SLO  1996       0     0       2            47       8        0      2   
2  SLO  2000       0     2       0            90      13        0      2   
3  SLO  2004       3     0       1           102      10        0      4   
4  SLO  2008       2     1       2            73      11        0      5   
5  SLO  2012       2     1       1            78      15        0      4   
6  SLO  2016       1     1       2            70      12        0      4   
7  SLO  2020       1     3       1            64      17        0      5   
8  SLO  2024       0     2       1           117      17        0      3   

   Gold_Medals_AllCountries  medal_share  
0                       309     0.000000  
1                       238     0.000000  
2                       285    

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   TGA  1984       0     0       0             7       1        0      0   
1   TGA  1988       0     0       0             7       2        0      0   
2   TGA  1992       0     0       0             5       2        0      0   
3   TGA  1996       0     0       1             5       3        0      1   
4   TGA  2000       0     0       0             3       2        0      0   
5   TGA  2004       0     0       0             5       4        0      0   
6   TGA  2008       0     0       0             3       2        0      0   
7   TGA  2012       0     0       0             3       2        0      0   
8   TGA  2016       0     0       0             7       4        0      0   
9   TGA  2020       0     0       0             6       4        0      0   
10  TGA  2024       0     0       0             4       3        0      0   

    Gold_Medals_AllCountries  medal_share  
0                        325   

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Country: VEN, 2024 hold-out R^2: nan, #train: 16, #2024: 1, Best alpha: 10.0000
    NOC  Year  Bronze  Gold  Silver  Participants  Events  is_host  Total  \
0   VIE  1980       0     0       0            38       4        0      0   
1   VIE  1988       0     0       0            12       6        0      0   
2   VIE  1992       0     0       0            10       3        0      0   
3   VIE  1996       0     0       0             6       4        0      0   
4   VIE  2000       0     0       1             7       4        0      1   
5   VIE  2004       0     0       1            11       8        0      1   
6   VIE  2008       0     0       1            18       8        0      1   
7   VIE  2012       0     0       1            24      11        0      1   
8   VIE  2016       1     0       0            28      10        0      1   
9   VIE  2020       0     1       1            23      11        0      2   
10  VIE  2024       0     0       0            20      11        0      0

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [67]:
host_2024 = "FRA"

# 构造一个空的字典，用来存放每个国家的预测结果
predictions_2024 = {}

# 遍历每一个国家
for country in all_countries:
    # 若该国没有在前面成功训练模型，则跳过
    if country not in country_models:
        continue
    # print(country)
    # 取出该国数据
    df_country = df[df['NOC'] == country].copy()

    # 为了构造特征，需要查询 2012、2016、2020 这三届，若任意一届缺失，则无法做该国预测
    needed_years = [2012, 2016, 2020]
    # 判断这三届是否都齐全
    if not all(y in df_country['Year'].values for y in needed_years):
        # 若有缺失，跳过
        continue

    # 分别取 2012、2016、2020 年的数据行（这里假设每年只出现一次）
    row_2012 = df_country[df_country['Year'] == 2012].iloc[0]
    row_2016 = df_country[df_country['Year'] == 2016].iloc[0]
    row_2020 = df_country[df_country['Year'] == 2020].iloc[0]

    # 按我们之前 create_features() 的规则，构造特征
    # 假定 2024 年 is_host 信息：若该国为主办国，则 is_host=1，否则=0
    is_host_t = 1 if country == host_2024 else 0

    X_pred = {
        'medal_share_t12': row_2012['medal_share'],   # (2024 - 12 = 2012)
        'medal_share_t8':  row_2016['medal_share'],   # (2024 - 8  = 2016)
        'medal_share_t4':  row_2020['medal_share'],   # (2024 - 4  = 2020)

        'participants_t12': row_2012['Participants'],
        'participants_t8':  row_2016['Participants'],
        'participants_t4':  row_2020['Participants'],

        'events_t12': row_2012['Events'],
        'events_t8':  row_2016['Events'],
        'events_t4':  row_2020['Events'],

        'is_host_t12': row_2012['is_host'],
        'is_host_t8':  row_2016['is_host'],
        'is_host_t4':  row_2020['is_host'],

        'is_host_t': is_host_t
    }

    # 转成 DataFrame 后做预测
    df_x_pred = pd.DataFrame([X_pred])
    model = country_models[country]
    pred_medal_share = model.predict(df_x_pred)[0]  # 取第一个值

    # 将结果存储到字典中
    predictions_2024[country] = pred_medal_share

# 至此，predictions_2024 即可得到形如：
# {
#   'USA': 0.1823,
#   'CHN': 0.1567,
#   'RUS': 0.1045,
#   'FRA': 0.0602,
#   ...
# }
# 等等的字典，每个国家的 value 是模型预测的 2024 年 medal_share

# 如果有需求，可以进行简单的后处理，比如将负预测值设为 0，或者将总和重新归一化为 1
predictions_2024_fixed = {}
sum_pred = sum(max(0, v) for v in predictions_2024.values())
for country, val in predictions_2024.items():
    val_clipped = max(0, val)  # 若出现负值则置 0
    if sum_pred > 0:
        predictions_2024_fixed[country] = val_clipped / sum_pred
    else:
        # 若全部都 <=0，则平分
        predictions_2024_fixed[country] = 1.0 / len(predictions_2024)

# 输出预测值
print("Predicted medal share for 2024 (after简单归一化):")
for country, ms in predictions_2024_fixed.items():
    print(f"{country},{ms*290:.4f},")

Predicted medal share for 2024 (after简单归一化):
AFG,0.0000,
AHO,0.0000,
ALB,0.0000,
ALG,0.7615,
AND,0.0000,
ANG,0.0000,
ANT,0.0000,
ARG,1.5413,
ARM,0.2841,
ARU,0.0000,
ASA,0.0000,
AUS,15.9324,
AUT,1.2733,
AZE,1.1664,
BAH,0.0000,
BAN,0.0000,
BAR,0.0000,
BDI,0.0000,
BEL,2.3808,
BEN,0.0000,
BER,0.0776,
BHU,0.0000,
BIH,0.0000,
BIZ,0.0000,
BOL,0.0000,
BOT,0.0000,
BRA,8.9990,
BRN,0.3318,
BUL,1.7203,
BUR,0.0000,
CAF,0.0000,
CAM,0.0000,
CAN,3.3662,
CAY,0.0000,
CGO,0.0000,
CHA,0.0000,
CHI,0.1970,
CHN,36.8732,
CIV,0.1420,
CMR,0.2896,
COD,0.0000,
COK,0.0000,
COL,1.2591,
COM,0.0000,
CPV,0.0000,
CRC,0.1134,
CRO,2.6430,
CUB,3.8085,
CYP,0.0000,
CZE,3.2686,
DEN,2.5970,
DMA,0.0000,
DOM,0.2937,
ECU,0.2929,
EGY,0.2641,
ESA,0.0000,
ESP,6.6468,
EST,0.0000,
ETH,2.1451,
FIJ,0.2953,
FIN,1.3650,
FRA,12.0396,
GAB,0.0000,
GAM,0.0000,
GBR,0.0000,
GBS,0.0000,
GEO,3.6508,
GEQ,0.0000,
GER,16.7210,
GHA,0.0000,
GRE,1.6222,
GRN,0.1695,
GUA,0.0000,
GUI,0.0000,
GUM,0.0000,
GUY,0.0000,
HAI,0.0000,
HKG,0.2083,
HON,0.0000,
HUN