In [11]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit

# Read the complete dataset from disk, then order the rows by country code and year
# so that the row-wise shifts used for the lag features follow each country's timeline.
df = pd.read_csv("../complete_data.csv").sort_values(by=['NOC', 'Year'])

# Generate lag features (lags with no earlier row available are filled with 0)
def generate_lags(group):
    """Return a copy of ``group`` with 1-, 2- and 3-row lag columns added.

    For each lag ``k`` in 1..3 and each of the columns ``Gold``, ``Silver``,
    ``Bronze``, ``Total`` and ``is_participated``, a column ``<name>_lag<k>``
    is added holding that column shifted down by ``k`` rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to lag; must contain the five source columns listed above.
        Presumably the rows of a single country in chronological order --
        the shift is by row position, not by calendar year (TODO confirm
        the caller guarantees one row per Games edition).

    Returns
    -------
    pandas.DataFrame
        A new frame with the 15 lag columns appended. Every NaN in the frame
        is replaced by 0 -- this covers the leading rows of the lag columns,
        but also any NaN already present in the other columns.
    """
    # Work on a copy: pandas does not support functions that mutate the object
    # handed to groupby.apply, and callers keep their input frame untouched.
    group = group.copy()
    for lag in [1, 2, 3]:
        for source in ('Gold', 'Silver', 'Bronze', 'Total', 'is_participated'):
            group[f'{source}_lag{lag}'] = group[source].shift(lag)
    return group.fillna(0)

# Flag each row with 1 when the delegation had at least one participant, else 0.
df['is_participated'] = np.where(df['Participants'] > 0, 1, 0)

# Add the lag columns country by country. The columns (including the grouping
# key 'NOC') are selected explicitly: relying on apply() to operate on the
# grouping column implicitly is deprecated in recent pandas, and selecting it
# keeps 'NOC' in the result, which the per-country modelling loop needs.
df = df.groupby('NOC', group_keys=False)[list(df.columns)].apply(generate_lags)

# Model inputs: the three lags of every medal/participation column, in the order
# Gold, Silver, Bronze, Total, is_participated, followed by the host flag.
features = [
    f'{source}_lag{step}'
    for source in ('Gold', 'Silver', 'Bronze', 'Total', 'is_participated')
    for step in (1, 2, 3)
] + ['is_host']

# Fitted pipelines and 2024 evaluation records, both keyed by NOC code.
models = {}
results = {}

# Regularisation grid (50 log-spaced values from 0.001 to 100). It does not
# depend on the country, so it is built once instead of on every iteration.
alphas = np.logspace(-3, 2, 50)

# Fit one Lasso model per country: train on every row before 2024 and evaluate
# on the 2024 row. A single groupby pass replaces re-scanning the whole frame
# with a boolean mask for each country; sort=False keeps the countries in order
# of first appearance, matching df['NOC'].unique(). Rows with a missing NOC are
# not visited (groupby drops NaN keys by default).
for noc, country_df in df.groupby('NOC', sort=False):
    train = country_df[country_df['Year'] < 2024]
    test = country_df[country_df['Year'] == 2024]

    # Skip countries with no 2024 row or with fewer than 4 training rows.
    if len(test) == 0 or len(train) < 4:
        print(f"Skipping {noc}: Insufficient data.")
        continue

    X_train = train[features]
    y_train = train['Gold']
    X_test = test[features]
    y_test = test['Gold']

    # Skip countries whose training features sum to zero (nothing to learn from).
    if X_train.sum().sum() == 0:
        print(f"Skipping {noc}: All features are zero.")
        continue

    # Time-ordered cross-validation. At least 4 training rows are guaranteed by
    # the guard above, so min(3, len(train) - 1) is always 3 and the former
    # "n_splits < 2" bail-out could never trigger; it has been removed.
    n_splits = 3
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # Standardise the features, then let LassoCV pick alpha over the grid.
    # NOTE(review): the scaler is fitted on all of X_train before LassoCV runs
    # its internal folds, so validation folds share scaling statistics with the
    # training folds -- confirm this is acceptable.
    # NOTE(review): the recorded output shows many coordinate-descent warnings;
    # presumably Total_lag* is the sum of the other medal lags (collinear
    # features) -- TODO confirm before tuning max_iter further.
    pipeline = make_pipeline(
        StandardScaler(),
        LassoCV(
            alphas=alphas,
            cv=tscv,
            max_iter=100000,  # generous iteration budget for the solver
            random_state=42,
            selection='random'  # random coordinate updates instead of cyclic
        )
    )

    try:
        # Fit on the pre-2024 rows.
        pipeline.fit(X_train, y_train)
        lasso_model = pipeline.named_steps['lassocv']

        # Predict 2024 and score it. With a single 2024 row the MSE is just the
        # squared error of that one prediction.
        y_pred = pipeline.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)

        # Keep the fitted pipeline with its selected alpha, and the evaluation.
        # Only the first 2024 row is recorded if a country has several.
        models[noc] = {
            'model': pipeline,
            'best_alpha': lasso_model.alpha_
        }
        results[noc] = {
            'Actual': y_test.values[0],
            'Predicted': y_pred[0],
            'MSE': mse
        }
    except Exception as e:
        # Best effort: report the failure and carry on with the next country.
        print(f"Failed for {noc}: {e}")

  df = df.groupby('NOC', group_keys=False).apply(generate_lags)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_

Skipping AIN: All features are zero.


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gra

Skipping CRT: All features are zero.


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gra

Skipping EOR: All features are zero.


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  

Skipping LBN: All features are zero.


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gra

Skipping ROC: All features are zero.


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gra

In [12]:
import pandas as pd

# Flatten the per-country results into one record per NOC.
# The stored values are gold-medal counts (the model target is 'Gold' and the
# output file is Gold.csv), so the columns are labelled *_Gold_2024 rather than
# *_Total_2024; the recorded KeyError: 'Predicted_Gold_2024' shows that a
# consumer of this table looks the prediction up under the Gold name.
result_columns = ['NOC', 'Actual_Gold_2024', 'Predicted_Gold_2024']
results_list = [
    {
        'NOC': noc,
        'Actual_Gold_2024': res['Actual'],
        'Predicted_Gold_2024': res['Predicted']
    }
    for noc, res in results.items()
]

# Build the table; passing the column list keeps the header even when no
# country produced a result.
results_df = pd.DataFrame(results_list, columns=result_columns)

# Write the table to a CSV file without the index column.
results_df.to_csv("Gold.csv", index=False)
print("Results saved to 'Gold.csv'.")

Results saved to 'Gold.csv'.


KeyError: 'Predicted_Gold_2024'