In [26]:
import numpy as np
import pandas as pd


#models = ['gem1', 'gem1-gamma11', 'gem1-gamma12', 'gem1-gamma11-gc2']
models = ['gem1']
seeds = [0, 1, 2, 3]


def get_ensemble_dataset(models, seeds, phase='val'):
    X, y = [], []
    for model in models:
        for seed in seeds:
            dfs = []
            for fold in range(10):
                df = pd.read_csv(f'outputs/{model}/fold{fold}/seed{seed}/{phase}_preds.csv')
                dfs.append(df)
            
            if phase == 'val':
                stacked_df = pd.concat(dfs, axis=0)
                X.append(stacked_df[stacked_df.columns[-2:]].values)
                y.append(stacked_df[['Reorg_g', 'Reorg_ex']].values)
            else:
                mean_X = np.mean([df[df.columns[-2:]].values for df in dfs], axis=0)
                X.append(mean_X)
    X = np.concatenate(X, axis=1)
    y = np.concatenate(y, axis=0) if phase == 'val' else None

    return X, y


train_X, train_y = get_ensemble_dataset(models, seeds, phase='val')
test_X, _ = get_ensemble_dataset(models, seeds, phase='test')

print(train_X.shape, train_y.shape)
print(test_X.shape)

(18157, 8) (72628, 2)
(457, 8)


In [31]:
import xgboost as xgb
from sklearn.model_selection import KFold


preds = []

kfold = KFold(n_splits=7, shuffle=True, random_state=0)

for train_idx, val_idx in kfold.split(train_X):
    X_train, y_train = train_X[train_idx], train_y[train_idx]
    X_val, y_val = train_X[val_idx], train_y[val_idx]
    model = xgb.XGBRegressor(gpu_id=1)
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=20)
    preds.append(model.predict(test_X))

preds = np.mean(preds, axis=0)
sub = pd.read_csv('data/sample_submission.csv')
sub['Reorg_g'] = preds[:, 0]
sub['Reorg_ex'] = preds[:, 1]
sub.to_csv('outputs/ensembled_submission.csv', index=False)

[0]	validation_0-rmse:0.26961
[1]	validation_0-rmse:0.20640
[2]	validation_0-rmse:0.16669
[3]	validation_0-rmse:0.14314
[4]	validation_0-rmse:0.13015
[5]	validation_0-rmse:0.12357
[6]	validation_0-rmse:0.12029
[7]	validation_0-rmse:0.11862
[8]	validation_0-rmse:0.11795
[9]	validation_0-rmse:0.11758
[10]	validation_0-rmse:0.11736
[11]	validation_0-rmse:0.11730
[12]	validation_0-rmse:0.11740
[13]	validation_0-rmse:0.11747
[14]	validation_0-rmse:0.11734
[15]	validation_0-rmse:0.11729
[16]	validation_0-rmse:0.11726
[17]	validation_0-rmse:0.11751
[18]	validation_0-rmse:0.11766
[19]	validation_0-rmse:0.11779
[20]	validation_0-rmse:0.11797
[21]	validation_0-rmse:0.11803
[22]	validation_0-rmse:0.11806
[23]	validation_0-rmse:0.11810
[24]	validation_0-rmse:0.11812
[25]	validation_0-rmse:0.11815
[26]	validation_0-rmse:0.11812
[27]	validation_0-rmse:0.11830
[28]	validation_0-rmse:0.11830
[29]	validation_0-rmse:0.11838
[30]	validation_0-rmse:0.11826
[31]	validation_0-rmse:0.11850
[32]	validation_0-