**Imports**

In [57]:
import xgboost as xgb
from xgboost import XGBRegressor

import sklearn as sk
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score, KFold, TimeSeriesSplit
from sklearn.metrics import r2_score, mean_squared_error

import shap
import warnings



**Load Data**

In [58]:
train_data = pd.read_csv('./data/train.csv')
test_data = pd.read_csv('./data/test.csv')

print(train_data.head())
print(test_data.head())

FEATURES = [col for col in train_data.columns if col in list("ABCDEFGHIJKLMN")]
TARGET = ["Y1", "Y2"]

X = train_data[FEATURES].copy()
y1 = train_data["Y1"].copy()
y2 = train_data["Y2"].copy()
X_test = test_data[FEATURES].copy()



   time         A         B         C         D         E         F         G  \
0     0  0.207366 -0.159951 -0.634176 -0.580962 -0.266505  0.060173 -0.475257   
1     1  0.188828 -0.265508  0.042143 -0.550442 -0.132319 -0.185219  0.028295   
2     2 -0.144261 -0.577142 -0.214634 -0.747391 -0.184255 -0.464831 -0.085181   
3     3  0.208982 -0.310449  0.513708 -0.562868  0.742308 -0.305487  0.762246   
4     4  0.093320 -0.358156  0.173188 -0.687296 -0.161461 -0.116062 -0.245748   

          H         I         J         K         L         M         N  \
0 -1.486516 -0.332594 -0.671466 -0.226149 -0.187624 -0.780237 -0.785965   
1  0.093210 -0.518139 -0.251917 -0.347845 -0.359069 -0.161254  0.020401   
2  0.700449 -0.603438  0.197773 -0.566696 -0.580799  0.202726  0.135261   
3  1.363020 -0.384575  0.525556 -0.348514 -0.428099  0.548993  0.471031   
4  0.863372 -0.655588 -0.263358 -0.557428 -0.481214  0.083602  0.003087   

         Y1        Y2  
0 -0.935902 -0.310081  
1 -0.089707 -0

**Models**

In [59]:
#Make XGBRegressor model
def make_xgbregressor():
    return XGBRegressor(
        n_estimators=1000,
        learning_rate=0.03,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        reg_lambda=1,
        reg_alpha=0,
        objective='reg:squarederror',
        tree_method='hist',
        random_state=42,
        n_jobs=-1,
        verbosity=2,
        eval_metric='rmse',
        early_stopping_rounds=100
    )


**Train the model**

In [60]:
def train_model(X, y, X_test, cv, make_model_fn):
    oof = np.zeros(len(X))
    test_fold_preds = []

    for fold, (train, val) in enumerate(cv.split(X)):
        X_train, X_val = X.iloc[train], X.iloc[val]
        y_train, y_val = y.iloc[train], y.iloc[val]

        model=make_model_fn()

        model.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            verbose=False
        )

        oof[val] = model.predict(X_val)

        test_fold_preds.append(model.predict(X_test))

        print(f"fold {fold+1} R^2: {r2_score(y_val, oof[val]):.4f}")
        cv_r2 = r2_score(y, oof)
        print(f"CV R^2: {cv_r2:.4f}")

        test_pred = np.mean(test_fold_preds, axis=0)

    return oof, test_pred

**Defining Folds**

In [61]:
#Time series cross validation
tscv = TimeSeriesSplit(n_splits=5) 

#Kfold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

**Running the model**

In [62]:
oof_y1, test_pred_y1 = train_model(X, y1, X_test, tscv, make_xgbregressor)
oof_y2, test_pred_y2 = train_model(X, y2, X_test, tscv, make_xgbregressor)

[19:16:55] INFO: /Users/runner/work/xgboost/xgboost/src/data/iterative_dmatrix.cc:53: Finished constructing the `IterativeDMatrix`: (13335, 14, 186690).
[19:16:55] INFO: /Users/runner/work/xgboost/xgboost/src/data/iterative_dmatrix.cc:53: Finished constructing the `IterativeDMatrix`: (13333, 14, 186662).
fold 1 R^2: 0.7343
CV R^2: 0.0527
[19:16:55] INFO: /Users/runner/work/xgboost/xgboost/src/data/iterative_dmatrix.cc:53: Finished constructing the `IterativeDMatrix`: (26668, 14, 373352).
[19:16:55] INFO: /Users/runner/work/xgboost/xgboost/src/data/iterative_dmatrix.cc:53: Finished constructing the `IterativeDMatrix`: (13333, 14, 186662).
fold 2 R^2: 0.6866
CV R^2: 0.1307
[19:16:55] INFO: /Users/runner/work/xgboost/xgboost/src/data/iterative_dmatrix.cc:53: Finished constructing the `IterativeDMatrix`: (40001, 14, 560014).
[19:16:55] INFO: /Users/runner/work/xgboost/xgboost/src/data/iterative_dmatrix.cc:53: Finished constructing the `IterativeDMatrix`: (13333, 14, 186662).
fold 3 R^2: 0.