In [1]:
import os
from multiprocessing import Pool
import gc
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from sklearn.multioutput import MultiOutputRegressor

from scipy.stats import pearsonr
import lightgbm as lgb
#import shap

# **1. Data loading**

In [2]:
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/g-research-crypto-forecasting/example_sample_submission.csv
/kaggle/input/g-research-crypto-forecasting/asset_details.csv
/kaggle/input/g-research-crypto-forecasting/example_test.csv
/kaggle/input/g-research-crypto-forecasting/train.csv
/kaggle/input/g-research-crypto-forecasting/supplemental_train.csv
/kaggle/input/g-research-crypto-forecasting/gresearch_crypto/competition.cpython-37m-x86_64-linux-gnu.so
/kaggle/input/g-research-crypto-forecasting/gresearch_crypto/__init__.py


In [3]:
data = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/train.csv')
supplemental = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/supplemental_train.csv')

supplemental['timestamp_dt'] = pd.to_datetime(supplemental['timestamp'], unit='s')
data['timestamp_dt'] = pd.to_datetime(data['timestamp'], unit='s')

In [4]:
weights_assets = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/asset_details.csv')

In [5]:
assets = data['Asset_ID'].unique().tolist()

In [None]:
n_plots = 2
for i in range(0, len(assets), n_plots):
    batch = assets[i:i+n_plots]
    fig, axes = plt.subplots(1, len(batch), figsize=(5*len(batch), 4))
    
    for ax, asset_id in zip(axes, batch):
        data_main = data[data['Asset_ID'] == asset_id].sort_values('timestamp')
        ax.plot(data_main['timestamp'], data_main['VWAP'], color='blue', label='Data')
        
        if 'Asset_ID' in supplemental.columns:
            data_supp = supplemental[supplemental['Asset_ID'] == asset_id].sort_values('timestamp')
            if not data_supp.empty:
                ax.plot(data_supp['timestamp'], data_supp['VWAP'], color='red', label='Supplemental data')
        
        ax.set_title(f'Asset {asset_id}: VWAP over time')
        ax.set_xlabel('Timestamp')
        ax.set_ylabel('VWAP')
        ax.grid(True)
        ax.legend()
    
    plt.tight_layout()
    plt.show()

In [6]:
test = (supplemental.sort_values(["Asset_ID", "timestamp"]).reset_index(drop=True))
train = (data.sort_values(["Asset_ID", "timestamp"]).reset_index(drop=True))

In [7]:
del supplemental, data
gc.collect()

30

# **2. Data preprocessing**

In [8]:
def make_regular_series(df_asset, asset, method='pad', col='timestamp'):
    """
    Function makes the time series regular by timestamp.
    df_asset — filtered DataFrame by Asset_ID 
    """
    df_asset = df_asset.set_index(col)
    
    min_time = df_asset.index.min()
    max_time = df_asset.index.max()
    
    df_asset = df_asset.reindex(
        range(min_time, max_time + 60, 60),
        method=method
    )
    
    return df_asset

assets = train['Asset_ID'].unique().tolist()
groups = {asset: train[train['Asset_ID'] == asset].copy() for asset in assets}

with Pool(3) as pool:
    results = pool.starmap(make_regular_series, [(groups[asset], asset) for asset in assets])

train_all_regular = pd.concat(results)
train_all_regular = train_all_regular.reset_index().set_index(['timestamp','Asset_ID'], drop=False)

del results
del groups
gc.collect()


0

In [9]:
# All fillings instead of 0 will provide leakage 
train_all_regular['Target'] = train_all_regular['Target'].fillna(0)

train_all_regular['VWAP'] = (
    train_all_regular['VWAP']
    .replace([np.inf, -np.inf], np.nan)  
    .ffill()                              
)

In [None]:
ratio_zeros = (train_all_regular['Target'] == 0).sum() / (train_all_regular['Target'] != 0).sum()
print(ratio_zeros)

In [11]:
scalers_log = {}
scalers_rob = {}
scalers_y_rl = {}

train_scaled_list = []
test_scaled_list = []
features_to_log_scale = ['Volume', 'Count']
features_to_rob_scale = ["Open", "High", "Low", "Close", "VWAP"]
y_col = ["Target"]

features = ["Count", "Open", "High", "Low", "Close", "Volume", "VWAP"]
x_cols = ["Asset_ID"] + features
all_cols = x_cols + y_col

for asset in train_all_regular["Asset_ID"].unique():
    df_a = train_all_regular.loc[train_all_regular.Asset_ID == asset, :].copy()
    
    df_a[features_to_log_scale] = np.log1p(df_a[features_to_log_scale])
    scalers_log[asset] = "log1p"
    
    scaler = RobustScaler()
    df_a[features_to_rob_scale] = scaler.fit_transform(df_a[features_to_rob_scale])
    scalers_rob[asset] = scaler
    
    scaler_y = StandardScaler()
    df_a[y_col] = scaler_y.fit_transform(df_a[y_col])
    scalers_y_rl[asset] = scaler_y

    train_scaled_list.append(df_a[all_cols])

train_log_rob = pd.concat(train_scaled_list)

# Same test normalization
for asset in test["Asset_ID"].unique():
    df_test_rl = test[test.Asset_ID == asset].copy()
    df_test_rl[features_to_log_scale] = np.log1p(df_test_rl[features_to_log_scale])
    df_test_rl[features_to_rob_scale] = scalers_rob[asset].transform(df_test_rl[features_to_rob_scale])
    test_scaled_list.append(df_test_rl[all_cols + ['timestamp']])
 
test_log_rob = pd.concat(test_scaled_list)
test_log_rob = test_log_rob.set_index(['timestamp', 'Asset_ID'], drop=False)

del train_scaled_list
del test_scaled_list
gc.collect()

0

In [13]:
del train, test, train_all_regular
gc.collect()

0

# **3. Baseline**
## **3.1 Linear regression**

In [14]:
def weighted_pearsonr(x, y, w):
    """
    Count weighted pearson correlation coefficient.
    x - tested data
    y - predicted data
    w - weights
    """
    x = np.asarray(x)
    y = np.asarray(y)
    w = np.asarray(w)

    mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(w)
    x, y, w = x[mask], y[mask], w[mask]

    if len(x) == 0:
        return np.nan

    w_sum = np.sum(w)
    if w_sum == 0:
        return np.nan

    mx = np.sum(w * x) / w_sum
    my = np.sum(w * y) / w_sum

    cov = np.sum(w * (x - mx) * (y - my))
    vx  = np.sum(w * (x - mx) ** 2)
    vy  = np.sum(w * (y - my) ** 2)

    if vx <= 0 or vy <= 0:
        return np.nan

    return cov / np.sqrt(vx * vy)

In [15]:
asset_weights = weights_assets.set_index('Asset_ID')['Weight'].to_dict()
coins = train_log_rob['Asset_ID'].unique()

In [None]:
all_mses = []
all_corrs = []

all_y_true = []
all_y_pred = []
all_weights = []

for coin in coins:

    X_train_coin = train_log_rob[train_log_rob['Asset_ID'] == coin][x_cols]
    y_train_coin = train_log_rob[train_log_rob['Asset_ID'] == coin][y_col]
    
    X_test_coin = test_log_rob[test_log_rob['Asset_ID'] == coin][x_cols]
    y_test_coin = test_log_rob[test_log_rob['Asset_ID'] == coin][y_col].fillna(0)
    
    lr = LinearRegression()
    lr.fit(X_train_coin, y_train_coin)
    y_pred_coin = lr.predict(X_test_coin)

    y_test_1d = np.ravel(y_test_coin)
    y_pred_1d = np.ravel(y_pred_coin)

    corr_coin, _ = pearsonr(y_test_1d, y_pred_1d)
    mse_coin = mean_squared_error(y_test_1d, y_pred_1d)

    all_mses.append(mse_coin)
    all_corrs.append(corr_coin)

    w = asset_weights[coin]
    all_y_true.append(y_test_1d)
    all_y_pred.append(y_pred_1d)
    all_weights.append(np.full_like(y_test_1d, w, dtype=float))

    del X_train_coin, y_train_coin
    gc.collect()
    
    plt.figure(figsize=(6,6))
    plt.scatter(y_test_coin, y_pred_coin, alpha=0.3)
    plt.plot([y_test_coin.min(), y_test_coin.max()],
             [y_test_coin.min(), y_test_coin.max()],
             color='red', linestyle='--')
    plt.xlabel("True Target")
    plt.ylabel("Predicted Target")
    plt.title(f"LR Baseline: Robust+log Scaled — {coin}\nMSE: {mse_coin:.4f}, Pearson: {corr_coin:.4f}")
    plt.show()

overall_mse = np.mean(all_mses)
overall_corr = np.mean(all_corrs)

print(f"Aggregated MSE: {overall_mse:.6f}")
print(f"Aggregated Pearson Correlation: {overall_corr:.6f}")

y_true_all = np.concatenate(all_y_true)
y_pred_all = np.concatenate(all_y_pred)
w_all      = np.concatenate(all_weights)

weighted_corr = weighted_pearsonr(y_true_all, y_pred_all, w_all)

print(f"Aggregated Weighted Pearson: {weighted_corr:.6f}")


## 3.2 Constant model

In [None]:
all_mses = []
all_corrs = []

for coin in coins:

    y_test_coin = test_log_rob[test_log_rob['Asset_ID'] == coin][y_col].fillna(0)

    y_pred_coin = np.zeros_like(y_test_coin)

    y_test_1d = np.ravel(y_test_coin)
    y_pred_1d = np.ravel(y_pred_coin)
    
    mse_coin = mean_squared_error(y_test_1d, y_pred_1d)
    
    all_mses.append(mse_coin)
    
    print(f"{coin}: MSE={mse_coin:.8f}")
    del y_test_coin, y_pred_coin, y_test_1d, y_pred_1d
    gc.collect()

overall_mse = np.mean(all_mses)
print(f"\nAggregated MSE: {overall_mse:.8f}")

## 3.3 Multioutput regression

In [None]:
X_train = train_log_rob[x_cols]
X_test  = test_log_rob[x_cols]
y_train = train_log_rob[y_col]
y_test  = test_log_rob[y_col]

mlr = MultiOutputRegressor(LinearRegression())
mlr.fit(X_train, y_train)
y_pred = mlr.predict(X_test)

y_true = np.nan_to_num(y_test.values.ravel(), nan=0.0)
y_pred_vals = np.nan_to_num(y_pred.ravel(), nan=0.0)

mse = mean_squared_error(y_true, y_pred_vals)
corr, _ = pearsonr(y_true, y_pred_vals)

print(f"MSE={mse:.6f}, Pearson={corr:.4f}")
 
weights = test_log_rob['Asset_ID'].map(asset_weights).values
weights = np.nan_to_num(weights, nan=0.0)

weighted_corr = weighted_pearsonr(y_true, y_pred_vals, weights)
print(f"Weighted Pearson={weighted_corr:.6f}")

del X_train, X_test, y_train, y_test
gc.collect()

# **4. Feature engineering**

In [18]:
CORR_GROUP = [0, 8, 10, 11, 13]

In [19]:
def add_features_per_asset(df):
    df = df.copy()
    new_cols = {}

    for lag in [1, 5, 15, 55]:
        new_cols[f'Close_lag{lag}'] = df['Close'].shift(lag)

    # log-return 
    safe_close = df['Close'].replace(0, np.nan)
    log_ret = np.log(safe_close / safe_close.shift(1))
    log_ret = log_ret.replace([np.inf, -np.inf], np.nan).fillna(0)
    new_cols['Close_log_ret'] = log_ret

    # volatility
    new_cols['Close_vol_10'] = log_ret.rolling(10).std().fillna(0)
    new_cols['Close_vol_50'] = log_ret.rolling(50).std().fillna(0)

    # EMA
    new_cols['Close_ema_10'] = df['Close'].ewm(span=10, adjust=False).mean()
    new_cols['Close_ema_50'] = df['Close'].ewm(span=50, adjust=False).mean()

    # VWAP lags
    new_cols['VWAP_lag1'] = df['VWAP'].shift(1)
    new_cols['VWAP_lag5'] = df['VWAP'].shift(5)

    # Candle features
    new_cols['Upper_Shadow'] = df['High'] - np.maximum(df['Close'], df['Open'])
    new_cols['Lower_Shadow'] = np.minimum(df['Close'], df['Open']) - df['Low']
    denom = (df['Close'] - df['Open']).replace(0, np.nan)
    new_cols['hlco_ratio'] = (df['High'] - df['Low']) / denom

    df_new = pd.concat([df, pd.DataFrame(new_cols, index=df.index)], axis=1)
    df_new = df_new.replace([np.inf, -np.inf], 0).fillna(0)

    del new_cols, log_ret, safe_close, denom
    gc.collect()
    return df_new


def add_cross_asset_corr_features(df, corr_assets):
    """
    df: DataFrame with MultiIndex (timestamp, Asset_ID) or with Asset_ID column.
    corr_assets: list of asset ids that form the correlated group.
    Returns df with two new columns:
      - corr_group_log_ret : mean log-return of the corr_assets at the same timestamp
      - corr_log_ret_diff  : Close_log_ret - corr_group_log_ret
    """
    df = df.copy()

    if 'Close_log_ret' not in df.columns:
        safe_close = df['Close'].replace(0, np.nan)
        df['Close_log_ret'] = np.log(safe_close / safe_close.shift(1))
        df['Close_log_ret'] = df['Close_log_ret'].replace([np.inf, -np.inf], np.nan).fillna(0)
        del safe_close

    if 'Asset_ID' in df.columns:
        asset_vals = df['Asset_ID']
    else:
        try:
            asset_vals = df.index.get_level_values(1)
        except Exception:
            raise ValueError("DataFrame must have 'Asset_ID' column or be indexed by (timestamp, Asset_ID) MultiIndex.")

    # boolean mask for correlated group (aligned with df)
    mask = np.isin(asset_vals, corr_assets)
    # subset of rows that belong to corr group
    df_corr = df[mask].copy()

    if df_corr.shape[0] == 0:
        df['corr_group_log_ret'] = 0.0
        df['corr_log_ret_diff']  = df['Close_log_ret'] - df['corr_group_log_ret']
        df = df.fillna(0)
        return df

    if isinstance(df_corr.index, pd.MultiIndex):
        ts_corr_index = df_corr.index.get_level_values(0)
    else:
        ts_corr_index = df_corr.index

    group_ret = df_corr.groupby(ts_corr_index)['Close_log_ret'].mean()
  
    if isinstance(df.index, pd.MultiIndex):
        ts_all = df.index.get_level_values(0)
    else:
        ts_all = df.index

    corr_group_values = group_ret.reindex(ts_all).values

    df['corr_group_log_ret'] = pd.Series(corr_group_values, index=df.index).fillna(0)
    df['corr_log_ret_diff']  = df['Close_log_ret'] - df['corr_group_log_ret']

    del df_corr, group_ret, ts_corr_index, ts_all, mask, asset_vals, corr_group_values
    gc.collect()

    return df

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [None]:
assets = train_log_rob.index.get_level_values(1).unique()
parts = []
for asset in assets:
    df_asset = train_log_rob.xs(asset, level=1).copy()  
    df_asset['Asset_ID'] = asset                         
    df_feat = add_features_per_asset(df_asset)
    parts.append(df_feat)
    del df_asset, df_feat
    gc.collect()
train_features = pd.concat(parts)
del parts; gc.collect()
train_features = add_cross_asset_corr_features(train_features, CORR_GROUP)

In [20]:
parts_ = []

for asset in assets:
    df_asset = test_log_rob[test_log_rob['Asset_ID'] == asset].copy()                        
    df_feat = add_features_per_asset(df_asset)
    parts_.append(df_feat)
    del df_asset, df_feat
    gc.collect()
test_features = pd.concat(parts_)
del parts_; gc.collect()
test_features = add_cross_asset_corr_features(test_features, CORR_GROUP)


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [None]:
'''
The warning is most likely due to the fact that at the beginning there are no previous values 
(lags) and NaN are returned, but this is not a problem for the model.
'''

# **5. Model development**

In [21]:
x_cols = [c for c in train_features.columns if c not in ['Asset_ID', y_col]]

In [22]:
params = {
    'objective': 'regression',
    'metric': 'mse',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'max_depth': -1,
    'min_data_in_leaf': 50, ##
    'learning_rate': 0.01,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 1,
    'lambda_l2': 10, ## 
    'verbosity': -1,
    'random_state': 42
}

all_y_true = []
all_y_pred = []
all_weights = []

for asset in tqdm(assets, desc="Assets"):
    df_train_asset = train_features[train_features['Asset_ID'] == asset].copy()
    df_test_asset  = test_features[test_features['Asset_ID'] == asset].copy()
    
    X_train_full = df_train_asset[x_cols].fillna(0)
    y_train_full = df_train_asset[y_col].fillna(0).values.ravel()
    X_test_asset = df_test_asset[x_cols].fillna(0)
    y_test_asset = df_test_asset[y_col].fillna(0).values.ravel()
    
    val_size = 100
    X_tr, X_val = X_train_full.iloc[:-val_size], X_train_full.iloc[-val_size:]
    y_tr, y_val = y_train_full[:-val_size], y_train_full[-val_size:]
    
    train_set = lgb.Dataset(X_tr, label=y_tr)
    val_set = lgb.Dataset(X_val, label=y_val, reference=train_set)
    
    model = lgb.train(
        params,
        train_set,
        num_boost_round=2000,
        valid_sets=[val_set],
        callbacks=[lgb.early_stopping(stopping_rounds=100)]
    )
    

    y_pred_val = model.predict(X_val)
    mse_val = mean_squared_error(y_val, y_pred_val)
    try:
        corr_val, _ = pearsonr(y_val, y_pred_val)
    except:
        corr_val = np.nan
    print(f"Asset {asset} | VAL MSE={mse_val:.6f} | Pearson={corr_val if not np.isnan(corr_val) else 'nan'} | best_iter={model.best_iteration}")
    
    final_model = lgb.train(params, lgb.Dataset(X_train_full, y_train_full), num_boost_round=model.best_iteration)
    y_pred_test = final_model.predict(X_test_asset)
    mse_test = mean_squared_error(y_test_asset, y_pred_test)
    try:
        corr_test, _ = pearsonr(y_test_asset, y_pred_test)
    except:
        corr_test = np.nan
    
    print(f"Asset {asset} | TEST MSE={mse_test:.6f} | Pearson={corr_test if not np.isnan(corr_test) else 'nan'}")
    
    w = asset_weights[asset]
    all_y_true.append(y_test_asset)
    all_y_pred.append(y_pred_test)
    all_weights.append(np.full_like(y_test_asset, w, dtype=float))
    
    #gain_importance = final_model.feature_importance(importance_type='gain')
    #print(f"Asset {asset} | Feature Importance (gain): {dict(zip(x_cols, gain_importance))}")
    #explainer = shap.TreeExplainer(final_model)
    #shap_values = explainer.shap_values(X_test_asset)
    #shap.summary_plot(shap_values, X_test_asset, show=False)

y_true_all = np.concatenate(all_y_true)
y_pred_all = np.concatenate(all_y_pred)
w_all      = np.concatenate(all_weights)

weighted_corr = weighted_pearsonr(y_true_all, y_pred_all, w_all)
print(f"Aggregated Weighted Pearson: {weighted_corr:.6f}")


Assets:   0%|          | 0/14 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[514]	valid_0's l2: 0.00334971
Asset 0 | VAL MSE=0.003350 | Pearson=0.9988068665825638 | best_iter=514


Assets:   7%|▋         | 1/14 [01:48<23:31, 108.55s/it]

Asset 0 | TEST MSE=0.000005 | Pearson=0.7921015730634521
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[637]	valid_0's l2: 0.00173723
Asset 1 | VAL MSE=0.001737 | Pearson=0.9993570361260662 | best_iter=637


Assets:  14%|█▍        | 2/14 [04:09<25:32, 127.73s/it]

Asset 1 | TEST MSE=0.000168 | Pearson=0.1881802748239657
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[962]	valid_0's l2: 1.80397e-05
Asset 2 | VAL MSE=0.000018 | Pearson=0.9999204906743178 | best_iter=962


Assets:  21%|██▏       | 3/14 [06:49<26:05, 142.29s/it]

Asset 2 | TEST MSE=0.000005 | Pearson=0.7755032185541944
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1997]	valid_0's l2: 2.9077e-05
Asset 3 | VAL MSE=0.000029 | Pearson=0.9999640716604302 | best_iter=1997


Assets:  29%|██▊       | 4/14 [11:20<32:13, 193.34s/it]

Asset 3 | TEST MSE=0.000031 | Pearson=0.4921404988296343
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1953]	valid_0's l2: 6.43263e-06
Asset 4 | VAL MSE=0.000006 | Pearson=0.9999578550023354 | best_iter=1953


Assets:  36%|███▌      | 5/14 [14:19<28:13, 188.16s/it]

Asset 4 | TEST MSE=0.000119 | Pearson=0.3969668370749251
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1993]	valid_0's l2: 1.80123e-05
Asset 5 | VAL MSE=0.000018 | Pearson=0.9999286419866127 | best_iter=1993


Assets:  43%|████▎     | 6/14 [19:21<30:14, 226.83s/it]

Asset 5 | TEST MSE=0.000021 | Pearson=0.6786962811144006
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[879]	valid_0's l2: 0.000137714
Asset 6 | VAL MSE=0.000138 | Pearson=0.9998999100945309 | best_iter=879


Assets:  50%|█████     | 7/14 [22:15<24:25, 209.37s/it]

Asset 6 | TEST MSE=0.000019 | Pearson=0.44451967944315074
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1564]	valid_0's l2: 1.32227e-05
Asset 7 | VAL MSE=0.000013 | Pearson=0.9999424270742608 | best_iter=1564


Assets:  57%|█████▋    | 8/14 [26:29<22:21, 223.61s/it]

Asset 7 | TEST MSE=0.000004 | Pearson=0.8394570764216289
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1266]	valid_0's l2: 0.00071742
Asset 8 | VAL MSE=0.000717 | Pearson=0.9997721694209172 | best_iter=1266


Assets:  64%|██████▍   | 9/14 [29:40<17:47, 213.58s/it]

Asset 8 | TEST MSE=0.000130 | Pearson=0.7552705727747192
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1405]	valid_0's l2: 6.16034e-06
Asset 9 | VAL MSE=0.000006 | Pearson=0.999973438149446 | best_iter=1405


Assets:  71%|███████▏  | 10/14 [33:28<14:31, 217.84s/it]

Asset 9 | TEST MSE=0.000115 | Pearson=0.25507734415091576
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[771]	valid_0's l2: 0.472
Asset 10 | VAL MSE=0.472000 | Pearson=0.9731250682323639 | best_iter=771


Assets:  79%|███████▊  | 11/14 [35:25<09:20, 186.97s/it]

Asset 10 | TEST MSE=0.000304 | Pearson=0.3216529721879139
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[744]	valid_0's l2: 0.000864333
Asset 11 | VAL MSE=0.000864 | Pearson=0.9996995179697388 | best_iter=744


Assets:  86%|████████▌ | 12/14 [37:53<05:50, 175.13s/it]

Asset 11 | TEST MSE=0.000020 | Pearson=0.7301661337631803
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2000]	valid_0's l2: 3.05031e-05
Asset 12 | VAL MSE=0.000031 | Pearson=0.9999510151499497 | best_iter=2000


Assets:  93%|█████████▎| 13/14 [42:24<03:24, 204.34s/it]

Asset 12 | TEST MSE=0.000109 | Pearson=0.3525124871919485
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1817]	valid_0's l2: 0.00495233
Asset 13 | VAL MSE=0.004952 | Pearson=0.9983820908090268 | best_iter=1817


Assets: 100%|██████████| 14/14 [46:53<00:00, 201.00s/it]

Asset 13 | TEST MSE=0.000028 | Pearson=0.47632604335085366
Aggregated Weighted Pearson: 0.410940



