In [1]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.ensemble import HistGradientBoostingRegressor
import warnings
warnings.filterwarnings('ignore')

# Load the test features and the submission template.
test = pd.read_parquet('test_data.parquet')
sample_submission = pd.read_csv('sample_submission.csv')

# Every template column except 'timestamp' is a target to be imputed.
target_columns = [col for col in sample_submission.columns if col != 'timestamp']
submission = sample_submission.copy()
for col in target_columns:
    # Targets that the test data does not provide start out entirely missing.
    submission[col] = test[col] if col in test.columns else np.nan

# Optionally include 'underlying' as an extra feature if present
if 'underlying' in test.columns:
    submission['underlying'] = test['underlying']
    all_imputation_cols = target_columns + ['underlying']
else:
    all_imputation_cols = target_columns

# IterativeImputer (with its default keep_empty_features=False) drops features
# that are entirely missing at fit time, so its output would have fewer columns
# than its input and the positional write-back below would be misaligned.
# Exclude such columns up front; they carry no information to learn from and
# simply stay NaN in the output file.
fit_columns = [col for col in all_imputation_cols if submission[col].notna().any()]
kept_targets = [col for col in target_columns if submission[col].notna().any()]
skipped_targets = [col for col in target_columns if col not in kept_targets]
if skipped_targets:
    print(f"Skipping {len(skipped_targets)} target column(s) with no observed "
          f"values (left as NaN): {skipped_targets}")

# Setup HistGradientBoostingRegressor imputer
imputer_hist = IterativeImputer(
    estimator=HistGradientBoostingRegressor(
        max_iter=400,
        max_depth=15,
        learning_rate=0.1,
        early_stopping=True,
        random_state=42
    ),
    max_iter=60,
    tol=1e-6,
    n_nearest_features=20,
    initial_strategy='median',
    imputation_order='ascending',
    random_state=42,
    verbose=2
)

# Fit imputer on the submission data
imputer_hist.fit(submission[fit_columns])

# Transform to get imputed values
imputed_hist = imputer_hist.transform(submission[fit_columns])

# Prepare output dataframe. The retained targets are the leading columns of
# fit_columns (same relative order), so the first len(kept_targets) columns of
# the imputed matrix line up with kept_targets.
hist_pred = submission.copy()
hist_pred[kept_targets] = imputed_hist[:, :len(kept_targets)]

# Save to CSV for blending or submission; the message reports the real path.
output_path = 'hist_imputation.csv'
hist_pred.to_csv(output_path, index=False)
print(f"HistGradientBoosting imputation predictions saved to '{output_path}'.")


[IterativeImputer] Completing matrix with shape (12065, 53)
[IterativeImputer] Ending imputation round 1/60, elapsed time 17.98
[IterativeImputer] Change: 2.566810101181985, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 2/60, elapsed time 65.06
[IterativeImputer] Change: 0.40203123905179317, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 3/60, elapsed time 80.56
[IterativeImputer] Change: 0.269999974059586, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 4/60, elapsed time 94.28
[IterativeImputer] Change: 0.18683017737714053, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 5/60, elapsed time 107.68
[IterativeImputer] Change: 0.19485893017038405, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 6/60, elapsed time 121.15
[IterativeImputer] Change: 0.15579405499343404, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 7/60, elapsed time 135.75
[It