In [1]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge
import warnings
warnings.filterwarnings('ignore')

# Load the test features and the submission template.
test = pd.read_parquet('test_data.parquet')
sample_submission = pd.read_csv('sample_submission.csv')

# Target columns are every template column except 'timestamp'.
target_columns = [col for col in sample_submission.columns if col != 'timestamp']
submission = sample_submission.copy()
# NOTE(review): assigning a Series aligns on the row index, so this assumes
# `test` and `sample_submission` share the same index -- confirm.
for col in target_columns:
    # Targets absent from the test set become all-NaN placeholder columns.
    submission[col] = test[col] if col in test.columns else np.nan

# Optionally use 'underlying' as an extra predictor when the test set has it.
# It is appended only if it is not already a target, so the imputer never
# receives the same feature twice.
all_imputation_cols = list(target_columns)
if 'underlying' in test.columns:
    submission['underlying'] = test['underlying']
    if 'underlying' not in all_imputation_cols:
        all_imputation_cols.append('underlying')

# IterativeImputer drops features that have no observed values at all, which
# would shift every later column of its output. Impute only the columns that
# contain at least one observed value and map results back by column name.
has_observed = submission[all_imputation_cols].notna().any()
observed_cols = [col for col in all_imputation_cols if has_observed[col]]
empty_cols = [col for col in all_imputation_cols if not has_observed[col]]
if not observed_cols:
    raise ValueError(
        "None of the imputation columns contain observed values; "
        "nothing to fit the imputer on."
    )
if empty_cols:
    # print() rather than warnings.warn(): warnings are globally silenced
    # at the top of this script.
    print(f"Skipping columns with no observed values (left as NaN): {empty_cols}")

# Setup Bayesian Ridge imputer
imputer_bayes = IterativeImputer(
    estimator=BayesianRidge(
        alpha_1=1e-6, alpha_2=1e-6,
        lambda_1=1e-6, lambda_2=1e-6,
        compute_score=True
    ),
    max_iter=60,
    tol=1e-6,
    n_nearest_features=20,
    initial_strategy='median',
    imputation_order='ascending',
    random_state=43,
    verbose=2
)

# Fit and impute in a single pass; a separate fit() followed by transform()
# on the same data would repeat the whole imputation sequence.
imputed_bayes = imputer_bayes.fit_transform(submission[observed_cols])

# Prepare output dataframe, writing imputed values back by column name so a
# skipped column can never cause values to land in the wrong target.
bayes_pred = submission.copy()
imputed_frame = pd.DataFrame(
    np.asarray(imputed_bayes),
    columns=observed_cols,
    index=submission.index,
)
imputed_targets = [col for col in target_columns if has_observed[col]]
bayes_pred[imputed_targets] = imputed_frame[imputed_targets]

# Save to CSV for blending or submission; the same path is used in the
# message so the reported filename always matches the file written.
output_path = 'bayesianridge_imputation.csv'
bayes_pred.to_csv(output_path, index=False)
print(f"Bayesian Ridge imputation predictions saved to '{output_path}'.")


[IterativeImputer] Completing matrix with shape (12065, 53)
[IterativeImputer] Ending imputation round 1/60, elapsed time 0.57
[IterativeImputer] Change: 2.517904210902664, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 2/60, elapsed time 1.24
[IterativeImputer] Change: 0.564428231658536, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 3/60, elapsed time 1.83
[IterativeImputer] Change: 0.23812610508760465, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 4/60, elapsed time 2.59
[IterativeImputer] Change: 0.15845490378371854, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 5/60, elapsed time 3.41
[IterativeImputer] Change: 0.11886522512598635, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 6/60, elapsed time 4.06
[IterativeImputer] Change: 0.10234666287655461, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 7/60, elapsed time 4.86
[IterativeImp