In [None]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from catboost import CatBoostRegressor
import warnings
warnings.filterwarnings('ignore')

# Read the inputs: the test set (parquet) and the sample submission (CSV),
# whose non-timestamp columns define which columns must be produced.
test = pd.read_parquet('test_data.parquet')
sample_submission = pd.read_csv('sample_submission.csv')

# Every submission column except the timestamp is a target to fill in.
target_columns = [
    name for name in sample_submission.columns if name != 'timestamp'
]

# Start from the submission layout and overwrite each target with the values
# found in the test set; targets absent from the test set become all-NaN.
submission = sample_submission.copy()
for col in target_columns:
    if col in test.columns:
        submission[col] = test[col]
    else:
        submission[col] = np.nan

# 'underlying' is carried along as an extra feature for the imputer when the
# test set provides it; it is appended after the targets.
all_imputation_cols = target_columns
if 'underlying' in test.columns:
    submission['underlying'] = test['underlying']
    all_imputation_cols = target_columns + ['underlying']

# Iterative imputation of the missing target values, using a CatBoost
# regressor as the per-feature estimator. Each round re-predicts the missing
# entries of every column from (up to) 20 other columns.
imputer_catboost = IterativeImputer(
    estimator=CatBoostRegressor(
        iterations=500,
        depth=6,
        learning_rate=0.1,
        loss_function='RMSE',
        random_seed=42,
        verbose=0
    ),
    max_iter=50,
    tol=1e-6,
    n_nearest_features=20,
    initial_strategy='median',
    imputation_order='ascending',
    random_state=42,
    verbose=2
)

imputation_input = submission[all_imputation_cols]

# By default IterativeImputer drops columns that have no observed value at
# fit time, so its output can be narrower than its input. Record which input
# columns survive so the result can be mapped back to the right targets
# instead of being sliced positionally against the full column list.
observed_mask = imputation_input.notna().any(axis=0).to_numpy()

# fit_transform imputes while fitting; calling fit() and then transform() on
# the same data would run the whole (expensive) imputation sequence twice.
imputed_catboost = imputer_catboost.fit_transform(imputation_input)

if imputed_catboost.shape[1] != int(observed_mask.sum()):
    raise ValueError(
        "Imputer returned %d columns but %d input columns have observed "
        "values; cannot map imputed values back to column names."
        % (imputed_catboost.shape[1], int(observed_mask.sum()))
    )

# Targets come first in all_imputation_cols, so the surviving targets occupy
# the leading columns of the imputed matrix, in their original order.
imputable_targets = [
    name
    for name, has_data in zip(target_columns, observed_mask[:len(target_columns)])
    if has_data
]

catboost_pred = submission.copy()
if imputable_targets:
    catboost_pred[imputable_targets] = imputed_catboost[:, :len(imputable_targets)]
# Targets with no observed values at all cannot be imputed and stay NaN.

output_path = 'catboost_imputation.csv'
catboost_pred.to_csv(output_path, index=False)
# Report the file that was actually written.
print(f"CatBoost imputation predictions saved to '{output_path}'.")


[IterativeImputer] Completing matrix with shape (12065, 53)
[IterativeImputer] Ending imputation round 1/50, elapsed time 83.30
[IterativeImputer] Change: 2.6146263128476126, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 2/50, elapsed time 181.58
[IterativeImputer] Change: 0.2340327676329423, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 3/50, elapsed time 295.93
[IterativeImputer] Change: 0.13321532238969774, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 4/50, elapsed time 406.23
[IterativeImputer] Change: 0.09352316177350267, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 5/50, elapsed time 506.10
[IterativeImputer] Change: 0.08337679066600082, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 6/50, elapsed time 604.74
[IterativeImputer] Change: 0.08780202615536484, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 7/50, elapsed time 681.3