In [1]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from lightgbm import LGBMRegressor
import warnings
warnings.filterwarnings('ignore')

# Load the test features and the submission template.
test = pd.read_parquet('test_data.parquet')
sample_submission = pd.read_csv('sample_submission.csv')

# Every column of the template except 'timestamp' is a value to be imputed.
target_columns = [col for col in sample_submission.columns if col != 'timestamp']

# Start from the template and overwrite each target with the (partially
# missing) values from the test set; targets absent from `test` become NaN.
# NOTE(review): these assignments align on the DataFrame index -- this
# assumes `test` and `sample_submission` share the same row index; confirm.
submission = sample_submission.copy()
for col in target_columns:
    submission[col] = test[col] if col in test.columns else np.nan

# 'underlying' is used as an extra predictor when available; it is appended
# after the targets so the target columns always come first.
if 'underlying' in test.columns:
    submission['underlying'] = test['underlying']
    all_imputation_cols = target_columns + ['underlying']
else:
    all_imputation_cols = target_columns

# IterativeImputer drops features that contain no observed values, which
# would shift the columns of its output relative to `all_imputation_cols`.
# Exclude such columns up front so the output can be mapped back by name.
fit_cols = [col for col in all_imputation_cols if submission[col].notna().any()]
if not fit_cols:
    raise ValueError("No imputation column contains any observed value.")
fit_targets = [col for col in fit_cols if col in target_columns]

imputer_lgbm = IterativeImputer(
    estimator=LGBMRegressor(
        n_estimators=500,
        max_depth=6,
        learning_rate=0.1,
        random_state=42,
        verbose=-1,
    ),
    max_iter=50,
    tol=1e-6,
    n_nearest_features=20,
    initial_strategy='median',
    imputation_order='ascending',
    random_state=42,
    verbose=2,
)

# fit_transform returns the imputed training matrix directly, avoiding the
# second imputation pass that a separate fit() + transform() would run.
imputed_lgbm = imputer_lgbm.fit_transform(submission[fit_cols])

# Write the imputed values back by column name rather than by position.
# Targets with no observed values at all are left as NaN.
imputed_frame = pd.DataFrame(imputed_lgbm, columns=fit_cols, index=submission.index)
lgbm_pred = submission.copy()
lgbm_pred[fit_targets] = imputed_frame[fit_targets]

output_path = 'lightgbm_imputation.csv'
lgbm_pred.to_csv(output_path, index=False)
print(f"LightGBM imputation predictions saved to '{output_path}'.")


[IterativeImputer] Completing matrix with shape (12065, 53)
[IterativeImputer] Ending imputation round 1/50, elapsed time 48.00
[IterativeImputer] Change: 2.6391588201183693, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 2/50, elapsed time 97.30
[IterativeImputer] Change: 0.27911239878329913, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 3/50, elapsed time 127.92
[IterativeImputer] Change: 0.13564571442387852, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 4/50, elapsed time 170.93
[IterativeImputer] Change: 0.10031636774516003, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 5/50, elapsed time 202.32
[IterativeImputer] Change: 0.08337576548446252, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 6/50, elapsed time 232.97
[IterativeImputer] Change: 0.1952745916129942, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 7/50, elapsed time 263.50