In [1]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import Lasso
import warnings
warnings.filterwarnings('ignore')

# Impute the missing target values of the submission template with an
# IterativeImputer driven by a Lasso regressor, then save the result to CSV
# so it can be blended with other predictions.

# Load the test data and the submission template.
test = pd.read_parquet('test_data.parquet')
sample_submission = pd.read_csv('sample_submission.csv')

# Every template column except 'timestamp' is a target to fill.
target_columns = [col for col in sample_submission.columns if col != 'timestamp']
submission = sample_submission.copy()
for col in target_columns:
    # NOTE(review): assigning a Series aligns on the index; this assumes
    # `test` and `sample_submission` share the same row index - confirm.
    # Targets absent from `test` become all-NaN columns.
    submission[col] = test[col] if col in test.columns else np.nan

# Optionally include 'underlying' as an extra predictor if present.
if 'underlying' in test.columns:
    submission['underlying'] = test['underlying']
    all_imputation_cols = target_columns + ['underlying']
else:
    all_imputation_cols = target_columns

# Setup Lasso imputer
imputer_lasso = IterativeImputer(
    estimator=Lasso(alpha=0.01, max_iter=2000, random_state=46),
    max_iter=50,
    tol=1e-6,
    n_nearest_features=20,
    initial_strategy='median',
    imputation_order='ascending',
    random_state=46,
    verbose=2
)

# Fit and impute in a single pass. `fit` followed by `transform` on the same
# data repeats the imputation work and prints the verbose log twice.
imputation_frame = submission[all_imputation_cols]
imputed_lasso = imputer_lasso.fit_transform(imputation_frame)

# By default IterativeImputer drops columns that contain no observed values,
# so the output can be narrower than the input. Map output columns back to
# input columns by position instead of assuming a one-to-one layout.
has_observed = imputation_frame.notna().to_numpy().any(axis=0)
kept_positions = np.flatnonzero(has_observed)
if imputed_lasso.shape[1] == len(all_imputation_cols):
    # Nothing was dropped (e.g. the imputer keeps empty features).
    kept_positions = np.arange(len(all_imputation_cols))
elif imputed_lasso.shape[1] != len(kept_positions):
    raise ValueError(
        f"Imputer returned {imputed_lasso.shape[1]} columns, expected "
        f"{len(kept_positions)} of {len(all_imputation_cols)} input columns."
    )

# Targets come first in `all_imputation_cols`, so any kept position below
# len(target_columns) is a target; later positions are extra predictors.
is_target = kept_positions < len(target_columns)
filled_targets = [target_columns[pos] for pos in kept_positions[is_target]]
skipped_targets = [col for col in target_columns if col not in set(filled_targets)]

# Prepare output dataframe
lasso_pred = submission.copy()
if filled_targets:
    lasso_pred[filled_targets] = imputed_lasso[:, is_target]
if skipped_targets:
    # Warnings are globally silenced above, so report this via print.
    print(f"No observed values for {len(skipped_targets)} target column(s); "
          f"left as NaN: {skipped_targets}")

# Save to CSV for blending
output_path = 'lasso_imputation.csv'
lasso_pred.to_csv(output_path, index=False)
print(f"Lasso imputation predictions saved to '{output_path}'.")


[IterativeImputer] Completing matrix with shape (12065, 53)
[IterativeImputer] Ending imputation round 1/50, elapsed time 0.36
[IterativeImputer] Change: 1.3812496172619133, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 2/50, elapsed time 1.75
[IterativeImputer] Change: 0.5568173098936476, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 3/50, elapsed time 2.39
[IterativeImputer] Change: 0.835723084401726, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 4/50, elapsed time 3.95
[IterativeImputer] Change: 1.0316502999353279, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 5/50, elapsed time 4.86
[IterativeImputer] Change: 1.073156618397394, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 6/50, elapsed time 5.26
[IterativeImputer] Change: 0.749597413939392, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 7/50, elapsed time 5.80
[IterativeImputer]