In [1]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor

def setup_imputer(n_estimators=5, max_depth=10, max_iter=50, tolerance=1e-6, random_state=42):
    """Build an ``IterativeImputer`` whose per-feature estimator is an ``ExtraTreesRegressor``.

    Args:
        n_estimators: Number of trees passed to ``ExtraTreesRegressor``.
        max_depth: Maximum tree depth passed to ``ExtraTreesRegressor``.
        max_iter: Upper bound on imputation rounds (``IterativeImputer.max_iter``).
        tolerance: Stopping tolerance, forwarded as ``IterativeImputer.tol``.
        random_state: Seed given to BOTH the regressor and the imputer.

    Returns:
        An unfitted ``IterativeImputer``.
    """
    # Settings for the tree ensemble; n_jobs is fixed at -1.
    forest_params = {
        "n_estimators": n_estimators,
        "max_depth": max_depth,
        "random_state": random_state,
        "n_jobs": -1,
    }

    # Settings for the imputer itself: median initial fill, no posterior
    # sampling, and verbose level 2 (per-round progress is printed).
    imputer_params = {
        "max_iter": max_iter,
        "tol": tolerance,
        "initial_strategy": "median",
        "sample_posterior": False,
        "random_state": random_state,
        "verbose": 2,
    }

    return IterativeImputer(
        estimator=ExtraTreesRegressor(**forest_params),
        **imputer_params,
    )

def load_and_prepare_data(file_path, n_columns=54, timestamp_column='timestamp'):
    """Load a parquet file and split it into features and timestamps.

    Only the first ``n_columns`` columns (by position) are kept; the
    timestamp column is then removed from that slice and returned
    separately.

    Args:
        file_path: Path of the parquet file, passed to ``pd.read_parquet``.
        n_columns: How many leading columns to keep. Defaults to 54, the
            value this function previously hard-coded.
        timestamp_column: Label of the column to split off. Defaults to
            ``'timestamp'``.

    Returns:
        A tuple ``(X, timestamps)`` where ``X`` is a DataFrame holding the
        kept columns minus the timestamp column and ``timestamps`` is the
        removed column as a Series.

    Raises:
        KeyError: If ``timestamp_column`` is not among the kept columns.
    """
    data = pd.read_parquet(file_path)

    # Copy so that popping the timestamp column never touches ``data``.
    X = data.iloc[:, :n_columns].copy()

    # Fail with a descriptive message instead of a bare KeyError from pop().
    if timestamp_column not in X.columns:
        raise KeyError(
            f"column {timestamp_column!r} not found among the first "
            f"{n_columns} columns of {file_path!r}"
        )

    timestamps = X.pop(timestamp_column)
    return X, timestamps

def perform_imputation(data, imputer):
    """Fit ``imputer`` on ``data`` and return the imputed values as a DataFrame.

    Args:
        data: DataFrame of features to impute; its values are handed to the
            imputer as a plain array via ``data.values``.
        imputer: Object exposing ``fit_transform(array)`` that returns a
            2-D array, e.g. the result of ``setup_imputer``.

    Returns:
        A new DataFrame with the imputed values, carrying the same column
        labels AND the same index as ``data``.

    Raises:
        ValueError: If the imputer returns a different number of columns
            than it was given.
    """
    imputed_array = imputer.fit_transform(data.values)

    # NOTE(review): some imputers drop features that are entirely missing at
    # fit time; report that explicitly instead of failing with a shape error
    # while building the DataFrame.
    if imputed_array.shape[1] != data.shape[1]:
        raise ValueError(
            f"imputer returned {imputed_array.shape[1]} columns but the input "
            f"has {data.shape[1]}; were some columns dropped during fitting?"
        )

    # Keep the original index so later index-aligned operations (such as
    # inserting a timestamp Series) line up with the input rows.
    return pd.DataFrame(imputed_array, columns=data.columns, index=data.index)

def save_results(imputed_df, timestamps, output_path="imputed_data.csv"):
    """Attach timestamps, drop the 'underlying' column and write a CSV.

    Args:
        imputed_df: DataFrame of imputed features; it is copied, not mutated.
        timestamps: Values for the new leading ``"timestamp"`` column.
            NOTE: when this is a Series, pandas aligns it to the frame by
            index label rather than by position.
        output_path: Destination CSV path.

    Returns:
        The DataFrame that was written: ``"timestamp"`` first, followed by
        the imputed columns without ``'underlying'``.

    Raises:
        KeyError: If ``imputed_df`` has no ``'underlying'`` column.
    """
    # Work on a private copy so the caller's frame stays untouched.
    combined = imputed_df.copy()
    combined.insert(loc=0, column="timestamp", value=timestamps)

    # 'underlying' is not part of the written output.
    combined = combined.drop(columns=["underlying"])

    combined.to_csv(output_path, index=False)
    print(f"Imputation completed → '{output_path}'")
    return combined


# Input / output locations
INPUT_FILE = 'test_data.parquet'
OUTPUT_FILE = "submission.csv"

# Read the feature matrix and set the timestamp column aside.
X, timestamps = load_and_prepare_data(INPUT_FILE)

# Set up the imputers. Both share depth, tolerance and seed; they differ
# only in the number of trees and the cap on imputation rounds.
COMMON_PARAMS = dict(max_depth=10, tolerance=1e-6, random_state=42)
imputerA = setup_imputer(n_estimators=7, max_iter=10, **COMMON_PARAMS)
imputerB = setup_imputer(n_estimators=5, max_iter=20, **COMMON_PARAMS)

# Run each imputer on the same input (A first, then B).
imputed_X_A = perform_imputation(X, imputerA)
imputed_X_B = perform_imputation(X, imputerB)

# Blend the two results: 60% from imputer A, 40% from imputer B.
imputed_X = imputed_X_A * 0.6 + imputed_X_B * 0.4

# Write the blended result, with timestamps, to the output file.
result = save_results(imputed_X, timestamps, OUTPUT_FILE)

[IterativeImputer] Completing matrix with shape (12065, 53)
[IterativeImputer] Ending imputation round 1/10, elapsed time 3.20
[IterativeImputer] Change: 2.6241525792450386, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 2/10, elapsed time 6.35
[IterativeImputer] Change: 0.2773838927186836, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 3/10, elapsed time 9.65
[IterativeImputer] Change: 0.07127699148189859, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 4/10, elapsed time 13.28
[IterativeImputer] Change: 0.050546642222487115, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 5/10, elapsed time 16.85
[IterativeImputer] Change: 0.03930517906744288, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 6/10, elapsed time 20.35
[IterativeImputer] Change: 0.04210473250292551, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 7/10, elapsed time 23.85
[Itera



[IterativeImputer] Ending imputation round 1/20, elapsed time 3.57
[IterativeImputer] Change: 2.608114276544552, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 2/20, elapsed time 7.08
[IterativeImputer] Change: 0.21279346667249807, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 3/20, elapsed time 10.72
[IterativeImputer] Change: 0.11763606313884126, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 4/20, elapsed time 16.58
[IterativeImputer] Change: 0.06661551083231876, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 5/20, elapsed time 22.30
[IterativeImputer] Change: 0.04717057913317119, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 6/20, elapsed time 27.85
[IterativeImputer] Change: 0.0492572886201843, scaled tolerance: 0.0250595 
[IterativeImputer] Ending imputation round 7/20, elapsed time 33.71
[IterativeImputer] Change: 0.044442522777590754, scaled tolerance: 



Imputation completed → 'submission.csv'
