In [None]:
import numpy as np
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor

# Data loading
# test_data holds the rows to be completed; the sample submission defines
# which columns (and in which order) the output file must contain.
test_data = pd.read_parquet('/kaggle/input/nk-iv-prediction/test_data.parquet')
submission_template = pd.read_csv('/kaggle/input/nk-iv-prediction/sample_submission.csv')

# Initialize submission structure
# Work on a copy so the template itself stays untouched. Every template
# column other than 'timestamp' is a value this script has to fill in.
final_submission = submission_template.copy()
prediction_columns = [col for col in submission_template.columns if col != 'timestamp']

# Prepare imputation data
# The timestamp is excluded from the imputer input and re-attached from
# test_data at the end.
# NOTE(review): assumes every remaining column is numeric -- confirm.
features = test_data.drop(columns='timestamp')

# Configure imputation model
# Extra-trees regressor used by the iterative imputer as its per-feature
# estimator; the fixed random_state keeps runs reproducible.
imputation_model = ExtraTreesRegressor(
    n_estimators=300,
    max_features='sqrt',
    criterion='friedman_mse',
    min_samples_split=3,
    min_samples_leaf=2,
    random_state=42,
    n_jobs=-1
)

# NOTE(review): tol=1e-10 is extremely tight, so the imputer will presumably
# run all max_iter rounds rather than stop early -- confirm this is intended.
imputer = IterativeImputer(
    estimator=imputation_model,
    max_iter=10,
    tol=1e-10,
    random_state=42,
    verbose=2
)

# Execute imputation
print("Performing data imputation...")
imputed_features = imputer.fit_transform(features)
# fit_transform returns a plain array, so the column labels are re-attached.
# The frame gets a fresh default index (0..n-1); the row alignment in the
# assignment below relies on final_submission having the same default index.
# NOTE(review): this assumes the imputer returns exactly one column per input
# column; confirm that no feature in test_data is entirely missing.
imputed_df = pd.DataFrame(imputed_features, columns=features.columns)

# Restructure output for submission
# Despite the name, these are the input-only columns (the underlying plus
# X0..X41) that must NOT appear in the submission.
required_columns = ['underlying'] + [f'X{i}' for i in range(42)]

if set(prediction_columns).issubset(imputed_df.columns):
    # Pick the targets by name: the list-of-columns assignment below pairs
    # columns purely by position, so relying on "whatever is left after the
    # drop" would silently mis-assign values if test_data orders its columns
    # differently from the template, or fail if it carries an extra column.
    filtered_data = imputed_df[prediction_columns]
else:
    # Column names differ between test_data and the template: keep the
    # original positional mapping of the columns remaining after the drop.
    filtered_data = imputed_df.drop(columns=required_columns)
final_submission[prediction_columns] = filtered_data

# Preserve timestamp column
# .values bypasses index alignment so the timestamps are copied row by row.
final_submission['timestamp'] = test_data['timestamp'].values

# Save results
final_submission.to_csv('final_submission.csv', index=False)
print("Submission file successfully created")