1. Importing required libraries

In [2]:
import numpy as np
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor

2. Data loading

In [3]:
# Directory holding the competition files. Change it here to point the
# notebook at a different copy of the data.
DATA_DIR = '/kaggle/input/nk-iv-prediction'

# Test set: contains a 'timestamp' column plus the columns passed to the imputer.
test_data = pd.read_parquet(f'{DATA_DIR}/test_data.parquet')
# Sample submission: provides the column layout of the file written at the end.
submission_template = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

3. Initializing submission structure

In [4]:
# Every template column other than 'timestamp'; these are the columns that
# receive the imputed values later in the notebook.
prediction_columns = [
    name for name in submission_template.columns if name != 'timestamp'
]
# Work on a copy so the loaded template itself is never modified.
final_submission = submission_template.copy()

4. Preparing imputation data

In [5]:
# Everything except the 'timestamp' column is handed to the imputer.
features = test_data.drop(columns=['timestamp'])

5. Configuring imputation model

In [7]:
# Regressor the imputer uses internally to predict missing entries.
extra_trees_estimator = ExtraTreesRegressor(
    n_estimators=1000,
    criterion='squared_error',
    max_features='sqrt',
    min_samples_split=2,
    min_samples_leaf=1,
    n_jobs=-1,  # let scikit-learn use all available cores
    random_state=42,
)

imputer = IterativeImputer(
    estimator=extra_trees_estimator,
    max_iter=10,  # upper bound on imputation rounds; may stop earlier
    tol=1e-10,  # tolerance used by the early-stopping check
    random_state=42,
    verbose=2,  # report progress after every round
)

6. Executing imputation

In [8]:
print("Performing data imputation...")
# Fit the imputer on the feature frame and fill its missing entries in one step.
imputed_features = imputer.fit_transform(features)
# Re-attach the original column labels to the imputer's output.
imputed_df = pd.DataFrame(data=imputed_features, columns=features.columns)

Performing data imputation...
[IterativeImputer] Completing matrix with shape (12065, 95)
[IterativeImputer] Ending imputation round 1/10, elapsed time 154.61
[IterativeImputer] Change: 2.1033600807054995, scaled tolerance: 0.0650259937748261 
[IterativeImputer] Ending imputation round 2/10, elapsed time 311.38
[IterativeImputer] Change: 0.12982440299999753, scaled tolerance: 0.0650259937748261 
[IterativeImputer] Ending imputation round 3/10, elapsed time 471.00
[IterativeImputer] Change: 0.050958493000000854, scaled tolerance: 0.0650259937748261 
[IterativeImputer] Early stopping criterion reached.


7. Restructuring output for submission

In [9]:
# Select the prediction columns by name, in the template's order.
# Assigning a DataFrame to a list of columns pairs the columns up by
# position, so dropping the input columns and assigning the remainder would
# silently put values under the wrong header if the test data ordered its
# columns differently from the submission template.
missing_columns = [col for col in prediction_columns if col not in imputed_df.columns]
if missing_columns:
    raise KeyError(
        f"Prediction columns missing from the imputed data: {missing_columns}"
    )
filtered_data = imputed_df[prediction_columns]

# Rows are matched by position (the timestamp column is copied the same way),
# so both frames must have the same number of rows.
if len(filtered_data) != len(final_submission):
    raise ValueError(
        f"Row count mismatch: imputed data has {len(filtered_data)} rows, "
        f"submission template has {len(final_submission)}"
    )

# Assign the raw values so that index labels cannot introduce NaNs through
# alignment.
final_submission[prediction_columns] = filtered_data.to_numpy()

8. Preserving timestamp column

In [10]:
# Copy the timestamps as a raw array so rows are matched by position rather
# than by index label.
timestamp_values = test_data['timestamp'].values
final_submission['timestamp'] = timestamp_values

9. Saving results

In [11]:
# Write the submission without the DataFrame index column.
final_submission.to_csv(path_or_buf='final_submission.csv', index=False)
print("Submission file successfully created")

Submission file successfully created
