In [13]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor

In [14]:
# Base directories are declared once so that pointing the notebook at another
# dataset or output location is a single-line change.
INPUT_DIR = '/kaggle/input/dataset'
OUTPUT_DIR = '/kaggle/working'

# CSV with the rows to impute (read by pd.read_csv further down).
TEST_DATA_PATH = f'{INPUT_DIR}/test_data.csv'
# Sample submission; its column list decides which columns are written out.
SAMPLE_SUB_PATH = f'{INPUT_DIR}/sample_submission.csv'
# Destination of the final submission CSV.
FINAL_OUTPUT_PATH = f'{OUTPUT_DIR}/final_submission_rf.csv'

In [15]:
print("Loading test and sample submission data...")
# Rows to impute; expected to contain a 'timestamp' column, which a later
# cell selects by name.
test_data = pd.read_csv(filepath_or_buffer=TEST_DATA_PATH)
# Only the column list of the sample submission is used further down.
submission_template = pd.read_csv(filepath_or_buffer=SAMPLE_SUB_PATH)

Loading test and sample submission data...


In [16]:
print("Preparing features and timestamp for imputation...")
# Everything except 'timestamp' is handed to the imputer.
features_for_impute = test_data.drop(labels=['timestamp'], axis=1)
# The timestamp column is kept aside, untouched by the imputation.
timestamps = test_data.loc[:, 'timestamp']

Preparing features and timestamp for imputation...


In [17]:
print("Configuring RandomForestRegressor and IterativeImputer...")

# Hyperparameters of the regressor that IterativeImputer uses as its estimator.
forest_settings = {
    'n_estimators': 100,
    'max_features': 9,
    'bootstrap': True,
    'criterion': 'squared_error',
    'n_jobs': -1,
    'random_state': 0,
}
rf_model = RandomForestRegressor(**forest_settings)

# Imputer settings. The run log reports "scaled tolerance: 0.0" for tol=0.
# NOTE(review): tol=0 presumably means early stopping never triggers, so all
# max_iter rounds are executed - confirm this is intended.
imputer_settings = {
    'max_iter': 18,
    'tol': 0,
    'verbose': 2,
    'random_state': 0,
    'skip_complete': True,
}
rf_imputer = IterativeImputer(estimator=rf_model, **imputer_settings)

print("Running imputation with RandomForestRegressor...")
# Fit on the test features themselves and fill their missing entries in one pass.
completed_features = rf_imputer.fit_transform(features_for_impute)
print("Imputation finished.")

Configuring RandomForestRegressor and IterativeImputer...
Running imputation with RandomForestRegressor...
[IterativeImputer] Completing matrix with shape (12065, 95)
[IterativeImputer] Ending imputation round 1/18, elapsed time 20.30
[IterativeImputer] Change: 2.116454010705495, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 2/18, elapsed time 40.44
[IterativeImputer] Change: 0.23699060000000038, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 3/18, elapsed time 60.20
[IterativeImputer] Change: 0.09933824000000008, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 4/18, elapsed time 80.30
[IterativeImputer] Change: 0.048909630000000676, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 5/18, elapsed time 100.58
[IterativeImputer] Change: 0.04128318999999886, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 6/18, elapsed time 120.80
[IterativeImputer] Change: 0.031787519999999875, scaled tolerance: 0.



Imputation finished.


In [18]:
# The imputer output is wrapped back into a DataFrame, so the column labels
# have to be reattached by hand.
feature_columns = features_for_impute.columns
if completed_features.shape[1] != len(feature_columns):
    # With default settings IterativeImputer discards columns that were entirely
    # missing at fit time, which leaves the array narrower than the input frame.
    # Keep only the labels of columns that have at least one observed value so
    # the labels line up with the surviving columns.
    has_observed_value = features_for_impute.notna().any(axis=0).to_numpy()
    feature_columns = feature_columns[has_observed_value]

imputed_data = pd.DataFrame(completed_features, columns=feature_columns)
# Positional assignment: .values bypasses index alignment.
imputed_data['timestamp'] = timestamps.values

# Output columns follow the order of the sample submission. Columns the
# template expects but the imputed frame lacks are reported rather than
# silently dropped, so an incomplete submission does not go unnoticed.
template_columns = list(submission_template.columns)
missing_columns = [col for col in template_columns if col not in imputed_data.columns]
if missing_columns:
    print(f"WARNING: sample submission columns missing from imputed data: {missing_columns}")

submission_columns = [col for col in template_columns if col in imputed_data.columns]
final_submission = imputed_data[submission_columns]

In [19]:
# Write the submission without the DataFrame index column.
final_submission.to_csv(path_or_buf=FINAL_OUTPUT_PATH, index=False)
print(f"Submission file saved to '{FINAL_OUTPUT_PATH}'.")

Submission file saved to '/kaggle/working/final_submission_rf.csv'.
