In [2]:
import numpy as np
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor

# Read the inputs from the working directory: the train/test tables are stored
# as Parquet, the submission template as CSV.
print("Loading data...")
train, test = (
    pd.read_parquet(parquet_path)
    for parquet_path in ('train_data.parquet', 'test_data.parquet')
)
sample_sub = pd.read_csv('sample_submission.csv')

# Report success together with the (rows, columns) shape of each table.
print("Data loaded successfully.")
print(f"Train shape: {train.shape}, Test shape: {test.shape}")


Loading data...
Data loaded successfully.
Train shape: (178340, 97), Test shape: (12065, 96)


In [3]:
# Collect every TEST column whose name begins with a call/put IV prefix,
# preserving the order in which the columns appear in `test`.
iv_columns = list(
    filter(lambda name: name.startswith(('call_iv_', 'put_iv_')), test.columns)
)

# Work on an independent copy holding `timestamp` followed by the IV columns,
# so later steps cannot modify `test` itself.
print("Preparing data for imputation...")
df = test.loc[:, ['timestamp', *iv_columns]].copy()

# Show the first rows of the frame that will be handed to the imputer.
print("Data head:", df.head(), sep="\n")

Preparing data for imputation...
Data head:
   timestamp  call_iv_24000  call_iv_24100  call_iv_24200  call_iv_24300  \
0          0       0.280939            NaN            NaN            NaN   
1          1       0.270276            NaN       0.258893            NaN   
2          2            NaN       0.251731            NaN            NaN   
3          3       0.241888            NaN       0.220505            NaN   
4          4       0.235328            NaN       0.222983       0.214126   

   call_iv_24400  call_iv_24500  call_iv_24600  call_iv_24700  call_iv_24800  \
0       0.242149            NaN       0.232439            NaN       0.222997   
1            NaN            NaN       0.233548            NaN            NaN   
2       0.214869        0.20458       0.194604            NaN            NaN   
3       0.198602        0.18619            NaN            NaN            NaN   
4            NaN            NaN       0.192603            NaN            NaN   

   ...  put_iv_246

In [None]:
# Regressor that IterativeImputer fits for each column in turn: a seeded random
# forest without bootstrapping, parallelised with n_jobs=-1.
forest_estimator = RandomForestRegressor(
    n_estimators=350,
    criterion='squared_error',
    max_depth=27,
    min_samples_leaf=2,
    max_features='sqrt',
    bootstrap=False,
    n_jobs=-1,
    random_state=0,
)

# Round-robin imputer: at most 40 passes, with a convergence tolerance of 8e-6
# (same value as 0.000008); verbose=1 logs the change after every pass.
imputer = IterativeImputer(
    estimator=forest_estimator,
    max_iter=40,
    tol=8e-6,
    verbose=1,
    random_state=0,
)

# Fit on the prepared frame and fill its missing entries in a single call.
# The result is a plain array, not a DataFrame.
print("\n--- Performing Imputation ---")
imputed_data_array = imputer.fit_transform(df)

--- Performing Imputation ---
[IterativeImputer] Completing matrix with shape (12065, 53)
[IterativeImputer] Change: 2.11730000000000018, scaled tolerance: 0.000008
[IterativeImputer] Change: 1.43937442998838416, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.97850977646265791, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.66520660829074729, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.45221809976526739, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.30742510252683225, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.20899250541429162, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.14207645036250899, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.09658584506461498, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.06566060345006686, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.04463712920400194, scaled tolerance: 0.000008
[IterativeImputer] Change: 0.03034503490498661, scaled tolerance: 0.000008
[Iterative

In [None]:
# Convert the imputer's array output back into a labelled DataFrame.
# Fail early with a readable message if the column count changed: by default
# IterativeImputer drops features that were entirely missing during fit, which
# would otherwise surface only as an opaque constructor shape error.
if imputed_data_array.shape[1] != len(df.columns):
    raise ValueError(
        f"Imputer returned {imputed_data_array.shape[1]} columns but the input "
        f"frame has {len(df.columns)}; a column may have been all-NaN."
    )

# Keep the input frame's row index so imputed rows stay aligned with `df`.
imputed_df = pd.DataFrame(imputed_data_array, columns=df.columns, index=df.index)

# The round trip through a float array coerces the timestamp key to float.
# When the original column had no gaps (so nothing was imputed for it), put the
# original values and dtype back.
if df['timestamp'].notna().all():
    imputed_df['timestamp'] = df['timestamp'].to_numpy()

# Prepare submission
print("\nPreparing submission...")
submission = imputed_df.copy()
if set(submission.columns) == set(sample_sub.columns):
    # Same column names on both sides: order the data by label so each value
    # ends up under its own header even if the template orders them differently.
    submission = submission[list(sample_sub.columns)]
else:
    # Names differ: fall back to the positional relabelling, which assumes the
    # template lists its columns in the same order as `df` (timestamp first).
    if submission.shape[1] != len(sample_sub.columns):
        raise ValueError(
            f"Submission has {submission.shape[1]} columns but the template "
            f"expects {len(sample_sub.columns)}."
        )
    submission.columns = sample_sub.columns

# Verify no missing values
assert not submission.isna().any().any(), "Missing values detected"

# Save submission
print("Saving submission...")
submission.to_csv('output_main.csv', index=False)

print("\nFinal Submission Preview:")
print(submission.head())
print(f"\nSubmission shape: {submission.shape}")


Preparing submission...
Saving submission...

Final Submission Preview:
   timestamp  call_iv_24000  call_iv_24100  call_iv_24200  call_iv_24300  \
0          0       0.280939       0.266696       0.257372       0.249795   
1          1       0.270276       0.269030       0.258893       0.250336   
2          2       0.256382       0.251731       0.236886       0.224831   
3          3       0.241888       0.230551       0.220505       0.208738   
4          4       0.235328       0.229970       0.222983       0.214126   

   call_iv_24400  call_iv_24500  call_iv_24600  call_iv_24700  call_iv_24800  \
0       0.242149       0.237983       0.232439       0.225929       0.222997   
1       0.244387       0.239116       0.233548       0.227972       0.225092   
2       0.214869       0.204580       0.194604       0.188290       0.183239   
3       0.198602       0.186190       0.174691       0.166849       0.161831   
4       0.206151       0.199282       0.192603       0.186478       0.