# In [None]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
import lightgbm as lgb
import xgboost as xgb
import catboost as cb
from sklearn.impute import KNNImputer, SimpleImputer

# --- Base models -------------------------------------------------------------
# Each pickle holds a collection of fitted models that is iterated over (and
# averaged) at prediction time.
xgb_models = joblib.load("/kaggle/input/cbimtr-competition/xgboost_models.pkl")
lgb_models = joblib.load("/kaggle/input/cbimtr-competition/lightgbm_models.pkl")
cat_models = joblib.load("/kaggle/input/cbimtr-competition/catboost_models.pkl")

# --- Meta-model --------------------------------------------------------------
# Loaded without its saved training configuration, then compiled explicitly
# with the Adam optimizer and a mean-squared-error loss.
meta_model = tf.keras.models.load_model(
    "/kaggle/input/cbimtr-competition/meta_model.h5",
    compile=False,
)
meta_model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer="adam")

# --- Preprocessors -----------------------------------------------------------
# Imputers persisted to disk by an earlier step (presumably fitted on the
# training data -- confirm against the training notebook).
num_imputer = joblib.load("/kaggle/input/cbimtr-competition/num_imputer.pkl")
cat_imputer = joblib.load("/kaggle/input/cbimtr-competition/cat_imputer.pkl")

# --- Test data ---------------------------------------------------------------
test = pd.read_csv("/kaggle/input/equity-post-HCT-survival-predictions/test.csv")

# Split the columns by dtype: 64-bit numerics vs. object (string) columns.
cat_cols = list(test.select_dtypes(include=["object"]).columns)
num_cols = list(test.select_dtypes(include=["int64", "float64"]).columns)

# Fill missing values with the saved imputers. The two column groups are
# disjoint, so the order of these two assignments does not matter.
test[cat_cols] = cat_imputer.transform(test[cat_cols])
test[num_cols] = num_imputer.transform(test[num_cols])

# Function to detect and cap outliers automatically using IQR
def cap_outliers_auto(df, columns, factor=1.5):
    """Cap outliers in the given columns using the IQR rule.

    For every column, the first and third quartiles (Q1, Q3) are computed
    from the values currently in ``df``. Values below ``Q1 - factor * IQR``
    are raised to that bound and values above ``Q3 + factor * IQR`` are
    lowered to it, where ``IQR = Q3 - Q1``. Missing values are left as-is
    because comparisons with NaN are False.

    Args:
        df: DataFrame to process. It is modified IN PLACE and also returned.
        columns: Iterable of labels of numeric columns to cap.
        factor: Non-negative IQR multiplier that sets how far the fences sit
            from the quartiles. The default of 1.5 reproduces the previously
            hard-coded behaviour.

    Returns:
        The same ``df`` object, with the listed columns capped. Capped
        columns are written back as the result of ``np.where`` against float
        bounds, so integer columns come back as floats.

    Raises:
        ValueError: If ``factor`` is negative (the lower fence could then
            exceed the upper fence).
    """
    if factor < 0:
        raise ValueError("factor must be non-negative")

    for col in columns:
        values = df[col]
        q1 = values.quantile(0.25)  # First quartile (25th percentile)
        q3 = values.quantile(0.75)  # Third quartile (75th percentile)
        iqr = q3 - q1  # Interquartile range
        lower_bound = q1 - factor * iqr
        upper_bound = q3 + factor * iqr

        # Cap the low side first, then the high side, and write back once.
        capped = np.where(values < lower_bound, lower_bound, values)
        df[col] = np.where(capped > upper_bound, upper_bound, capped)

    return df

# Columns that identify rows rather than describe them: they are neither fed
# to the models nor treated as data to be cleaned.
RMV = ["ID"]

# Apply outlier handling.
# num_cols is built purely from dtypes, so a numeric identifier column ends up
# in it (the int cast of "ID" further down suggests that is the case here).
# Capping an identifier would silently rewrite the row IDs written to the
# submission, so the RMV columns are excluded from capping. They are not model
# features, so this does not change any model input.
test = cap_outliers_auto(test, [c for c in num_cols if c not in RMV])

# Feature Selection
FEATURES = [c for c in test.columns if c not in RMV]

# Encode Categorical Features
# NOTE(review): the integer codes are derived from the categories present in
# THIS test frame only. If the training pipeline encoded from the training
# data, a category missing from (or new in) the test set shifts the codes and
# the models see mismatched values -- confirm against the training notebook
# and, if needed, persist and reuse the training category mapping.
for col in test.select_dtypes(include=['object', 'category']).columns:
    test[col] = test[col].astype('category').cat.codes

# Make Predictions with Base Models
# Slice the feature matrix once instead of once per model.
X_test = test[FEATURES]
xgb_preds, lgb_preds, cat_preds = np.zeros(len(test)), np.zeros(len(test)), np.zeros(len(test))

# Each family's prediction is the plain average over its models.
for model in xgb_models:
    xgb_preds += model.predict(X_test) / len(xgb_models)

for model in lgb_models:
    lgb_preds += model.predict(X_test) / len(lgb_models)

for model in cat_models:
    cat_preds += model.predict(X_test) / len(cat_models)

# Prepare Data for Meta-Model: one row per sample, one column per model family
# (order: xgboost, lightgbm, catboost).
stacked_test = np.vstack((xgb_preds, lgb_preds, cat_preds)).T

# Make Final Prediction with Meta-Model
final_preds = meta_model.predict(stacked_test).flatten()

# Cast "ID" to int before it is written to the submission file.
test["ID"] = test["ID"].astype(int)

# Save Predictions
submission = pd.DataFrame({"ID": test["ID"], "prediction": final_preds})
submission.to_csv("submission.csv", index=False)

print("Inference completed. Predictions saved to submission.csv.")