In [None]:
import os
import joblib
import pandas as pd
import numpy as np

# Serialized models to score. The loop below walks this list in order and
# writes one submission file per entry that exists on disk.
model_files = [
    # Files without the "_tuned" suffix.
    "random_forest_final.joblib",
    "decision_tree_final.joblib",
    "bagging_method.joblib",
    # Files carrying the "_tuned" suffix.
    "decision_tree_final_tuned.joblib",
    "bagging_method_tuned.joblib",
    "random_forest_tuned.joblib",
]

# Read the full test set, plus a single row of the training file: only the
# training column names are needed, to decide which features to select and in
# what order.
test_df = pd.read_csv("processed_test.csv")
train_header = pd.read_csv("train_preprocessed.csv", nrows=1)

# Every training column except "Id" and "Recovery Index" is treated as a feature.
feature_cols = [
    col
    for col in train_header.columns
    if col != "Id" and col != "Recovery Index"
]

# Stop early if the test file lacks any of those feature columns.
missing = [col for col in feature_cols if col not in test_df.columns]
if missing:
    raise ValueError(f"processed_test.csv is missing required features: {missing}")

# Test features as a plain array, columns ordered as in the training header.
X_test_base = test_df[feature_cols].values

# Score the test set with every available model file and write one submission
# CSV per model into the current working directory.
#
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load model files that come from a trusted source.
saved_any = False
for mf in model_files:
    if not os.path.exists(mf):
        print(f"Model file not found, skipping: {mf}")
        continue

    obj = joblib.load(mf)

    # A saved object is either a bare estimator, or a dict holding the
    # estimator under "model" or "estimator" with an optional "scaler".
    if isinstance(obj, dict):
        # Test for None explicitly instead of chaining lookups with `or`:
        # the truth value of an object that defines __len__ (as scikit-learn
        # ensembles do) is not a reliable "is present" check.
        model = obj.get("model")
        if model is None:
            model = obj.get("estimator")
        scaler = obj.get("scaler")
    else:
        model = obj
        scaler = None

    if model is None:
        print(f"No model found inside {mf}; skipping.")
        continue

    # Work on a copy so the shared base array is never touched by a model.
    X_test = X_test_base.copy()
    if scaler is not None:
        X_test = scaler.transform(X_test)

    preds = model.predict(X_test)
    # Predictions are rounded to the nearest integer before being written.
    preds_rounded = np.rint(preds).astype(int)

    # Name the output after the file name only, so an entry that includes a
    # directory component still yields a valid file name.
    out_base = os.path.splitext(os.path.basename(mf))[0]
    out_name = f"submission_{out_base}.csv"
    submission = pd.DataFrame({
        "Id": test_df["Id"],
        "Recovery Index": preds_rounded
    })
    submission.to_csv(out_name, index=False)

    # Set the flag only once a submission exists, so the check after the loop
    # reflects real output rather than the mere presence of a file.
    saved_any = True
    print(f"Saved {out_name} (model: {mf}) — first rows:")
    print(submission.head(5))

if not saved_any:
    raise FileNotFoundError("No model files found. Place one of the expected joblib files in the notebook directory.")


Saved submission_random_forest_final.csv (model: random_forest_final.joblib) — first rows:
     Id  Recovery Index
0  6253              56
1  4685              22
2  1732              48
3  4743              29
4  4522              43
Saved submission_decision_tree_final.csv (model: decision_tree_final.joblib) — first rows:
     Id  Recovery Index
0  6253              56
1  4685              22
2  1732              49
3  4743              28
4  4522              43
Saved submission_bagging_method.csv (model: bagging_method.joblib) — first rows:
     Id  Recovery Index
0  6253              56
1  4685              23
2  1732              48
3  4743              29
4  4522              43
Saved submission_decision_tree_final_tuned.csv (model: decision_tree_final_tuned.joblib) — first rows:
     Id  Recovery Index
0  6253              56
1  4685              25
2  1732              48
3  4743              28
4  4522              43
Saved submission_bagging_method_tuned.csv (model: bagging_