In [None]:
import pandas as pd
import mlflow.catboost
import mlflow
# Point MLflow at the file-based tracking store in ../mlruns so that the
# "runs:/<run_id>/..." model URI used below is resolved against it.
# NOTE(review): the path is relative — presumably to the notebook's working
# directory; confirm when running from a different location.
mlflow.set_tracking_uri("file:../mlruns")

In [None]:

# --- Input / output locations ---
OUTPUT_FILE = "submission.csv"           # where the predictions CSV is written
TEST_DATA_PATH = "../data/raw/test.csv"  # test dataset to score

# --- Model to score with ---
# Set RUN_ID to the ID of the MLflow run that logged the model; it is shown on
# the run's page in the MLflow UI (a hex string such as "82346612a...").
RUN_ID = "d8a11ca1b6c840ec8f33678b86cbdb24"
MODEL_URI = f"runs:/{RUN_ID}/model"

# 1. Load the Test Data
# Reads the CSV at TEST_DATA_PATH into test_df. If the file is missing, the
# hint is printed and the FileNotFoundError is re-raised: carrying on would
# leave test_df undefined and fail later with a misleading NameError.
print(f"Loading test data from {TEST_DATA_PATH}...")
try:
    test_df = pd.read_csv(TEST_DATA_PATH)
except FileNotFoundError:
    print("Error: test.csv not found. Please ensure the file path is correct.")
    raise

# 2. Separate row identifiers from model features
# `ids` ends up in the submission's 'id' column; `X_test` is what the model
# scores. When test_df has no 'id' column, the DataFrame index stands in for
# the identifiers and every column is treated as a feature.
has_id_column = 'id' in test_df.columns
if not has_id_column:
    print("Warning: 'id' column not found in test data. Using index.")
    ids = test_df.index
    X_test = test_df.copy()
else:
    ids = test_df['id']
    X_test = test_df.drop(columns='id')

# 3. Load the Model from MLflow
# Fetches the CatBoost model logged under MODEL_URI. Any failure is reported
# with a troubleshooting tip and then re-raised: without a model there is
# nothing to predict with, and continuing would only surface later as a
# NameError on loaded_model.
print(f"Loading model from {MODEL_URI}...")
try:
    loaded_model = mlflow.catboost.load_model(MODEL_URI)
except Exception as e:
    print(f"Error loading model: {e}")
    print("Tip: Make sure the Run ID is correct and MLflow is running.")
    raise

# 4. Score the test set
print("Generating predictions...")

# predict_proba returns one column per class; column index 1 is kept
# (presumably the probability of the positive class — confirm against the
# label encoding used in training).
class_probabilities = loaded_model.predict_proba(X_test)
preds = class_probabilities[:, 1]

# If hard class labels (0 or 1) are required instead of probabilities,
# replace the two lines above with:
# preds = loaded_model.predict(X_test)

# 5. Create Submission File
# Two columns: the row identifiers and the model output under the
# 'Heart Disease' header.
submission = pd.DataFrame({
    'id': ids,
    'Heart Disease': preds,
})

# 6. Save to CSV
# index=False keeps the DataFrame's own index out of the file.
submission.to_csv(OUTPUT_FILE, index=False)
# "\u2705" is the check-mark emoji. It is written as an escape because the
# literal character had been corrupted into mojibake by a wrong-encoding
# round trip; the escape is immune to the source file's encoding.
print(f"\u2705 Submission saved to {OUTPUT_FILE}")
print(submission.head())
