In [2]:
# Show the feature columns currently held in X.
# NOTE(review): X is not defined in any cell visible above this one, so this
# relies on state already present in the kernel — confirm the notebook still
# works after Restart Kernel -> Run All.
print(f"Current X columns: {X.columns.tolist()}")


Current X columns: ['ca_2.0', 'ca_3.0', 'slope_2.0', 'cp_3.0', 'thal_7.0', 'sex_0.0', 'restecg_0.0', 'cp_4.0', 'exang_0.0', 'slope_1.0', 'exang_1.0', 'ca_0.0', 'thal_3.0']


In [4]:
import pandas as pd
import joblib
import os
from sklearn.pipeline import Pipeline

# ===== Configuration: every file path this cell reads or writes =====
DATA_PATH = "data/heart_disease_clean.csv"
PREPROCESSOR_PATH = "models/preprocessor.pkl"
MODEL_PATH = "models/RandomForest_best.pkl"  # or another model, whichever performed best
PIPELINE_PATH = "deployment/heart_disease_pipeline.pkl"

# ===== Load raw clean data =====
df = pd.read_csv(DATA_PATH)

# Convert target to binary: any value > 0 becomes 1, everything else becomes 0.
# A vectorised comparison replaces the per-row Python lambda.
df["target"] = (df["target"] > 0).astype(int)

X = df.drop(columns="target")
y = df["target"]

# ===== Load preprocessor & best model =====
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts produced by a trusted source.
preprocessor = joblib.load(PREPROCESSOR_PATH)
best_model = joblib.load(MODEL_PATH)

# ===== Build final pipeline =====
final_pipeline = Pipeline([
    ("preprocessing", preprocessor),
    ("classifier", best_model),
])

# The pipeline is fitted on every row of X (no train/test split in this cell),
# so the saved artifact is trained on the full dataset.
final_pipeline.fit(X, y)

# ===== Save pipeline =====
# Create the output directory (derived from PIPELINE_PATH) if it is missing.
os.makedirs(os.path.dirname(PIPELINE_PATH), exist_ok=True)
joblib.dump(final_pipeline, PIPELINE_PATH)

print(f"✅ Final pipeline saved in {PIPELINE_PATH}")

# ===== Test prediction =====
# Double brackets keep the first patient in the data as a one-row DataFrame,
# so the pipeline receives the same column layout it was fitted on.
sample = X.iloc[[0]]
prediction = final_pipeline.predict(sample)[0]
# Row 0 is the single sample; column 1 is the probability of class 1 because
# the labels were binarised to {0, 1} above and scikit-learn sorts classes.
proba = final_pipeline.predict_proba(sample)[0, 1]

print("Sample input:\n", sample)
print("Prediction:", "Disease" if prediction == 1 else "No Disease")
print("Probability of Disease:", proba)


✅ Final pipeline saved in deployment/heart_disease_pipeline.pkl
Sample input:
     age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0  63.0  1.0  1.0     145.0  233.0  1.0      2.0    150.0    0.0      2.3   

   slope   ca thal  
0    3.0  0.0  6.0  
Prediction: No Disease
Probability of Disease: 0.2650236454514838
