In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

# ------------------ HEART DISEASE MODEL ------------------ #
def train_heart_model(
    data_path: str = "heart_processed.csv",
    model_path: str = "heart_model.pkl",
) -> None:
    """Train, evaluate and persist the heart-disease classifier.

    Reads the CSV at *data_path*, uses the ``HeartDisease`` column as the
    target and every other column as a feature, fits an
    impute -> scale -> SMOTE -> random-forest pipeline on a stratified
    80/20 split, prints a classification report for the held-out 20 %, and
    writes the fitted pipeline to *model_path* with ``joblib``.

    Args:
        data_path: Location of the heart-disease CSV. The default is the
            file name the last recorded run attempted to open; the previous
            code passed an empty string, which can never be read.
        model_path: Destination of the serialized pipeline.

    Raises:
        FileNotFoundError: If *data_path* does not exist.
        KeyError: If the CSV has no ``HeartDisease`` column.
    """
    heart_df = pd.read_csv(data_path)
    X = heart_df.drop("HeartDisease", axis=1)
    y = heart_df["HeartDisease"]

    # Split before fitting so the imputer, scaler and SMOTE statistics are
    # learned from the training portion only.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.2, random_state=42
    )

    # imblearn's Pipeline is used (instead of sklearn's) because it accepts a
    # resampling step such as SMOTE between the transformers and the model.
    pipeline = ImbPipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('smote', SMOTE(random_state=42)),
        ('model', RandomForestClassifier(random_state=42))
    ])
    pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)
    print("\n🫀 Heart Disease Model:")
    print(classification_report(y_test, y_pred))

    joblib.dump(pipeline, model_path)


# ------------------ DIABETES MODEL ------------------ #
def train_diabetes_model(
    data_path: str = "diabetes_normalized.csv",
    model_path: str = "diabetes_model.pkl",
) -> None:
    """Train, evaluate and persist the diabetes classifier.

    Reads the CSV at *data_path*, uses the ``Outcome`` column as the target
    and every other column as a feature, fits an
    impute -> scale -> SMOTE -> random-forest pipeline on a stratified
    80/20 split, prints a classification report for the held-out 20 %, and
    writes the fitted pipeline to *model_path* with ``joblib``.

    Args:
        data_path: Location of the diabetes CSV. Defaults to the file the
            function previously had hard-coded.
        model_path: Destination of the serialized pipeline. Defaults to the
            previously hard-coded output name.

    Raises:
        FileNotFoundError: If *data_path* does not exist.
        KeyError: If the CSV has no ``Outcome`` column.
    """
    diabetes_df = pd.read_csv(data_path)
    X = diabetes_df.drop("Outcome", axis=1)
    y = diabetes_df["Outcome"]

    # Split before fitting so the imputer, scaler and SMOTE statistics are
    # learned from the training portion only.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.2, random_state=42
    )

    # imblearn's Pipeline is used (instead of sklearn's) because it accepts a
    # resampling step such as SMOTE between the transformers and the model.
    pipeline = ImbPipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('smote', SMOTE(random_state=42)),
        ('model', RandomForestClassifier(random_state=42))
    ])
    pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)
    print("\n🩸 Diabetes Model:")
    print(classification_report(y_test, y_pred))

    joblib.dump(pipeline, model_path)


# ------------------ STROKE MODEL ------------------ #
def train_stroke_model(
    data_path: str = "stroke_normalized.csv",
    model_path: str = "stroke_model.pkl",
) -> None:
    """Train, evaluate and persist the stroke classifier.

    Reads the CSV at *data_path*, uses the ``stroke`` column as the target
    and every other column as a feature, fits an
    impute -> scale -> SMOTE -> random-forest pipeline on a stratified
    80/20 split, prints a classification report for the held-out 20 %, and
    writes the fitted pipeline to *model_path* with ``joblib``.

    Args:
        data_path: Location of the stroke CSV. Defaults to the file the
            function previously had hard-coded.
        model_path: Destination of the serialized pipeline. Defaults to the
            previously hard-coded output name.

    Raises:
        FileNotFoundError: If *data_path* does not exist.
        KeyError: If the CSV has no ``stroke`` column.
    """
    stroke_df = pd.read_csv(data_path)
    X = stroke_df.drop("stroke", axis=1)
    y = stroke_df["stroke"]

    # Split before fitting so the imputer, scaler and SMOTE statistics are
    # learned from the training portion only.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.2, random_state=42
    )

    # imblearn's Pipeline is used (instead of sklearn's) because it accepts a
    # resampling step such as SMOTE between the transformers and the model.
    pipeline = ImbPipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('smote', SMOTE(random_state=42)),
        ('model', RandomForestClassifier(random_state=42))
    ])
    pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)
    print("\n🧠 Stroke Model:")
    print(classification_report(y_test, y_pred))

    joblib.dump(pipeline, model_path)


# ------------------ RUN ALL ------------------ #
if __name__ == "__main__":
    # Train the three models one after another, in the same order as before:
    # heart disease, diabetes, then stroke. An exception in one trainer stops
    # the run, so later models are not trained.
    for _train in (train_heart_model, train_diabetes_model, train_stroke_model):
        _train()


FileNotFoundError: [Errno 2] No such file or directory: 'heart_processed.csv'