In [1]:
import os
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Ensure 'models' directory exists
os.makedirs("models", exist_ok=True)


def train_and_save_model(dataset_path, target_column, model_name):
    """Loads dataset, trains a RandomForest model, and saves it."""
    
    # Load dataset
    df = pd.read_csv(dataset_path)

    df[target_column] = df[target_column].apply(lambda x: 1 if x > 0 else 0)

    df = df.drop(columns=["id", "origin"], errors="ignore")

    # Drop rows with missing values
    df = df.dropna()

    # Convert categorical columns to numerical if needed
    df = pd.get_dummies(df, drop_first=True)

    # Split data into features (X) and target (y)
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale the data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train model
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Save model & scaler
    with open(f"models/{model_name}.pkl", "wb") as f:
        pickle.dump(model, f)

    with open(f"models/{model_name}_scaler.pkl", "wb") as f:
        pickle.dump(scaler, f)

    print(f"{model_name} model saved successfully!")


# Train and save models
train_and_save_model("Dataset/heart_disease_uci.csv", "num", "heart_disease")  # UCI Heart Disease

heart_disease model saved successfully!


In [2]:
import pickle

# Load the trained model
with open("models/heart_disease.pkl", "rb") as f:
    model = pickle.load(f)

# Check if the model has feature names
if hasattr(model, "feature_names_in_"):
    features = model.feature_names_in_
    print("Model Features:", list(features))
else:
    print("Feature names not found in the model.")


Feature names not found in the model.
