In [1]:
# Importing Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the dataset
file_path = "heart_expanded_final.csv"
df = pd.read_csv(file_path)

# Handle missing values separately for numerical and categorical columns
for col in df.select_dtypes(include=['number']).columns:
    df[col].fillna(df[col].median(), inplace=True)
for col in df.select_dtypes(include=['object']).columns:
    df[col].fillna(df[col].mode()[0], inplace=True)

# Encode categorical columns
categorical_columns = ["COVID History", "Vaccination Status", "Diet Type"]
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split features and target
X = df.drop(columns=["output"])
y = df["output"]

In [3]:
# Standardize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [4]:
# Train and evaluate multiple models
models = {
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "GradientBoosting": GradientBoostingClassifier(random_state=42),
    "SVM": SVC(kernel='linear', random_state=42),
    "KNN": KNeighborsClassifier()
}

best_model = None
best_accuracy = 0

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Model Accuracy: {accuracy}")
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

print(f"Best Model: {best_model.__class__.__name__} with Accuracy: {best_accuracy}")

RandomForest Model Accuracy: 0.985
GradientBoosting Model Accuracy: 0.985
SVM Model Accuracy: 0.85
KNN Model Accuracy: 0.915
Best Model: RandomForestClassifier with Accuracy: 0.985


In [7]:
# Function for user input prediction
def predict_heart_disease():
    user_input = {}
    for col in X.columns:
        if col in categorical_columns:
            # For categorical columns, show available options
            print(f"Available options for {col}: {label_encoders[col].classes_}")
            user_input[col] = input(f"Enter value for {col}: ")
        else:
            # For numerical columns, take input directly
            user_input[col] = float(input(f"Enter value for {col}: "))
    
    # Convert user input into a DataFrame
    input_df = pd.DataFrame([user_input], columns=X.columns)
    
    # Encode categorical columns in user input
    for col in categorical_columns:
        if col in input_df:
            input_df[col] = input_df[col].map(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)
    
    # Standardize the input data
    input_scaled = scaler.transform(input_df)
    
    # Make prediction
    prediction = best_model.predict(input_scaled)
    print("Prediction:", "Heart Disease Detected" if prediction[0] == 1 else "No Heart Disease")
    # Prompt user for input
predict_heart_disease()

Enter value for age:  18
Enter value for sex:  1
Enter value for cp:  1
Enter value for trtbps:  1
Enter value for chol:  1
Enter value for fbs:  1
Enter value for restecg:  1
Enter value for thalachh:  1
Enter value for exng:  0
Enter value for oldpeak:  0
Enter value for slp:  0
Enter value for caa:  0
Enter value for thall:  1


Available options for COVID History: ['No' 'Yes']


Enter value for COVID History:  yes


Available options for Vaccination Status: ['Fully' 'Not Vaccinated' 'Partially']


Enter value for Vaccination Status:  1


Available options for Diet Type: ['Non-Vegetarian' 'Vegan' 'Vegetarian']


Enter value for Diet Type:  1
Enter value for Daily Calorie Intake:  1


Prediction: Heart Disease Detected
