In [17]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score

In [None]:
# Read the heart-disease records from the CSV next to the notebook and
# preview the first five rows as plain text.
data = pd.read_csv("heart.csv")
print(data.head(n=5))

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


In [20]:
# Count missing values per column (isna is the canonical name that isnull aliases).
data.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [21]:
# Five predictor columns used by the model; `target` is the label column.
selected_features = ['cp', 'thalach', 'oldpeak', 'ca', 'thal']
X, y = data[selected_features], data['target']

In [22]:
# Hold out 20% of the rows for testing; stratify so both partitions keep the
# class balance of `y`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training partition only, then apply
# the same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Fit a 100-tree random forest with a fixed seed (fit returns the estimator itself).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Hard labels and the predicted probability of class 1 for the held-out rows.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

In [23]:
# Hold-out evaluation: error breakdown, per-class metrics, overall accuracy,
# and ROC AUC computed from the class-1 probabilities.
print(
    "\nConfusion Matrix:\n",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
)
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)
print(
    "\nAccuracy Score:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)
print(
    "\nROC AUC Score:",
    roc_auc_score(y_true=y_test, y_score=y_proba),
)


Confusion Matrix:
 [[18 10]
 [ 2 31]]

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.64      0.75        28
           1       0.76      0.94      0.84        33

    accuracy                           0.80        61
   macro avg       0.83      0.79      0.79        61
weighted avg       0.82      0.80      0.80        61


Accuracy Score: 0.8032786885245902

ROC AUC Score: 0.9134199134199135


In [26]:
def _read_float(prompt):
    """Prompt until the user types a finite number and return it as a float.

    Re-asks instead of raising, so a single typo does not discard the values
    that were already entered.
    """
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number, please try again.")
            continue
        if np.isfinite(value):
            return value
        print(f"'{raw}' is not a finite number, please try again.")


def predict_heart_disease():
    """Interactively collect five patient measurements and print the model's verdict.

    Reads each value with ``input``, scales the row with the notebook-level
    ``scaler`` and classifies it with the notebook-level ``model``. Prints
    whether heart disease is likely, together with the predicted probability
    of class 1. Returns ``None``.
    """
    print("\nEnter patient details for prediction:")

    # Column name -> prompt. The order must match `selected_features`, i.e. the
    # column order the scaler and the model were fitted on.
    prompts = {
        "cp": "Chest Pain Type (cp, 0-3): ",
        "thalach": "Maximum Heart Rate Achieved (thalach): ",
        "oldpeak": "ST Depression (oldpeak): ",
        "ca": "Number of Major Vessels (ca, 0-3): ",
        "thal": "Thalassemia Test Result (thal, 1-3): ",
    }
    answers = {column: _read_float(prompt) for column, prompt in prompts.items()}

    # A one-row DataFrame carries the column names, so a scaler that was fitted
    # on a DataFrame does not warn about missing feature names.
    user_input = pd.DataFrame([answers], columns=list(prompts))

    scaled_input = scaler.transform(user_input)
    prediction = model.predict(scaled_input)[0]
    probability = model.predict_proba(scaled_input)[0][1]

    # `prediction` is a class label (0 or 1), not a probability, so compare it
    # to the positive label directly.
    if prediction == 1:
        print(f"\nPrediction: The patient is **likely** to have heart disease. Probability: {probability:.2f}")
    else:
        print(f"\nPrediction: The patient is **unlikely** to have heart disease. Probability: {probability:.2f}")

# Run the prediction function. It reads five values with input(), so this
# call waits for keyboard input every time the cell is executed.
predict_heart_disease()


Enter patient details for prediction:



Prediction: The patient is **likely** to have heart disease. Probability: 0.81


