In [30]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [31]:
# STEP 2: Load and prepare data
# Resolve the CSV relative to the current user's home directory instead of a
# hard-coded "C:\Users\<name>\..." path, so the notebook is not tied to one
# account/machine. Point DATA_PATH elsewhere if the file lives in another folder.
DATA_PATH = Path.home() / "Downloads" / "heart.csv"
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [18]:
# Show column dtypes and non-null counts before the categorical columns are encoded.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [32]:
# Map categorical text values to numeric codes
CATEGORY_CODES = {
    'Sex': {'M': 1, 'F': 0},
    'ChestPainType': {'ASY': 0, 'NAP': 1, 'ATA': 2, 'TA': 3},
    'RestingECG': {'Normal': 0, 'ST': 1, 'LVH': 2},
    'ExerciseAngina': {'N': 0, 'Y': 1},
    'ST_Slope': {'Up': 0, 'Flat': 1, 'Down': 2},
}

for column, codes in CATEGORY_CODES.items():
    # Re-running this cell must be a no-op: calling .map() on an already-encoded
    # column matches none of the text keys and would turn every value into NaN.
    if pd.api.types.is_numeric_dtype(df[column]):
        continue
    encoded = df[column].map(codes)
    # .map() silently yields NaN for labels missing from the mapping; fail loudly
    # so an unexpected category cannot leak into training as a missing value.
    unmapped = df.loc[encoded.isna(), column].unique().tolist()
    if unmapped:
        raise ValueError(f"Unexpected values in {column!r}: {unmapped}")
    df[column] = encoded

In [20]:
# Re-check dtypes after encoding: the former text columns should now be numeric with no nulls.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    int64  
 2   ChestPainType   918 non-null    int64  
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    int64  
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    int64  
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    int64  
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(11)
memory usage: 86.2 KB


In [33]:
# Target is the HeartDisease label; every other column is used as a predictor.
y = df['HeartDisease']
X = df.drop(columns=['HeartDisease'])

In [34]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [35]:
# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both splits so no test-set information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [36]:
# Inspect df once more after the split/scaling cells.
# NOTE(review): the output is identical to the previous df.info() — this cell looks redundant; confirm before removing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    int64  
 2   ChestPainType   918 non-null    int64  
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    int64  
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    int64  
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    int64  
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(11)
memory usage: 86.2 KB


In [37]:
# STEP 3: Train model
# Seed the forest (same seed as the train/test split) so that a fresh
# Restart & Run All rebuilds the same trees and reproduces the reported metrics.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [38]:
# STEP 4: Evaluate
# Score the fitted model on the held-out (scaled) test rows.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.8913043478260869
              precision    recall  f1-score   support

           0       0.85      0.90      0.87        77
           1       0.92      0.89      0.90       107

    accuracy                           0.89       184
   macro avg       0.89      0.89      0.89       184
weighted avg       0.89      0.89      0.89       184



In [29]:
# STEP 6: Save model and scaler
# The model was fitted on standardized features, so the fitted scaler has to be
# persisted next to it; without it the saved model cannot be fed correctly
# scaled inputs in a fresh session.
joblib.dump(scaler, 'heart_disease_scaler.pkl')
# Dump the model last so the cell's displayed result is still the model path.
joblib.dump(model, 'heart_disease_model.pkl')

['heart_disease_model.pkl']

In [40]:
def _read_feature(prompt, cast, allowed=None):
    """Read one value from stdin, convert it with ``cast`` and check it against ``allowed``."""
    value = cast(input(prompt))
    if allowed is not None and value not in allowed:
        raise ValueError(f"Expected one of {sorted(allowed)}, got {value!r}")
    return value


def predict_heart_disease(model_path="heart_disease_model.pkl", feature_scaler=None):
    """Prompt for one patient's measurements and print the model's prediction.

    Parameters
    ----------
    model_path : str, optional
        File to load the trained classifier from with ``joblib.load``. Defaults
        to the file name this notebook writes in its save step.
    feature_scaler : object with ``transform``, optional
        Scaler applied to the entered values before prediction. When omitted,
        the ``scaler`` fitted earlier in this notebook session is used.

    Raises
    ------
    ValueError
        If an entry cannot be converted to a number, or a coded field is given
        a value outside the codes listed in its prompt.
    """
    print("Enter the following details:")

    # (column, prompt, converter, allowed codes), listed in the column order the
    # training features had (df columns without 'HeartDisease').
    questions = [
        ("Age", "Age: ", float, None),
        ("Sex", "Sex (1 = Male, 0 = Female): ", int, {0, 1}),
        ("ChestPainType", "Chest Pain Type (0=ASY, 1=NAP, 2=ATA, 3=TA): ", int, {0, 1, 2, 3}),
        ("RestingBP", "RestingBP: ", float, None),
        ("Cholesterol", "Cholesterol: ", float, None),
        ("FastingBS", "FastingBS (0 or 1): ", int, {0, 1}),
        ("RestingECG", "RestingECG (0=Normal, 1=ST, 2=LVH): ", int, {0, 1, 2}),
        ("MaxHR", "MaxHR: ", float, None),
        ("ExerciseAngina", "ExerciseAngina (0=No, 1=Yes): ", int, {0, 1}),
        ("Oldpeak", "Oldpeak: ", float, None),
        ("ST_Slope", "ST_Slope (0=Up, 1=Flat, 2=Down): ", int, {0, 1, 2}),
    ]
    answers = {
        column: _read_feature(prompt, cast, allowed)
        for column, prompt, cast, allowed in questions
    }

    # Single-row frame with named columns, matching how the scaler was fitted.
    input_data = pd.DataFrame([answers], columns=[column for column, *_ in questions])

    if feature_scaler is None:
        # Fall back to the StandardScaler fitted earlier in this notebook.
        feature_scaler = scaler
    # The classifier was trained on standardized features, so raw measurements
    # must go through the same scaling before being passed to predict().
    input_scaled = feature_scaler.transform(input_data)

    # Load saved model and predict (same file name the save step writes).
    # NOTE: joblib/pickle files can execute code on load; only load trusted files.
    model = joblib.load(model_path)
    prediction = model.predict(input_scaled)

    if prediction[0] == 1:
        print("\n🩺 Heart Disease Detected.")
    else:
        print("\n✅ No Heart Disease Detected.")


In [41]:
# Run the interactive prediction: the function prompts for each feature value on stdin.
predict_heart_disease()

Enter the following details:


Age:  20
Sex (1 = Male, 0 = Female):  0
Chest Pain Type (0=ASY, 1=NAP, 2=ATA, 3=TA):  1
RestingBP:  130
Cholesterol:  100
FastingBS (0 or 1):  0
RestingECG (0=Normal, 1=ST, 2=LVH):  0
MaxHR:  158
ExerciseAngina (0=No, 1=Yes):  1
Oldpeak:  0.0
ST_Slope (0=Up, 1=Flat, 2=Down):  2



✅ No Heart Disease Detected.


