In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import pickle

# Male = 1 Female = 0
# Typical angina = 1 Atypical Angina = 2 Non-anginal = 3 Asymptomatic = 4
# yes = 1 no = 0
# "Normal" → 0 "ST-T wave abnormality" → 1 "Left ventricular hypertrophy" → 2
# Upsloping → 0 Flat → 1 Downsloping → 2
# Zero → 0 One → 1 Two → 2 Three → 3 Four → 4
# Normal → 0 Fixed Defect → 1 Reversable Defect → 2 No → 3 (if it represents "No Thalassemia")

In [4]:
heart = pd.read_csv("heart_cleveland_upload.csv")
heart

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,1,3,152,223,0,0,181,0,0.0,0,0,2,1
293,39,1,3,118,219,0,0,140,0,1.2,1,0,2,1
294,35,1,3,120,198,0,0,130,1,1.6,1,0,2,1
295,35,0,3,138,183,0,0,182,0,1.4,0,0,0,0


In [5]:
heart_df = heart.copy()

In [6]:
heart_df = heart_df.rename(columns={'condition':'target'})
print(heart_df.head())

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   69    1   0       160   234    1        2      131      0      0.1      1   
1   69    0   0       140   239    0        0      151      0      1.8      0   
2   66    0   0       150   226    0        0      114      0      2.6      2   
3   65    1   0       138   282    1        2      174      0      1.4      1   
4   64    1   0       110   211    0        2      144      1      1.8      1   

   ca  thal  target  
0   1     0       0  
1   2     0       0  
2   0     0       0  
3   1     0       1  
4   0     0       0  


In [7]:
x= heart_df.drop(columns= 'target')
y= heart_df.target


In [8]:
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.25, random_state=42)

In [9]:
scaler= StandardScaler()
x_train_scaler= scaler.fit_transform(x_train)
x_test_scaler= scaler.fit_transform(x_test)

In [10]:
model=RandomForestClassifier(n_estimators=20)
model.fit(x_train_scaler, y_train)
y_pred= model.predict(x_test_scaler)
score = model.score(x_test_scaler,y_test)
print(score)

0.7733333333333333


In [11]:
print('Classification Report\n', classification_report(y_test, y_pred))
print('Accuracy: {}%\n'.format(round((accuracy_score(y_test, y_pred)*100),2)))

Classification Report
               precision    recall  f1-score   support

           0       0.77      0.79      0.78        38
           1       0.78      0.76      0.77        37

    accuracy                           0.77        75
   macro avg       0.77      0.77      0.77        75
weighted avg       0.77      0.77      0.77        75

Accuracy: 77.33%



In [12]:
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[30  8]
 [ 9 28]]


In [13]:
# Your new data point
new_data = np.array([[69, 1, 0, 160, 234, 1, 2, 131, 0, 0.1, 1, 1, 0]])

# Transform the new data using the SAME SCALER (fitted on x_train)
new_data_scaled = scaler.transform(new_data)  # Use transform, NOT fit_transform

# Predict the target column
predicted_target = model.predict(new_data_scaled)
print("Predicted Target:", predicted_target[0])

Predicted Target: 0




In [14]:
heart_df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [15]:

# Your new data point
new_data = np.array([[55, 1, 3, 150, 250, 1, 2, 140, 1, 2.5, 2, 1, 3]])

# Transform the new data using the SAME SCALER (fitted on x_train)
new_data_scaled2 = scaler.transform(new_data)  # Use transform, NOT fit_transform

# Predict the target column
predicted_target = model.predict(new_data_scaled2)
print("Predicted Target that is heart is not good :", predicted_target[0])

Predicted Target that is heart is not good : 1




In [None]:
filename = 'heart-disease-prediction-rf-model.pkl'
pickle.dump(model, open(filename, 'wb'))