# Importing Libraries

In [163]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,classification_report

In [164]:
raw_data=pd.read_csv("HeartAttack.csv")

In [165]:
raw_data

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
0,64,1,66,160,83,160.0,1.80,0.012,negative
1,21,1,94,98,46,296.0,6.75,1.060,positive
2,55,1,64,160,77,270.0,1.99,0.003,negative
3,64,1,70,120,55,270.0,13.87,0.122,positive
4,55,1,64,112,65,300.0,1.08,0.003,negative
...,...,...,...,...,...,...,...,...,...
1314,44,1,94,122,67,204.0,1.63,0.006,negative
1315,66,1,84,125,55,149.0,1.33,0.172,positive
1316,45,1,85,168,104,96.0,1.24,4.250,positive
1317,54,1,58,117,68,443.0,5.80,0.359,positive


# Cleaning and pre-processing the dataset

In [166]:
raw_data.isnull().sum()

Age                         0
Gender                      0
Heart rate                  0
Systolic blood pressure     0
Diastolic blood pressure    0
Blood sugar                 0
CK-MB                       0
Troponin                    0
Result                      0
dtype: int64

In [167]:
raw_data.duplicated().sum()

0

In [168]:
raw_data.columns

Index(['Age', 'Gender', 'Heart rate', 'Systolic blood pressure',
       'Diastolic blood pressure', 'Blood sugar', 'CK-MB', 'Troponin',
       'Result'],
      dtype='object')

In [169]:
raw_data.isna().sum()

Age                         0
Gender                      0
Heart rate                  0
Systolic blood pressure     0
Diastolic blood pressure    0
Blood sugar                 0
CK-MB                       0
Troponin                    0
Result                      0
dtype: int64

In [170]:
raw_data["Result"]=raw_data["Result"].map({"negative":0,"positive":1}).copy()
raw_data

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
0,64,1,66,160,83,160.0,1.80,0.012,0
1,21,1,94,98,46,296.0,6.75,1.060,1
2,55,1,64,160,77,270.0,1.99,0.003,0
3,64,1,70,120,55,270.0,13.87,0.122,1
4,55,1,64,112,65,300.0,1.08,0.003,0
...,...,...,...,...,...,...,...,...,...
1314,44,1,94,122,67,204.0,1.63,0.006,0
1315,66,1,84,125,55,149.0,1.33,0.172,1
1316,45,1,85,168,104,96.0,1.24,4.250,1
1317,54,1,58,117,68,443.0,5.80,0.359,1


In [171]:
core_data=raw_data[(raw_data["Gender"]==0)].copy()
core_data=core_data[["Age","Heart rate","Systolic blood pressure","Diastolic blood pressure","Blood sugar","CK-MB","Troponin","Result"]].copy()
core_data=core_data.reset_index(drop=True).copy()

In [172]:
core_data[core_data["Result"]==1]

Unnamed: 0,Age,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
6,86,73,114,68,87.0,1.11,0.776,1
10,47,66,134,57,279.0,300.00,0.007,1
13,58,96,111,74,99.0,1.29,0.026,1
16,63,81,130,65,98.0,2.28,0.040,1
18,28,78,127,61,114.0,19.47,0.010,1
...,...,...,...,...,...,...,...,...
443,47,58,93,78,170.0,1.19,0.068,1
444,55,80,117,83,200.0,0.78,0.052,1
445,56,68,123,70,102.0,2.28,0.255,1
446,60,78,109,69,230.0,19.47,0.003,1


In [173]:
core_data[core_data["Result"]==0] 

Unnamed: 0,Age,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
0,58,61,112,58,87.0,1.83,0.004,0
1,32,40,179,68,102.0,0.71,0.003,0
2,44,60,154,81,135.0,2.35,0.004,0
3,44,60,166,90,102.0,2.39,0.006,0
4,63,60,150,83,198.0,2.39,0.013,0
...,...,...,...,...,...,...,...,...
434,55,73,138,79,105.0,1.61,0.009,0
438,42,70,117,76,100.0,4.24,0.003,0
441,62,60,145,67,208.0,1.29,0.009,0
442,67,81,150,75,80.0,4.80,0.009,0


In [174]:
# Just seperating some values from orginal dataset for manually testing the model
test_values=core_data.iloc[[446,16,438,3]].copy()
core_data=core_data.drop([446,16,438,3]).copy()

# Splitting test and train dataset

In [175]:
X=core_data.drop("Result",axis=1).copy()
Y=core_data["Result"].copy()

In [176]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.1,random_state=42)

# Training the model (model - Random Forest)

In [177]:
clf=RandomForestClassifier(random_state=42)
clf.fit(X_train,Y_train)

# Model prediction and accuraccy report

In [178]:
Y_pred=clf.predict(X_test)

In [179]:
print(classification_report(Y_test,Y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00        23

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [180]:
from sklearn.model_selection import cross_val_score
cvs=cross_val_score(clf,X,Y,cv=5,scoring="accuracy")
print(cvs)

[0.96629213 1.         0.98876404 0.98876404 0.98876404]


In [181]:
mean_accu=np.mean(cvs)
std_accu=np.std(cvs)
print(mean_accu,std_accu)

0.9865168539325841 0.011008942664194044


In [182]:
test_values

Unnamed: 0,Age,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
446,60,78,109,69,230.0,19.47,0.003,1
16,63,81,130,65,98.0,2.28,0.04,1
438,42,70,117,76,100.0,4.24,0.003,0
3,44,60,166,90,102.0,2.39,0.006,0


# Loading the saved model

In [187]:
import pickle

In [189]:
load_model=pickle.load(open('TM_HeartAttack.sav','rb'))

# Saving the trained model

In [188]:
filename="TM_HeartAttack.sav"
pickle.dump(clf,open(filename,'wb'))

# Manual Testing

In [191]:
s=test_values.iloc[1].to_numpy()
s=s[0:-1].reshape(1,-1)
print(s)
pred=load_model.predict(s)
print(pred)
if(pred[0]==0):
    print("No chances of heart attack")
else:
    print("There is a chance of heart attack")

[[6.30e+01 8.10e+01 1.30e+02 6.50e+01 9.80e+01 2.28e+00 4.00e-02]]
[1]
There is a chance of heart attack


