# Heart Attack Model

### Imports

In [74]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

### Dataset Read

In [75]:
DataSet = pd.read_csv('Medicaldataset.csv')
DataSet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1319 entries, 0 to 1318
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Age                       1319 non-null   int64  
 1   Gender                    1319 non-null   int64  
 2   Heart rate                1319 non-null   int64  
 3   Systolic blood pressure   1319 non-null   int64  
 4   Diastolic blood pressure  1319 non-null   int64  
 5   Blood sugar               1319 non-null   float64
 6   CK-MB                     1319 non-null   float64
 7   Troponin                  1319 non-null   float64
 8   Result                    1319 non-null   object 
dtypes: float64(3), int64(5), object(1)
memory usage: 92.9+ KB


### Outlier Detection

    -> Heart Rate

In [76]:
OrderedDataSet = DataSet.sort_values('Heart rate',axis=0)
Q1 = OrderedDataSet['Heart rate'].quantile(0.25)
Q3 = DataSet['Age'].quantile(0.75)
IQR =  Q3 - Q1
LFence = Q1 - 1.5 * IQR
RFence = Q3 + 1.5 * IQR
print(LFence,RFence)
OrderedDataSet['Age'] = OrderedDataSet['Heart rate'].astype(int)
DataSet = OrderedDataSet.query("`Heart rate` > @LFence and `Heart rate` < @RFence")

62.5 66.5


    -> Age

### PreProcessing

    -> One - Hot Encoding

In [77]:
DataSet[['Female','Male']] = pd.DataFrame(pd.get_dummies(DataSet['Gender']).astype(int))
Encoder = LabelEncoder()
DataSet['Result'] = Encoder.fit_transform(DataSet['Result'])
DataSet

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  DataSet[['Female','Male']] = pd.DataFrame(pd.get_dummies(DataSet['Gender']).astype(int))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  DataSet[['Female','Male']] = pd.DataFrame(pd.get_dummies(DataSet['Gender']).astype(int))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  DataSet['Result'] = Encoder

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result,Female,Male
610,63,1,63,98,57,111.0,2.550,0.006,0,0,1
575,63,1,63,105,64,95.0,1.630,0.032,1,0,1
609,63,1,63,103,61,130.0,2.400,0.026,1,0,1
473,63,0,63,104,87,227.0,0.493,0.011,0,1,0
517,63,0,63,170,104,143.0,1.970,0.006,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
29,66,1,66,135,55,166.0,0.493,10.000,0,0,1
251,66,1,66,115,62,218.0,1.900,0.030,1,0,1
195,66,1,66,101,50,94.0,0.596,0.171,1,0,1
201,66,0,66,150,95,115.0,2.960,0.280,1,1,0


### Column Removal

In [78]:
db = DataSet.drop(['Gender'],axis = 1)
db

Unnamed: 0,Age,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result,Female,Male
610,63,63,98,57,111.0,2.550,0.006,0,0,1
575,63,63,105,64,95.0,1.630,0.032,1,0,1
609,63,63,103,61,130.0,2.400,0.026,1,0,1
473,63,63,104,87,227.0,0.493,0.011,0,1,0
517,63,63,170,104,143.0,1.970,0.006,0,1,0
...,...,...,...,...,...,...,...,...,...,...
29,66,66,135,55,166.0,0.493,10.000,0,0,1
251,66,66,115,62,218.0,1.900,0.030,1,0,1
195,66,66,101,50,94.0,0.596,0.171,1,0,1
201,66,66,150,95,115.0,2.960,0.280,1,1,0


### Columns Splitting

In [79]:
X = db.drop(['Result'],axis = 1)
Y = db['Result']

X_Train,X_Test,Y_Train,Y_Test = train_test_split(X,Y,test_size= 0.2)

### Model Training

In [80]:
model = LogisticRegression()
model.fit(X_Train,Y_Train)

In [85]:
print(model.score(X_Train,Y_Train)*105)
print(model.score(X_Test,Y_Test)*105)

79.75961538461539
84.8076923076923
