<a href="https://colab.research.google.com/github/StealthyNinja26/heart_attack/blob/main/heart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Find the conditions that lead to higher risk of heart attack

In [None]:
# Importing libraries
import numpy as np
import pandas as pd
from scipy import stats
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/scipy — confirm that is intended.
warnings.filterwarnings("ignore")

In [None]:
# Load the dataset once into `data` and do all further work on the copy `df`,
# so the frame as read from disk stays available unchanged.
data = pd.read_csv('heart.csv')
df = data.copy()
# Preview the first rows.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


### Features:

1. age: Age of the patient

2. sex: Sex of the patient

3. cp: Chest pain type, 0 = Typical Angina, 1 = Atypical Angina, 2 = Non-anginal Pain, 3 = Asymptomatic

4. trestbps: Resting blood pressure (in mm Hg)

5. chol: Cholesterol in mg/dl fetched via BMI sensor

6. fbs: (fasting blood sugar > 120 mg/dl), 1 = True, 0 = False

7. restecg: Resting electrocardiographic results, 0 = Normal, 1 = ST-T wave abnormality, 2 = showing probable or definite left ventricular hypertrophy by Estes' criteria

8. thalach: Maximum heart rate achieved

9. oldpeak: Previous peak

10. slope: Slope

11. ca: Number of major vessels

12. thal: Thallium Stress Test result, (0-3)

13. exang: Exercise induced angina, 1 = Yes, 0 = No

14. target: 0 = less chance of heart attack, 1 = more chance of heart attack

In [None]:
# Data cleaning, step 1: inspect column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB


In [None]:
# (rows, columns) of the frame at this point in the notebook
df.shape

(1025, 14)

In [None]:
# Number of rows flagged as duplicates of another row
df.duplicated().sum()

np.int64(723)

In [None]:
# Show the rows that are flagged as duplicates
df.loc[df.duplicated()]

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
15,34,0,1,118,210,0,1,192,0,0.7,2,0,2,1
31,50,0,1,120,244,0,1,162,0,1.1,2,0,2,1
43,46,1,0,120,249,0,0,144,0,0.8,2,0,3,0
55,55,1,0,140,217,0,1,111,1,5.6,0,0,3,0
61,66,0,2,146,278,0,0,152,0,0.0,1,1,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [None]:
# Drop duplicated rows and renumber the index 0..n-1 in a single step.
# Rebinding `df` (rather than mutating it with inplace=True) keeps the cell a
# plain "input -> output" transformation.
df = df.drop_duplicates(ignore_index=True)
# Invariant for the rest of the analysis: no row appears twice.
assert not df.duplicated().any(), "duplicate rows remain after de-duplication"
df.shape

(302, 14)

In [None]:
df.isna().sum() # Checking null values (per-column count of missing entries)

Unnamed: 0,0
age,0
sex,0
cp,0
trestbps,0
chol,0
fbs,0
restecg,0
thalach,0
exang,0
oldpeak,0


In [None]:
# Changing some numerical columns to categorical (labelled) columns.
#
# `replace` is used rather than `map` on purpose: `map` turns every value that is
# not a key of the dict into NaN, so running this cell a second time (when the
# columns already hold the text labels) would wipe the columns out. `replace`
# leaves values that are not keys untouched, which makes the cell safe to re-run.
category_labels = {
    'exang': {1:'yes',0:'no'},
    'cp': {0:'typical angina',1:'atypical angina',2:'non-anginal pain',3:'asymptomatic'},
    'fbs': {1:'true',0:'false'},
    'restecg': {0:'normal',1:'having ST-T wave abnormality',2:'showing probable or definite left ventricular hypertrophy'},
    'target': {0:'less chance of heart attack',1:'more chance of heart attack'},
}
for column, labels in category_labels.items():
    df[column] = df[column].replace(labels)

# Fixed seed so the preview shows the same rows on every run.
df.sample(5, random_state=42)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
73,50,1,non-anginal pain,140,233,False,having ST-T wave abnormality,163,no,0.6,1,1,3,less chance of heart attack
11,43,0,typical angina,132,341,True,normal,136,yes,3.0,1,0,3,less chance of heart attack
103,60,1,typical angina,145,282,False,normal,142,yes,2.8,1,2,3,less chance of heart attack
157,60,0,non-anginal pain,102,318,False,having ST-T wave abnormality,160,no,0.0,2,1,2,more chance of heart attack
250,42,0,typical angina,102,265,False,normal,122,no,0.6,1,0,2,more chance of heart attack


### Calculating Mean, Median and Mode values for 'more chance of heart attack'

In [None]:
# Split the data on the 'target' column: keep only the higher-risk patients
high_risk_mask = df['target'] == "more chance of heart attack"
df1 = df[high_risk_mask]

#### MEAN:

In [None]:
# Arithmetic mean of each continuous feature in df1, printed with 2 decimals.
mean_labels = {
    'age': "Age",
    'trestbps': "Resting blood pressure (in mm Hg)",
    'chol': "Cholestoral in mg/dl",
    'thalach': "Maximum heart rate achieved",
    'oldpeak': "Previous peak",
}
for column, label in mean_labels.items():
    print(f"{label}:{df1[column].mean():.2f}")

Age:52.59
Resting blood pressure (in mm Hg):129.25
Cholestoral in mg/dl:242.64
Maximum heart rate achieved:158.38
Previous peak:0.59


#### TRIMMED MEAN:

In [None]:
# Trimmed mean of each continuous feature in df1: a fraction of 0.1 is cut from
# each end of the sorted values before averaging. Printed with 2 decimals.
trimmed_mean_labels = {
    'age': "Age",
    'trestbps': "Resting blood pressure (in mm Hg)",
    'chol': "Cholestoral in mg/dl",
    'thalach': "Maximum heart rate achieved",
    'oldpeak': "Previous peak",
}
for column, label in trimmed_mean_labels.items():
    trimmed = stats.trim_mean(df1[column], 0.1)
    print(f"{label}:{trimmed:.2f}")

Age:52.41
Resting blood pressure (in mm Hg):128.65
Cholestoral in mg/dl:238.44
Maximum heart rate achieved:159.87
Previous peak:0.45


#### MEDIAN

In [None]:
# Median of each continuous feature in df1.
median_labels = {
    'age': "Age",
    'trestbps': "Resting blood pressure (in mm Hg)",
    'chol': "Cholestoral in mg/dl",
    'thalach': "Maximum heart rate achieved",
    'oldpeak': "Previous peak",
}
for column, label in median_labels.items():
    print(label + ":", df1[column].median())

Age: 52.0
Resting blood pressure (in mm Hg): 130.0
Cholestoral in mg/dl: 234.5
Maximum heart rate achieved: 161.0
Previous peak: 0.2


#### MODE

In [None]:
# Most frequent value of each categorical feature in df1
# (the first entry is taken when several values tie).
mode_labels = {
    'sex': "Sex",
    'cp': "Chest Pain",
    'restecg': "Electrocardiograph",
    'fbs': "fasting blood sugar",
    'exang': "Exercise induced angina",
    'slope': "Slope",
    'ca': "Number of major vessels",
    'thal': "Thalium Stress Test result",
}
for column, label in mode_labels.items():
    print(label + ":", df1[column].mode()[0])

Sex: 1
Chest Pain: non-anginal pain
Electrocardiograph: having ST-T wave abnormality
fasting blood sugar: false
Exercise induced angina: no
Slope: 2
Number of major vessels: 0
Thalium Stress Test result: 2


### Confidence interval

In [None]:
def con_int(feature, data=None, confidence_level=0.95):
    """Return the Student-t confidence interval for the mean of one column.

    Parameters
    ----------
    feature : str
        Name of the numeric column to analyse.
    data : pandas.DataFrame, optional
        Frame to read the column from. When omitted, the notebook-level
        frame ``df1`` is used, which keeps existing ``con_int(name)`` calls working.
    confidence_level : float, optional
        Coverage of the interval, between 0 and 1. Defaults to 0.95.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds of the interval. ``(nan, nan)`` when fewer
        than two non-missing observations are available, because the standard
        error is undefined in that case.
    """
    if data is None:
        data = df1

    # Drop missing values first so that the mean, the standard error and the
    # degrees of freedom are all computed from the same set of observations.
    sample = data[feature].dropna()
    if sample.size < 2:
        return (np.nan, np.nan)

    degrees_freedom = sample.size - 1
    sample_mean = sample.mean()
    sample_standard_error = stats.sem(sample)
    return stats.t.interval(
        confidence_level,
        degrees_freedom,
        loc=sample_mean,
        scale=sample_standard_error,
    )

# Continuous features for which the confidence interval of the mean is reported.
arr = ['age','trestbps','chol','thalach','oldpeak']
for i in arr:
    result = con_int(i)
    lower, upper = result
    # Print plain 2-decimal bounds instead of the raw tuple repr, matching the
    # precision used for the other summary statistics in this notebook.
    print("Confidence interval of {} = ({:.2f}, {:.2f})".format(i, lower, upper))

Confidence interval of age = (np.float64(51.1186955015283), np.float64(54.052036205788774))
Confidence interval of trestbps = (np.float64(126.7513544212682), np.float64(131.7486455787318))
Confidence interval of chol = (np.float64(234.39765202794803), np.float64(250.88283577693))
Confidence interval of thalach = (np.float64(155.41769892300516), np.float64(161.33839863797044))
Confidence interval of oldpeak = (np.float64(0.4660480947628114), np.float64(0.7071226369445056))
