# Data Analysis for Heart Rate

In [4]:
import numpy as np

## Hypothesis
### The older a male participant is and the higher his cholesterol level, the more likely he is to have heart disease

In [5]:
# Read heart.csv into a 2-D float array; the first line of the file
# (the column header row) is skipped.
data = np.genfromtxt(
    "heart.csv",
    delimiter=",",
    skip_header=1,
)

### Extracting Relevant Columns

In [7]:
# Pull the columns used in the analysis out of `data` as 1-D views,
# then preview the first five values of each.
# NOTE(review): in the standard UCI heart-disease column order, index 3 is
# resting blood pressure (trestbps) rather than a heart rate, and index 7
# (thalach) is the maximum heart rate achieved - confirm against the header
# row of heart.csv.
age, sex, resting_heartrate, chol, thalach, target = (
    data[:, column] for column in (0, 1, 3, 4, 7, -1)
)

print(f"age: {age[:5]}")
print(f"sex: {sex[:5]}")
print(f"Resting Heatrate: {resting_heartrate[:5]}")
print(f"Cholesterol levels: {chol[:5]}")
print(f"thalach: {thalach[:5]}")
print(f"target: {target[:5]}")

age: [63. 37. 41. 56. 57.]
sex: [1. 1. 0. 1. 0.]
Resting Heatrate: [145. 130. 130. 120. 120.]
Cholesterol levels: [233. 250. 204. 236. 354.]
thalach: [150. 187. 172. 178. 163.]
target: [1. 1. 1. 1. 1.]


### Basic Descriptive Statistics (for each column)

In [8]:
# Summary statistics for age. Mean, median and standard deviation are rounded
# to whole numbers; the standard deviation uses NumPy's default (ddof=0).
print(
    "---Basic Descriptive Statistics on age--- ",
    f" Mean, {round(age.mean())}",
    f" Median, {round(np.median(age))}",
    f" Standard Deviation {round(age.std())}",
    f" Min: {age.min()}",
    f" Max: {age.max()}",
    sep="\n",
)

---Basic Descriptive Statistics on age--- 
 Mean, 54
 Median, 55
 Standard Deviation 9
 Min: 29.0
 Max: 77.0


In [10]:
# Summary statistics for sex. Mean, median and standard deviation are rounded
# to whole numbers; the standard deviation uses NumPy's default (ddof=0).
# NOTE(review): sex only ranges from 0 to 1 here (see Min/Max), so rounding to
# whole numbers hides the actual proportion - consider keeping decimals.
print(
    "---Basic Descriptive Statistics on sex---",
    f" Mean, {round(sex.mean())}",
    f" Median, {round(np.median(sex))}",
    f" Standard Deviation {round(sex.std())}",
    f" Min: {sex.min()}",
    f" Max: {sex.max()}",
    sep="\n",
)

---Basic Descriptive Statistics on sex---
 Mean, 1
 Median, 1
 Standard Deviation 0
 Min: 0.0
 Max: 1.0


In [11]:
# Summary statistics for the resting_heartrate column (data[:, 3]). Mean,
# median and standard deviation are rounded to whole numbers; the standard
# deviation uses NumPy's default (ddof=0).
print(
    "---Basic Descriptive Statistics on heartrate--- ",
    f" Mean, {round(resting_heartrate.mean())}",
    f" Median, {round(np.median(resting_heartrate))}",
    f" Standard Deviation {round(resting_heartrate.std())}",
    f" Min: {resting_heartrate.min()}",
    f" Max: {resting_heartrate.max()}",
    sep="\n",
)

---Basic Descriptive Statistics on heartrate--- 
 Mean, 132
 Median, 130
 Standard Deviation 18
 Min: 94.0
 Max: 200.0


In [12]:
# Summary statistics for cholesterol. Mean, median and standard deviation are
# rounded to whole numbers; the standard deviation uses NumPy's default (ddof=0).
print(
    "---Basic Descriptive Statistics on cholesterol levels--- ",
    f" Mean, {round(chol.mean())}",
    f" Median, {round(np.median(chol))}",
    f" Standard Deviation {round(chol.std())}",
    f" Min: {chol.min()}",
    f" Max: {chol.max()}",
    sep="\n",
)

---Basic Descriptive Statistics on cholesterol levels--- 
 Mean, 246
 Median, 240
 Standard Deviation 52
 Min: 126.0
 Max: 564.0


In [14]:
# Summary statistics for thalach. Mean, median and standard deviation are
# rounded to whole numbers; the standard deviation uses NumPy's default (ddof=0).
print(
    "---Basic Descriptive Statistics on thalach--- ",
    f" Mean, {round(thalach.mean())}",
    f" Median, {round(np.median(thalach))}",
    f" Standard Deviation {round(thalach.std())}",
    f" Min: {thalach.min()}",
    f" Max: {thalach.max()}",
    sep="\n",
)

---Basic Descriptive Statistics on thalach--- 
 Mean, 150
 Median, 153
 Standard Deviation 23
 Min: 71.0
 Max: 202.0


In [15]:
# Summary statistics for target. Mean, median and standard deviation are
# rounded to whole numbers; the standard deviation uses NumPy's default (ddof=0).
# NOTE(review): target only ranges from 0 to 1 here (see Min/Max), so rounding
# to whole numbers hides the actual proportion - consider keeping decimals.
print(
    "---Basic Descriptive Statistics on target--- ",
    f" Mean, {round(target.mean())}",
    f" Median, {round(np.median(target))}",
    f" Standard Deviation {round(target.std())}",
    f" Min: {target.min()}",
    f" Max: {target.max()}",
    sep="\n",
)

---Basic Descriptive Statistics on target--- 
 Mean, 1
 Median, 1
 Standard Deviation 0
 Min: 0.0
 Max: 1.0


### Data Filtering

In [17]:
# Total number of rows in the loaded dataset, before any filtering.
print(f"# of rows before filtering... {data.shape[0]}")

# of rows before filtering... 303


In [43]:
# Count rows with age > 50, sex == 0, cholesterol > 250 and target == 1.
# NOTE(review): the filter is on cholesterol, not resting heart rate. The UCI
# heart-disease documentation codes sex as 1 = male, 0 = female, so sex == 0
# would select female patients - confirm against this CSV.
filtered_data = data[
    (target == 1)
    & (sex == 0)
    & (age > 50)
    & (chol > 250)
]
print(f"Number of rows after filtering: {filtered_data.shape[0]}")

Number of rows after filtering: 28


In [44]:
# Count rows with age > 50, sex == 0, cholesterol > 250 and target == 0.
# NOTE(review): the filter is on cholesterol, not resting heart rate. The UCI
# heart-disease documentation codes sex as 1 = male, 0 = female, so sex == 0
# would select female patients - confirm against this CSV.
filtered_data = data[
    (target == 0)
    & (sex == 0)
    & (age > 50)
    & (chol > 250)
]
print(f"Number of rows after filtering: {filtered_data.shape[0]}")

Number of rows after filtering: 13


In [45]:
# Count rows with age > 50, sex == 1, cholesterol > 250 and target == 1.
# NOTE(review): the filter is on cholesterol, not resting heart rate. The UCI
# heart-disease documentation codes sex as 1 = male, 0 = female, so sex == 1
# would select male patients - confirm against this CSV.
filtered_data = data[
    (target == 1)
    & (sex == 1)
    & (age > 50)
    & (chol > 250)
]
print(f"Number of rows after filtering: {filtered_data.shape[0]}")

Number of rows after filtering: 12


In [47]:
# Count rows with age > 50, sex == 1, cholesterol > 250 and target == 0.
# NOTE(review): the filter is on cholesterol, not resting heart rate. The UCI
# heart-disease documentation codes sex as 1 = male, 0 = female, so sex == 1
# would select male patients - confirm against this CSV.
filtered_data = data[
    (target == 0)
    & (sex == 1)
    & (age > 50)
    & (chol > 250)
]
print(f"Number of rows after filtering: {filtered_data.shape[0]}")

Number of rows after filtering: 45


### Number of Unique Categories

In [21]:
# Show the sorted distinct values of resting_heartrate, then how many there are.
print(
    f"Unique resting heartrate counts: {np.unique(resting_heartrate)}",
    f"How many unique resting heartrates: {np.unique(resting_heartrate).size}",
    sep="\n",
)

Unique resting heartrate counts: [ 94. 100. 101. 102. 104. 105. 106. 108. 110. 112. 114. 115. 117. 118.
 120. 122. 123. 124. 125. 126. 128. 129. 130. 132. 134. 135. 136. 138.
 140. 142. 144. 145. 146. 148. 150. 152. 154. 155. 156. 160. 164. 165.
 170. 172. 174. 178. 180. 192. 200.]
How many unique resting heartrates: 49


In [22]:
# Show the sorted distinct cholesterol values, then how many there are.
print(
    f"Unique cholesterol level counts: {np.unique(chol)}",
    f"How many unique cjolesterol levels: {np.unique(chol).size}",
    sep="\n",
)

Unique cholesterol level counts: [126. 131. 141. 149. 157. 160. 164. 166. 167. 168. 169. 172. 174. 175.
 176. 177. 178. 180. 182. 183. 184. 185. 186. 187. 188. 192. 193. 195.
 196. 197. 198. 199. 200. 201. 203. 204. 205. 206. 207. 208. 209. 210.
 211. 212. 213. 214. 215. 216. 217. 218. 219. 220. 221. 222. 223. 224.
 225. 226. 227. 228. 229. 230. 231. 232. 233. 234. 235. 236. 237. 239.
 240. 241. 242. 243. 244. 245. 246. 247. 248. 249. 250. 252. 253. 254.
 255. 256. 257. 258. 259. 260. 261. 262. 263. 264. 265. 266. 267. 268.
 269. 270. 271. 273. 274. 275. 276. 277. 278. 281. 282. 283. 284. 286.
 288. 289. 290. 293. 294. 295. 298. 299. 300. 302. 303. 304. 305. 306.
 307. 308. 309. 311. 313. 315. 318. 319. 321. 322. 325. 326. 327. 330.
 335. 340. 341. 342. 353. 354. 360. 394. 407. 409. 417. 564.]
How many unique cjolesterol levels: 152


In [23]:
# Show the sorted distinct thalach values, then how many there are.
print(
    f"Unique thalach counts: {np.unique(thalach)}",
    f"How many unique thalach levels: {np.unique(thalach).size}",
    sep="\n",
)

Unique thalach counts: [ 71.  88.  90.  95.  96.  97.  99. 103. 105. 106. 108. 109. 111. 112.
 113. 114. 115. 116. 117. 118. 120. 121. 122. 123. 124. 125. 126. 127.
 128. 129. 130. 131. 132. 133. 134. 136. 137. 138. 139. 140. 141. 142.
 143. 144. 145. 146. 147. 148. 149. 150. 151. 152. 153. 154. 155. 156.
 157. 158. 159. 160. 161. 162. 163. 164. 165. 166. 167. 168. 169. 170.
 171. 172. 173. 174. 175. 177. 178. 179. 180. 181. 182. 184. 185. 186.
 187. 188. 190. 192. 194. 195. 202.]
How many unique thalach levels: 91


In [36]:
# Pearson correlation matrix between thalach and cholesterol.
# np.corrcoef's rowvar flag: True (the default) treats each row as a variable
# with observations in the columns; False treats each column as a variable.
# The off-diagonal entries of the 2x2 result are the thalach/chol correlation.

corr_heartdisease = np.corrcoef(
    x=thalach,
    y=chol,
    rowvar=False,
)
print(f"Corr thalach and cholesterol levels: {corr_heartdisease}")

Corr thalach and cholesterol levels: [[ 1.         -0.00993984]
 [-0.00993984  1.        ]]


###  Correlation Analysis
- Age and cholesterol levels: r ≈ 0.21 (weak positive)
- Resting heart rate and cholesterol levels: r ≈ 0.12 (weak positive)
- Thalach and cholesterol levels: r ≈ -0.01 (extremely weak negative, effectively no linear relationship)

## Summary
- Our initial hypothesis was that the older a male participant is and the higher his cholesterol level, the more likely he is to have heart disease. After testing the data, we found that this was not the case: the correlations involving cholesterol are weak, so whatever drives heart disease in this dataset appears to be a different relationship. However, when we compared the statistics for male and female patients, older male patients with high cholesterol were more often associated with heart disease than older female patients with high cholesterol. (In the filtered counts for age > 50 and cholesterol > 250, the `sex == 0` group had 28 rows with `target == 1` versus 13 with `target == 0`, while the `sex == 1` group had 12 versus 45. Reading this as a male-versus-female difference assumes `sex == 0` encodes male; this should be verified against the dataset's documentation.)