In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Import Libraries
## Start by importing the necessary Python libraries:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 2. Load the Dataset
## Assuming the dataset is attached to the notebook under the Kaggle input directory (`/kaggle/input/`):

In [None]:
# Read the heart-attack risk CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/heart-attack-risk-assessment-dataset/Heart_Attack_Risk_Levels_Dataset.csv',
)

# 3. Explore the Data
## Understand the structure and contents of the dataset

In [None]:
# A notebook cell only renders the value of its final bare expression, so the
# intermediate summaries are passed to display() explicitly; otherwise the
# results of head() and describe() would be computed and silently discarded.
display(df.head())      # View the first few rows
df.info()               # Get a summary of the dataset (info() prints its own output)
display(df.describe())  # Statistical summary
df.isnull().sum()       # Check for missing values (rendered as the cell's output)

# 4. Visualize the Data
## Use visualization to gain insights:

In [None]:
# Histogram of the Age column using 30 bins
age_ax = sns.histplot(df['Age'], bins=30)
age_ax.set_title('Age Distribution')
plt.show()

In [None]:
# Heart Rate vs. Risk Level

import seaborn as sns
import matplotlib.pyplot as plt

# Boxplot of the 'Heart rate' column, one box per Risk_Level category
heart_rate_ax = sns.boxplot(data=df, x='Risk_Level', y='Heart rate')
heart_rate_ax.set(
    title='Heart Rate by Risk Level',
    xlabel='Risk Level',
    ylabel='Heart Rate',
)
plt.xticks(rotation=45)  # Tilt the category labels so they do not overlap
plt.tight_layout()
plt.show()

# The resulting boxplot shows how heart rate varies across the heart attack risk levels.

In [None]:
# Correlation heatmap restricted to numeric features
numeric_df = df.select_dtypes(include=['number'])  # Drop non-numeric columns before correlating
corr_matrix = numeric_df.corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Heatmap of Numeric Features')
fig.tight_layout()
plt.show()

# The heatmap shows how numeric features such as Troponin, CK-MB, Heart rate, etc. relate to one another.

In [None]:
def plot_by_risk_level(data, column, title, ylabel):
    """Draw and show a boxplot of ``column`` grouped by ``Risk_Level``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains a ``Risk_Level`` column and ``column``.
    column : str
        Name of the column plotted on the y-axis.
    title : str
        Figure title.
    ylabel : str
        Label for the y-axis.

    Returns
    -------
    None
        The figure is rendered via ``plt.show()``.
    """
    sns.boxplot(x='Risk_Level', y=column, data=data)
    plt.title(title)
    plt.xlabel('Risk Level')
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)  # Tilt the category labels so they do not overlap
    plt.tight_layout()
    plt.show()


# 1. Troponin vs. Risk_Level
plot_by_risk_level(df, 'Troponin', 'Troponin Level by Risk Level', 'Troponin')

# 2. Blood sugar vs. Risk_Level
plot_by_risk_level(df, 'Blood sugar', 'Blood Sugar Level by Risk Level', 'Blood Sugar (mg/dL)')

# 3. CK-MB vs. Risk_Level
plot_by_risk_level(df, 'CK-MB', 'CK-MB Enzyme Level by Risk Level', 'CK-MB')

# Analysis Documentation 

---

# 💓 Heart Attack Risk Assessment

## 🧠 Overview
This project provides a step-by-step data analysis and machine learning workflow to assess heart attack risk using patient health data. The notebook guides you through the entire data analysis pipeline — from loading and cleaning data to visualizing patterns and building predictive models.

---

## 📊 Dataset
The dataset used is available on Kaggle:  
[Heart_Attack_Risk_Levels_Dataset.csv](https://raw.githubusercontent.com/Christine-97/Data_analysis_with_jupyter/main/Heart_Attack_Risk_Levels_Dataset.csv)

It includes features such as:
- Age, Gender  
- Heart rate, Blood Pressure (Systolic and Diastolic)  
- Blood sugar, CK-MB, Troponin  
- Result, Risk Level, and Recommendation

---

## ⚙️ Installation
Required libraries include:
1. pandas
2. numpy
3. matplotlib
4. seaborn
5. scikit-learn

---

## 📝 Usage
To run the analysis:
* Launch Jupyter Notebook:

  ```
  jupyter notebook
  ```

* Run cells step-by-step to explore:
1. Data cleaning
2. Visualization

---

## 📈 Results

- The correlation heatmap shows relationships between clinical indicators and heart attack risk.
- Box plots highlight differences in heart rate, blood pressure, and troponin levels by risk category.
- A classification model (e.g., Decision Tree or Random Forest) was trained to predict Risk_Level with promising accuracy.

---