# Exploratory Data Analysis

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load heart.csv (path is relative to the working directory) into a DataFrame
# and preview its first rows.
df = pd.read_csv('heart.csv')
df.head()

In [None]:
# Summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Number of missing (NaN) entries in each column.
df.isna().sum()

In [None]:
# A bare `df.shape` that is not the last expression of the cell is evaluated
# and discarded, so print it explicitly; `describe()` stays last so its table
# is still rendered as the cell output.
print(df.shape)
df.describe()

## Visualizing Data

In [None]:
# Bar chart of how many rows fall into each HeartDisease value.
df['HeartDisease'].value_counts().plot.bar()

In [None]:
def plotting(var, num=1):
    """Draw a histogram with a KDE overlay of ``df[var]`` in a 2x2 subplot grid.

    Args:
        var: Name of the column of the module-level ``df`` to plot.
        num: 1-based position of the subplot inside the 2x2 grid.
    """
    # Select (or create) the requested slot, then draw into that axes.
    ax = plt.subplot(2, 2, num)
    sns.histplot(x=df[var], kde=True, ax=ax)
    # Re-run after every panel so labels of neighbouring subplots do not overlap.
    plt.tight_layout()

In [None]:
# Preview the first ten rows.
df.head(10)

In [None]:
# One histogram per column, filling the 2x2 grid in order (slots 1..4).
for slot, column in enumerate(['Age', 'RestingBP', 'Cholesterol', 'MaxHR'], start=1):
    plotting(column, slot)

In [None]:
# Frequency of each distinct Cholesterol value, most common first.
# NOTE(review): presumably used to spot the 0 entries that the correction
# step below replaces with the mean — confirm.
df['Cholesterol'].value_counts()

## Correcting the Data

In [None]:
# Treat 0 as a placeholder: replace it with the mean of the non-zero
# Cholesterol readings, then round the column to two decimals.
nonzero_chol = df['Cholesterol'] != 0
ch_mean = df.loc[nonzero_chol, 'Cholesterol'].mean()
df['Cholesterol'] = df['Cholesterol'].replace(0, ch_mean).round(2)

In [None]:
# Treat 0 as a placeholder: replace it with the mean of the non-zero
# RestingBP readings, then round the column to two decimals.
nonzero_rbp = df['RestingBP'] != 0
rbp_mean = df.loc[nonzero_rbp, 'RestingBP'].mean()
df['RestingBP'] = df['RestingBP'].replace(0, rbp_mean).round(2)

In [None]:
# Re-draw the same four histograms after the zero values were replaced.
for position, feature in enumerate(('Age', 'RestingBP', 'Cholesterol', 'MaxHR'), 1):
    plotting(feature, position)

In [None]:
# Columns treated as categorical. 'Oldpeak' is intentionally not listed: it is
# handled as a numerical feature (it is standardized together with the other
# numeric columns in the preprocessing step), so it does not belong here.
cat_cols = ['Sex', 'ChestPainType', 'FastingBS', 'RestingECG', 'ExerciseAngina', 'ST_Slope', 'HeartDisease']

cat_cols

## Visualizing the Corrected Data

In [None]:
# Row counts per Sex, split by HeartDisease.
sns.countplot(data=df, x='Sex', hue='HeartDisease')

In [None]:
# Row counts per ChestPainType, split by HeartDisease.
sns.countplot(data=df, x='ChestPainType', hue='HeartDisease')

In [None]:
# Row counts per FastingBS value, split by HeartDisease.
sns.countplot(data=df, x='FastingBS', hue='HeartDisease')

In [None]:
# Distribution of Cholesterol for each HeartDisease value.
sns.boxplot(data=df, x='HeartDisease', y='Cholesterol')

In [None]:
# Pairwise correlations between the numeric columns, annotated with values.
corr_matrix = df.corr(numeric_only=True)
sns.heatmap(corr_matrix, annot=True)

## Data Preprocessing


In [None]:
# One-hot encode the non-numeric columns, dropping the first level of each.
# `dtype=int` makes only the newly created dummy columns 0/1 integers.
# A blanket `df_encoded.astype(int)` would also cast every numeric column and
# silently truncate fractional values (e.g. the mean-imputed Cholesterol and
# RestingBP entries, and Oldpeak if it is stored as a float) before scaling.
df_encoded = pd.get_dummies(df, drop_first=True, dtype=int)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the numeric feature columns with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fit on the whole frame here; if a train/test
# split follows later, consider fitting on the training portion only.
scaler = StandardScaler()

# Despite the name, these are column labels.
numerical_rows = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']

numeric_block = df_encoded[numerical_rows]
df_encoded[numerical_rows] = scaler.fit(numeric_block).transform(numeric_block)
df_encoded.head()
