In [2]:
import pandas as pd

In [3]:
# Location of the heart-disease CSV that the rest of the notebook reads from.
heart_csv_path = '/content/heart.csv'
heart_var = pd.read_csv(filepath_or_buffer=heart_csv_path)

# Preview the first five rows as the cell output.
heart_var.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [4]:
# Restrict to the object-dtype columns, then summarise them
# (count / unique / top / freq per column).
object_columns = heart_var.select_dtypes(include=['object'])
object_columns.describe()

Unnamed: 0,Sex,ChestPainType,RestingECG,ExerciseAngina,ST_Slope
count,918,918,918,918,918
unique,2,4,3,2,3
top,M,ASY,Normal,N,Flat
freq,725,496,552,547,460


In [5]:
# Names of every object-dtype column; each one gets its own styled table below.
cat = list(heart_var.select_dtypes(include=['object']).columns)

for col_name in cat:
    # Number of patients in each category of this column.
    totals = heart_var[col_name].value_counts()

    # Category shares computed separately within each HeartDisease group;
    # selecting key 1 keeps only the shares among patients with HeartDisease == 1.
    # The resulting percentages therefore sum to 100% across the categories.
    shares_by_outcome = heart_var.groupby(['HeartDisease'])[col_name].value_counts(normalize=True)
    disease_share = shares_by_outcome[1]

    # Both series are indexed by category, so the frame aligns them row by row.
    summary = pd.DataFrame({"Total Patients": totals,
                            "Proportion of Heart Disease": disease_share * 100})
    summary = summary.sort_values("Proportion of Heart Disease", ascending=False)

    # Caption the table, render the share as a percentage, and highlight each column's maximum.
    styled = summary.style.set_caption('Variable: {}'.format(col_name))
    styled = styled.format({"Proportion of Heart Disease": "{:,.1f}%"})
    styled = styled.highlight_max(
        props='font-weight:bold; color:white; background-color:#DE9393;', axis=0)
    display(styled)

Unnamed: 0_level_0,Total Patients,Proportion of Heart Disease
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1
M,725,90.2%
F,193,9.8%


Unnamed: 0_level_0,Total Patients,Proportion of Heart Disease
ChestPainType,Unnamed: 1_level_1,Unnamed: 2_level_1
ASY,496,77.2%
NAP,203,14.2%
ATA,173,4.7%
TA,46,3.9%


Unnamed: 0_level_0,Total Patients,Proportion of Heart Disease
RestingECG,Unnamed: 1_level_1,Unnamed: 2_level_1
Normal,552,56.1%
ST,178,23.0%
LVH,188,20.9%


Unnamed: 0_level_0,Total Patients,Proportion of Heart Disease
ExerciseAngina,Unnamed: 1_level_1,Unnamed: 2_level_1
Y,371,62.2%
N,547,37.8%


Unnamed: 0_level_0,Total Patients,Proportion of Heart Disease
ST_Slope,Unnamed: 1_level_1,Unnamed: 2_level_1
Flat,460,75.0%
Up,395,15.4%
Down,63,9.6%


In [6]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Three-panel summary figure:
#   (1) number of patients per outcome, (2) median age per outcome,
#   (3) percentage of men / women with and without heart disease.


def _disease_label(flag):
    """Return the display label for a HeartDisease value (1 -> 'Heart Disease', otherwise 'No Disease')."""
    return 'Heart Disease' if flag == 1 else 'No Disease'


# Bar colour keyed by label, so a bar's colour follows its label and not its position.
bar_colors = {'Heart Disease': '#AF4343', 'No Disease': '#C6AA97'}

# Plot 1: patient counts per outcome, largest first.
plot_df = heart_var.copy()
p1 = plot_df.HeartDisease.value_counts()
# rename_axis guarantees the reset_index() column is called 'HeartDisease'
# even when value_counts() leaves the index unnamed.
p1 = (p1.rename_axis('HeartDisease')
        .rename('count')
        .reset_index()
        .sort_values('count', ascending=False))
x1 = p1['HeartDisease'].apply(_disease_label)

# Plot 2: median age per outcome, rounded to whole years, oldest first.
p2 = plot_df.groupby('HeartDisease')['Age'].median().round(0).astype(int)
p2 = p2.rename('age').reset_index().sort_values('age', ascending=False)
# Labels are built from p2's own rows: p2 is sorted independently of p1, so reusing
# x1 here would mislabel the bars whenever the two sort orders disagree.
x_age = p2['HeartDisease'].apply(_disease_label)

# Plot 3: percentage of each outcome within each sex.
p3 = plot_df.groupby('Sex')['HeartDisease'].value_counts(normalize=True)
p3 = p3.mul(100).rename('pct').reset_index()
# x2, y0 and y1 are each reversed, so the three stay in the same relative order.
x2 = p3.Sex.apply(lambda x: 'Women' if x == 'F' else 'Men').unique()[::-1]
y0 = p3[p3.HeartDisease == 0]['pct'][::-1]
y1 = p3[p3.HeartDisease == 1]['pct'][::-1]


# Minimal layout template (font only).
temp = dict(layout=go.Layout(font=dict(family="Franklin Gothic", size=12)))
# 2x2 grid whose bottom row is a single panel spanning both columns.
fig = make_subplots(rows=2, cols=2,
                    vertical_spacing=0.12,
                    specs=[[{}, {}], [{'colspan': 2}, None]],
                    subplot_titles=("Number of Patients", "Median Age",
                                    "Prevalence of Heart Disease among Men and Women"))
# Plot 1
fig.add_trace(go.Bar(x=x1, y=p1['count'], text=p1['count'], texttemplate='n = %{text}',
                     textposition='outside',
                     marker=dict(color=x1.map(bar_colors).tolist(), opacity=0.8),
                     hovertemplate='Number of Patients with %{x} = %{y}<extra></extra>',
                     name="Heart Disease", showlegend=False), row=1, col=1)
# Plot 2
fig.add_trace(go.Bar(x=x_age, y=p2['age'], text=p2['age'], textposition='outside',
                     marker=dict(color=x_age.map(bar_colors).tolist(), opacity=0.8),
                     hovertemplate='Median Age of Patients with %{x} = %{y} years<extra></extra>',
                     name="Heart Disease", showlegend=False), row=1, col=2)

# Plot 3: two grouped traces (with / without heart disease) over the same x categories.
fig.add_trace(go.Bar(x=x2, y=y1, name='Heart Disease', text=y1, textposition='outside',
                     texttemplate='%{text:.1f}%', width=0.38,
                     hovertemplate='Proportion of %{x} with Heart Disease = %{y:.2f}%<extra></extra>',
                     marker=dict(color='#AF4343', opacity=0.8)), row=2, col=1)
fig.add_trace(go.Bar(x=x2, y=y0, name='No Disease', text=y0, textposition='outside',
                     texttemplate='%{text:.1f}%', width=0.38,
                     hovertemplate='Proportion of %{x} without Heart Disease = %{y:.2f}%<extra></extra>',
                     marker=dict(color='#C6AA97', opacity=0.8)), row=2, col=1)

# Thin black outline on every bar.
fig.update_traces(marker=dict(line=dict(width=1, color='#000000')))
fig.update_layout(title="Heart Disease Statistics", showlegend=True, template=temp,
                  legend=dict(orientation="h", yanchor="bottom", y=.4, xanchor="right", x=.97),
                  barmode='group', bargap=.15, height=1000, width=700)
# Fixed y ranges leave headroom for the 'outside' text labels.
# NOTE(review): tickmode='array' is normally paired with tickvals; confirm that dtick
# is actually honoured with this setting before relying on the tick spacing.
fig.update_yaxes(title="", tickmode='array', range=(0, 570), dtick=5, row=1, col=1)
fig.update_yaxes(title="Age, in years", tickmode='array', range=(0, 65), dtick=6, row=1, col=2)
fig.update_yaxes(title="", ticksuffix='%', tickmode='array', range=(0, 89), dtick=5, row=2, col=1)
fig.show()

In [7]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Overlaid density histograms of four numeric variables, split by HeartDisease.
# Each panel holds two traces (with / without heart disease) drawn on top of each other.

# (column, subplot row, subplot col) for each of the four panels.
hist_panels = [
    ('Age', 1, 1),
    ('RestingBP', 1, 2),
    ('Cholesterol', 2, 1),
    ('Oldpeak', 2, 2),
]
# (HeartDisease value, trace name, fill colour) for the two overlaid groups.
hist_groups = [
    (1, 'Heart Disease', '#AF4343'),
    (0, 'No Disease', '#C6AA97'),
]

# One title per panel, in row-major order.
fig = make_subplots(rows=2, cols=2,
                    vertical_spacing=0.15,
                    subplot_titles=("Distribution of Age by Heart Disease",
                                    "Distribution of Resting Blood Pressure<br>by Heart Disease",
                                    "Distribution of Cholesterol by Heart Disease",
                                    "Distribution of ST Segment Depression<br>by Heart Disease"))

for column, row, col in hist_panels:
    for flag, label, color in hist_groups:
        # Values of this column for the patients in the current outcome group.
        values = heart_var.loc[heart_var['HeartDisease'] == flag, column]
        fig.add_trace(go.Histogram(x=values, histnorm='probability density',
                                   marker=dict(color=color, line=dict(width=1, color='#000000')),
                                   opacity=0.8, name=label),
                      row=row, col=col)
    # Axis titles: the column name on x, a common 'Density' label on y.
    fig.update_xaxes(title_text=column, row=row, col=col)
    fig.update_yaxes(title_text='Density', row=row, col=col)

# barmode='overlay' draws the two groups in the same bins instead of side by side.
fig.update_layout(title_text='Numerical Variable Distribution', template='ggplot2', showlegend=False,
                  barmode='overlay', height=1000, width=700)
fig.show()

In [11]:
import plotly.express as px
import plotly.figure_factory as ff

# Scatter plots: pairwise scatter matrix of five numeric columns, coloured by outcome.
hd = heart_var.HeartDisease.apply(lambda x: 'Heart Disease' if x == 1 else 'No Disease')
# An explicit label -> colour map keeps each class on its colour regardless of which
# class happens to appear first in the data (a colour *sequence* is assigned by order
# of appearance).
outcome_colors = {'No Disease': '#C6AA97', 'Heart Disease': '#AF4343'}
fig = px.scatter_matrix(heart_var,
                        dimensions=["Age", "Cholesterol", "RestingBP", "MaxHR", "Oldpeak"],
                        color=hd, color_discrete_map=outcome_colors)
# Hide the diagonal and the upper triangle so each pair of variables is drawn once.
fig.update_traces(marker=dict(line_color='white', size=7, opacity=.7, line_width=0.5),
                  diagonal_visible=False, showupperhalf=False)
# `temp` is the font-only layout template defined in an earlier cell.
fig.update_layout(title='Heart Disease Pair Plots', template=temp,
                  legend=dict(title="", orientation="h", yanchor="bottom", y=.99, xanchor="center", x=.49,
                              traceorder='reversed'), width=700, height=800)
fig.show()

# Correlations: pairwise correlation of the numeric columns, drawn as an annotated heatmap.
corr = heart_var.corr(numeric_only=True)
x = corr.columns.tolist()
y = corr.index.tolist()
z = corr.values
# Cell annotations show the coefficients rounded to two decimals; hover shows three.
text = corr.values.round(2)

fig = ff.create_annotated_heatmap(z=z, x=x, y=y, annotation_text=text, colorscale='matter',
                                  reversescale=True, showscale=True,
                                  hovertemplate="Correlation of %{x} and %{y}= %{z:.3f}")
fig.update_layout(template=temp, title="Heart Disease Correlations")
fig.show()