In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from itertools import combinations

# Read the heart dataset from the relative path data/heart.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data/heart.csv')

# Bare trailing expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [17]:
# Stacked bar chart: for every chest-pain type (`cp`), the share of rows in each
# `target` class, expressed as a percentage of all rows with that `cp` value.

# Row count per (cp, target) pair, and row count per cp value (the denominator)
cp_target_counts = df.groupby(['cp', 'target']).size().reset_index(name='counts')
cp_totals = df.groupby('cp').size().reset_index(name='total')
cp_target_counts = cp_target_counts.merge(cp_totals, on='cp')
cp_target_counts['percentage'] = (cp_target_counts['counts'] / cp_target_counts['total']) * 100

# Convert target to string so plotly assigns discrete colors instead of a color scale
cp_target_counts['target'] = cp_target_counts['target'].astype(str)

# Deterministic row order: target '1' first, then ascending cp within each target.
# The stacking order of the bars is pinned separately via `category_orders` below,
# so the figure no longer depends on how the rows happen to be ordered.
cp_target_counts = cp_target_counts.sort_values(['target', 'cp'], ascending=[False, True])

# Tick positions come from the cp values actually present in the data rather than a
# hard-coded [0, 1, 2, 3], so a differently encoded cp column is still labelled correctly.
cp_values = sorted(cp_target_counts['cp'].unique().tolist())

# Create stacked bar chart with percentages; '1' is listed first in
# `category_orders`, which makes it the first trace and puts it at the bottom of each stack.
fig = px.bar(
    cp_target_counts,
    x='cp',
    y='percentage',
    color='target',
    labels={'cp': 'Chest Pain Type', 'percentage': 'Percentage (%)', 'target': 'Heart Disease'},
    title='Percentage Distribution of Heart Disease by Chest Pain Type',
    color_discrete_map={'1': '#EF553B', '0': '#00CC96'},
    category_orders={'target': ['1', '0']},
    text='percentage',
)

# Show each segment's share inside the bar, rounded to one decimal place
fig.update_traces(texttemplate='%{text:.1f}%', textposition='inside')
# Each stack sums to 100 %, so fix the y axis to the full percentage range
fig.update_layout(yaxis_range=[0, 100], xaxis=dict(tickmode='array', tickvals=cp_values))
fig.show()



