In [1]:
# Standard library
import warnings

# Third-party analysis and plotting libraries
import altair as alt
import numpy as np
import pandas as pd
import plotly.express as px

# Notebook-wide configuration, applied once after every import has run:
# turn off Altair's max-rows check on chart data ...
alt.data_transformers.disable_max_rows()
# ... and ignore FutureWarning messages raised from here on.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Load the readmissions table from the working directory, then report its
# (rows, columns) size as the cell output.
df = pd.read_csv(filepath_or_buffer='hospital_readmissions.csv')
df.shape

(25000, 17)

In [3]:
# Peek at the first three rows of the loaded frame.
df.head(n=3)

Unnamed: 0,age,time_in_hospital,n_lab_procedures,n_procedures,n_medications,n_outpatient,n_inpatient,n_emergency,medical_specialty,diag_1,diag_2,diag_3,glucose_test,A1Ctest,change,diabetes_med,readmitted
0,[70-80),8,72,1,18,2,0,0,Missing,Circulatory,Respiratory,Other,no,no,no,yes,no
1,[70-80),3,34,2,13,0,0,0,Other,Other,Other,Other,no,no,no,yes,no
2,[50-60),5,45,0,18,0,0,0,Missing,Circulatory,Circulatory,Circulatory,no,no,yes,yes,yes


In [4]:
# Cleaning
# Columns that are cast to float before any analysis.
cols = [
    'time_in_hospital',
    'n_lab_procedures',
    'n_procedures',
    'n_medications',
    'n_outpatient',
    'n_inpatient',
    'n_emergency',
]
# One astype call with a column -> dtype mapping converts all of them at once.
df = df.astype(dict.fromkeys(cols, float))

# Keep only the rows that have no missing value in any column.
clean_df = df.dropna()

In [5]:
# Size check after cleaning: look at the frame produced by dropna() so that any
# rows removed for missing values actually show up here (the raw frame's shape
# was already displayed right after loading).
clean_df.shape

(25000, 17)

In [6]:
# Distinct values that appear in the glucose_test column of the cleaned frame.
clean_df.loc[:, 'glucose_test'].unique()

array(['no', 'normal', 'high'], dtype=object)

In [7]:
# Pair each row's age bracket with its glucose test result in a two-column
# frame ('glucose_test' is exposed under the shorter name 'glucose').
data = dict(
    age=clean_df['age'],
    glucose=clean_df['glucose_test'],
)
stackbar_df = pd.DataFrame(data)
stackbar_df.head(3)

Unnamed: 0,age,glucose
0,[70-80),no
1,[70-80),no
2,[50-60),no


In [8]:
# Number of rows for every (age bracket, glucose test result) pair, flattened
# back into ordinary columns with the group size stored under 'count'.
stackbar_df = (
    clean_df
    .groupby(by=['age', 'glucose_test'])
    .size()
    .reset_index(name='count')
)
stackbar_df.head(3)

Unnamed: 0,age,glucose_test,count
0,[40-50),high,82
1,[40-50),no,2391
2,[40-50),normal,59


In [15]:
# Plotly version of the chart: one bar per age bracket, stacked and coloured
# by glucose test result, with human-readable axis/legend labels.
fig = px.bar(
    stackbar_df,
    x='age', y='count', color='glucose_test',
    barmode='stack',
    labels={'age': 'Age Group', 'count': 'Count', 'glucose_test': 'Glucose Levels'},
    title='Glucose Levels By Age Group',
    width=600, height=500,
)
# The figure is only built here, not displayed; call fig.show() to render it.

In [14]:
# Point selection keyed on the glucose test result. It is registered on the
# legend chart (add_params) and read by the bar chart's filter and conditions.
selection = alt.selection_point(fields=['glucose_test'])

# Bars of counts per age bracket, restricted to the selected glucose level(s).
bar = (
    alt.Chart(stackbar_df)
    .mark_bar(size=50, stroke='black', strokeWidth=0.2)
    .transform_filter(selection)
    .encode(
        x=alt.X('age:O').title('Ages').axis(labelAngle=0),
        y=alt.Y('count:Q').title('Counts'),
        color=alt.condition(
            selection,
            alt.Color('glucose_test:N').legend(None).scale(range=['red', '#d3d3d3', 'green']),
            alt.value('grey'),
        ),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.1)),
        tooltip=[
            alt.Tooltip('glucose_test').title('Glucose Test'),
            alt.Tooltip('count').title('Count'),
        ],
    )
    .properties(
        title='Glucose Level Amongst Age Groups',
        width=400,
        height=300,
    )
)

# Hand-built legend: one rect row per glucose level; this is the chart that
# carries the selection parameter.
legend = (
    alt.Chart(stackbar_df)
    .mark_rect()
    .encode(
        y=alt.Y('glucose_test').title('Glucose Level'),
        color=alt.condition(
            selection,
            alt.Color('glucose_test:N').legend(None),
            alt.value('grey'),
        ),
    )
    .add_params(selection)
)

# Bars and legend side by side, with Times New Roman applied to the title,
# axes and legend text.
chart = (
    (bar | legend)
    .configure_title(font='Times New Roman', fontSize=15)
    .configure_axis(labelFont='Times New Roman', titleFont='Times New Roman')
    .configure_legend(labelFont='Times New Roman', titleFont='Times New Roman')
)
chart

In [11]:
# Mean number of lab procedures for every (time in hospital, age bracket)
# pair, flattened into columns and rounded to two decimals.
line_df = (
    clean_df
    .groupby(['time_in_hospital', 'age'])['n_lab_procedures']
    .mean()
    .reset_index(name='procedure_avg_by_hospital_time')
    .round(2)
)
line_df.head(3)

Unnamed: 0,time_in_hospital,age,procedure_avg_by_hospital_time
0,1.0,[40-50),33.33
1,1.0,[50-60),32.37
2,1.0,[60-70),32.24


In [13]:
# Age brackets offered by the radio filter. The same list supplies both the
# option values and the displayed labels, so the labels can no longer drift
# from the data values (previously they mixed '[40-50]' and '[50,60]' styles
# and showed a closing ']' although the brackets are half-open '[a-b)').
age_options = ['[40-50)', '[50-60)', '[60-70)', '[70-80)', '[80-90)', '[90-100)']

# Radio widget bound to the 'age' field; the leading None option is labelled
# 'All' (presumably it clears the filter so every bracket is drawn - confirm
# in the rendered chart).
input_radio = alt.binding_radio(
    options=[None, *age_options],
    labels=['All', *age_options],
    name="Age: ",
)

# selection1: driven by the radio widget, used to filter the data.
# selection2: driven by mouse hover, used to fade the non-hovered age brackets.
selection1 = alt.selection_point(fields=['age'], bind=input_radio)
selection2 = alt.selection_point(on='mouseover', fields=['age'])

# Everything the line layer and the point layer have in common is declared
# once, so the two layers cannot fall out of sync.
line_base = (
    alt.Chart(line_df)
    .encode(
        x=alt.X('time_in_hospital:Q', title="Time in Hospital"),
        # The y axis uses a fixed 20-70 domain rather than one derived from the data.
        y=alt.Y(
            'procedure_avg_by_hospital_time:Q',
            title='Average Number of Lab Procedures',
            scale=alt.Scale(domain=[20, 70]),
        ),
        color=alt.Color('age:O', title="Age"),
        opacity=alt.condition(selection2, alt.value(1), alt.value(0.1)),
    )
    .properties(
        title='Average Procedures During Hospital Stay by Age',
        width=400,
        height=300,
    )
    .add_params(selection1, selection2)
    .transform_filter(selection1)
)

# One line per age bracket.
line = line_base.mark_line()

# Small markers on top of the lines; only this layer carries tooltips.
point = line_base.mark_point(size=8).encode(
    tooltip=[
        alt.Tooltip('age', title='Patient Age'),
        alt.Tooltip('time_in_hospital', title='Time in Hospital'),
        alt.Tooltip('procedure_avg_by_hospital_time', title='Average Procedure Done'),
    ]
)

# Layer the two marks and apply Times New Roman to title, axes and legend.
chart = (line + point).configure_title(
    font='Times New Roman',
    fontSize=15
).configure_axis(
    labelFont='Times New Roman',
    titleFont='Times New Roman'
).configure_legend(
    labelFont='Times New Roman',
    titleFont='Times New Roman'
)

chart

In [1]:
import pandas as pd

# This cell imports pandas and reloads the CSV itself instead of reusing frames
# built by other cells. Note that it rebinds `df`, `cols` and `clean_df`.

# A specialty must have MORE than this many rows to be ranked.
MIN_SPECIALTY_ROWS = 50
# How many specialties to keep, ordered by highest readmission rate.
TOP_N_SPECIALTIES = 10

# Read data
df = pd.read_csv("hospital_readmissions.csv")

# Convert to numeric
cols = ['time_in_hospital', 'n_lab_procedures', 'n_procedures',
        'n_medications', 'n_outpatient', 'n_inpatient', 'n_emergency']
df = df.astype(dict.fromkeys(cols, float))

# Clean data: drop rows with any missing value; copy so the new column below
# is written to an independent frame.
clean_df = df.dropna().copy()

# 1 when readmitted == 'yes', 0 for anything else (vectorised comparison
# instead of a per-row Python lambda).
clean_df['is_readmitted'] = clean_df['readmitted'].eq('yes').astype('int64')

# Rows whose specialty is recorded as the literal string 'Missing' are excluded.
clean_df = clean_df[clean_df['medical_specialty'] != 'Missing'].copy()

# Keep only rows that belong to a sufficiently large specialty. transform('size')
# gives every row its group's size, so a plain boolean mask replaces the
# per-group lambda filter while keeping the original row order.
specialty_sizes = clean_df.groupby('medical_specialty')['medical_specialty'].transform('size')
specialty_stats = clean_df[specialty_sizes > MIN_SPECIALTY_ROWS]

# Per-specialty readmission rate (mean of the 0/1 flag) and row count.
specialty_agg = specialty_stats.groupby('medical_specialty').agg(
    readmission_rate=('is_readmitted', 'mean'),
    count=('medical_specialty', 'count')
).reset_index()

# nlargest returns fewer rows when fewer specialties pass the size threshold.
top_10_specialties = specialty_agg.nlargest(TOP_N_SPECIALTIES, 'readmission_rate')

# NOTE(review): this rate is computed AFTER the 'Missing' specialty rows were
# dropped (and it still includes the small specialties excluded from the
# ranking), so it is not the rate over the whole file - confirm that this is
# the intended meaning of "overall".
overall_rate = clean_df['is_readmitted'].mean()

# Persist the ranking for reuse outside the notebook.
top_10_specialties.to_csv('specialty_readmission.csv', index=False)


print(f"Overall readmission rate: {overall_rate:.1%}")
print("\nTop 10 specialties (without Missing):")
print(top_10_specialties)

Overall readmission rate: 45.2%

Top 10 specialties (without Missing):
        medical_specialty  readmission_rate  count
2  Family/GeneralPractice          0.495218   1882
1        Emergency/Trauma          0.493899   1885
0              Cardiology          0.449965   1409
3        InternalMedicine          0.447686   3565
4                   Other          0.414790   2664
5                 Surgery          0.412201   1213
