In [1]:
import numpy as np 
import pandas as pd
import altair as alt
from vega_datasets import data

In [2]:
hosp=pd.read_csv('C:/Users/ABC/Desktop/Projects/healthcare analytics/train_data.csv')
hosp.sample(5)

Unnamed: 0,case_id,Hospital_code,Hospital_type_code,City_Code_Hospital,Hospital_region_code,Available Extra Rooms in Hospital,Department,Ward_Type,Ward_Facility_Code,Bed Grade,patientid,City_Code_Patient,Type of Admission,Severity of Illness,Visitors with Patient,Age,Admission_Deposit,Stay
87046,87047,32,f,9,Y,2,radiotherapy,S,B,3.0,17424,8.0,Trauma,Moderate,2,31-40,5812.0,41-50
85642,85643,28,b,11,X,4,anesthesia,R,F,2.0,63504,2.0,Trauma,Moderate,3,0-10,8296.0,21-30
145553,145554,25,e,1,X,2,radiotherapy,Q,E,3.0,21034,10.0,Trauma,Minor,6,31-40,3914.0,51-60
232104,232105,28,b,11,X,5,gynecology,R,F,2.0,101020,15.0,Trauma,Moderate,2,31-40,4057.0,31-40
168269,168270,9,d,5,Z,5,radiotherapy,Q,F,1.0,66447,9.0,Emergency,Moderate,2,11-20,5305.0,21-30


In [19]:
Hospital_code ='HOSP_' + hosp['Hospital_code'].astype(str)  
City_code ='CITY_' + hosp['City_Code_Hospital'].astype(str)

In [26]:
print("Unique Cities:{}".format(City_code.nunique()))
print("Number of unique Hospitals present in the dataset : {}".format(Hospital_code.nunique()))
print("Number of Cases present in the dataset : {}".format(hosp.shape[0]))
print("Number of unique Patients present in the dataset : {}".format(hosp['patientid'].nunique()))

Unique Cities:11
Number of unique Hospitals present in the dataset : 32
Number of Cases present in the dataset : 318438
Number of unique Patients present in the dataset : 92017


In [37]:
Hospital_type=hosp['Hospital_type_code'].value_counts().to_frame().reset_index().rename(columns={'index':'type','Hospital_type_code':'count'})

In [32]:
base = alt.Chart(Hospital_visaits)

bar = base.mark_bar(color='royalblue').encode(
    x=alt.X('total', bin=alt.Bin(step=2500)),
    y='count()',
    tooltip=[alt.Tooltip('count():Q')]
).properties(
    width=600,
    height=400,
    title="Cases observed in each Hospital with Global Mean"
)

rule = base.mark_rule(color='red').encode(
    x='mean(total)',
    size=alt.value(5),
    tooltip=[alt.Tooltip('mean(total):Q')]
)

(bar+rule)

In [35]:
print("Average cases per hospital: {}".format(Hospital_visits['total'].mean()))

Average cases per hospital: 9951.1875


# 32 hospitals had 318438 cases and treated 92017 patients
# Average cases to hospitals are 9951.

Type a hospitals has highest cases and type G has lowest one

In [39]:
Hospital_visits=hosp['Hospital_code'].value_counts().to_frame().reset_index().rename(columns={'index':'hospital','Hospital_code':'total'})
bars = alt.Chart(Hospital_type).mark_bar(color="purple").encode(
    x='type',
    y="count",
    tooltip=[alt.Tooltip('count:Q')]
    
)

text = bars.mark_text(
    align='center',
    baseline='middle',
    dy=-7 ,
    size=15,
).encode(text='count')

(bars + text).properties(
    width=600,
    height=400,
    title="Types of Hospital")

# Cases handled by different hospitals over cities

In [5]:
city_df=hosp.groupby(['City_Code_Hospital','Hospital_code']).size().reset_index().rename(columns={0:'count'})
city_df['Hospital_code']=city_df['Hospital_code'].astype('category')
city_df['City_Code_Hospital']=city_df['City_Code_Hospital'].astype('category')
city_df['hosp_city_code']=city_df['City_Code_Hospital'].astype(str)+"-"+city_df['Hospital_code'].astype(str)

In [6]:
bars=alt.Chart(city_df).mark_bar().encode(
    x='count',
    y='City_Code_Hospital',
    color=alt.Color('hosp_city_code',title='Hospital'),
    tooltip=[alt.Tooltip('hosp_city_code:N'),
             alt.Tooltip('City_Code_Hospital:N'),
             alt.Tooltip('count:Q'),
            ]
    
).properties(
    width=550,
    height=400,
    title="Cases observed in Hospital -  (Hover over each segment of Bar to understand distribution)")

text = alt.Chart(city_df).mark_text(dx=-20, dy=3, color='white').encode(
    x=alt.X('count', stack='zero'),
    y=alt.Y('City_Code_Hospital',title="City"),
    detail='hosp_city_code',
    text=alt.Text('count'))

bars+text

# How Often did patient visited hospital

In [8]:
pat_visit=hosp['patientid'].value_counts().to_frame().reset_index().rename(columns={'index':'patient','patientid':'total'})
chart = alt.layer(
    data=pat_visit[:10]
)
chart += alt.Chart().mark_line().encode(
    alt.X('total:Q',title='Number of Visits'),
    alt.Y('patient:N',title='PatientId'),
    detail='patient:N'
    
).properties(title="Most Visited Patients",width=550,height=200)

chart += alt.Chart().mark_point(color='royalblue',
    size=100,
    opacity=1,
    filled=True
).encode(
   alt.X('total:Q',title='Number of Visits'),
    alt.Y('patient:N',title='PatientId'),
    tooltip=[alt.Tooltip('patient:Q'),
             alt.Tooltip('total:Q')
            ]
)

chart

In [9]:
base = alt.Chart(pat_visit.sample(5000))

bar = base.mark_bar(color='royalblue').encode(
    x=alt.X('total',bin=alt.Bin(step=1),title="Number of Visits"),
    y=alt.Y('count()',title="Number of Patients"),
    tooltip=[alt.Tooltip('total:N'),
            alt.Tooltip('count():Q')]
).properties(
    width=600,
    height=400,
    title="How often a patient visit the same hospital with Global Mean"
)

rule = base.mark_rule(color='red').encode(
    x='mean(total)',
    size=alt.value(5),
    tooltip=[alt.Tooltip('mean(total)')])

bar + rule

# Did patient visited the same hospitals

In [10]:
change_visit=hosp[['patientid','Hospital_code']].groupby('patientid').size().reset_index().rename(columns={0:'total_changes'})
change_visit['total_changes'].describe()

count    92017.000000
mean         3.460643
std          2.357492
min          1.000000
25%          2.000000
50%          3.000000
75%          5.000000
max         50.000000
Name: total_changes, dtype: float64

In [11]:
print("Patients visited the same hospital: {} %".format(round(change_visit[change_visit['total_changes']==1].shape[0]/change_visit.shape[0]*100)))

Patients visited the same hospital: 22 %


# Most Visited Department

In [12]:
dep_df=hosp['Department'].value_counts().to_frame().reset_index().rename(columns={'index':'department','Department':'total'})

In [13]:
alt.Chart(dep_df).transform_joinaggregate(
    TotalTime='sum(total)',
).transform_calculate(
    PercentOfTotalVisits="datum.total / datum.TotalTime"
).mark_bar(color="maroon").encode(
    alt.X('PercentOfTotalVisits:Q', axis=alt.Axis(format='.0%')),
    y='department:N',
    tooltip=[alt.Tooltip('department:N'),
             alt.Tooltip('PercentOfTotalVisits:Q')]
).properties(height=400,width=600,title="Distribution of Departments visited by patients")

# Do old age people stay longer?

In [16]:
stay_bin=hosp['Stay'].value_counts().to_frame().reset_index().rename(columns={'index':'Stay','Stay':'count'})

In [17]:
bars = alt.Chart(stay_bin).mark_bar(color="green").encode(
    x='Stay',
    y=alt.Y("count",title="Number of Days")
)

text = bars.mark_text(
    align='center',
    baseline='middle',
    dy=-7 ,
    size=15,
).encode(
    text='count',
    tooltip=[alt.Tooltip('Stay'),
            alt.Tooltip('count')]
    
)

(bars + text).properties(
    width=600,
    height=400,
    title="Days Stayed by Patients in Hospital "
)