In [1]:
import pandas as pd
import warnings
from IPython.utils import io
import sys
import numpy as np

# Silence every warning category so cell outputs stay readable
# (note: this also hides pandas deprecation notices).
warnings.filterwarnings('ignore')

# Lift pandas' display truncation for both columns and rows.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Root of the stars-data-builder checkout; the data paths in the cells below are prefixed with it.
stars_dir = '~/GitHub/stars-data-builder/'

## Load SAS pack primary output data

In [2]:
# 2025 -- predicted star ratings (built from October 2024 data)

df_2025 = pd.read_csv(
    stars_dir + 'Reproduce_Stars_Input/2025/SAS_output/CMS_Stars_2025_predictions_from_Oct_2024_data.csv'
)
df_2025['PROVIDER_ID'] = df_2025['PROVIDER_ID'].astype(str)
# Decode any bytes cells to str; every other value passes through unchanged.
df_2025 = df_2025.applymap(lambda cell: cell.decode() if isinstance(cell, bytes) else cell)
df_2025.columns = df_2025.columns.str.strip()

## VHA hospitals carry an imputed '666666' suffix: swap the last six characters
## for the original 'F' suffix, then left-pad every ID with zeros to six characters.
prvdrs = [
    (pid[:-6] + 'F' if '666666' in pid else pid).rjust(6, '0')
    for pid in map(str, df_2025['PROVIDER_ID'].tolist())
]
df_2025['PROVIDER_ID'] = prvdrs

# Only the first character of cnt_grp's string form is kept before it is made numeric.
if 'cnt_grp' in df_2025.columns:
    df_2025['cnt_grp'] = df_2025['cnt_grp'].astype(str).str[0]

# Every column except the identifier becomes numeric; unparseable values turn into NaN.
for col in [name for name in df_2025.columns if name != 'PROVIDER_ID']:
    df_2025[col] = pd.to_numeric(df_2025[col], errors='coerce')

# Added after the numeric conversion, so the release year stays a string label.
df_2025['Release year'] = '2025'
# Drop hospitals without a star value.
df_2025 = df_2025[df_2025['star'].notna()]
prvdrs_2025 = df_2025['PROVIDER_ID'].unique()

print(len(prvdrs_2025), 'hospitals with stars predictions for 2025')
df_2025.head()


2900 hospitals with stars predictions for 2025


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,0.297087,0.359381,0.345929,0.031544,-0.605072,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.154858,7,7,11,8,10,5,2,1,3.0,4.0,2025
1,10005,-0.842133,0.628147,-0.398706,0.114241,-0.331341,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.14942,6,7,9,8,11,5,2,1,3.0,3.0,2025
2,10006,-1.540552,-0.10599,0.685865,-1.307398,-0.438027,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.55154,7,8,9,8,9,5,2,1,3.0,2.0,2025
3,10007,-3.327859,-0.931837,-0.320822,1.283103,-3.018509,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-1.087652,3,3,7,8,7,5,2,1,3.0,1.0,2025
5,10011,-0.553031,-0.59124,0.30465,-0.043858,-1.017479,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.316463,7,7,9,8,7,5,2,1,3.0,2.0,2025


In [3]:
# 2024 -- actual (July 2024 release)

df_2024 = pd.read_csv(stars_dir + '2024/2024-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2024.csv')
df_2024['PROVIDER_ID'] = df_2024['PROVIDER_ID'].astype(str)
# Decode any bytes cells to str; every other value passes through unchanged.
df_2024 = df_2024.applymap(lambda cell: cell.decode() if isinstance(cell, bytes) else cell)
df_2024.columns = df_2024.columns.str.strip()

# IDs shorter than six characters receive a single leading zero.
prvdrs = [
    ('0' + pid) if len(pid) < 6 else pid
    for pid in df_2024['PROVIDER_ID'].astype(str).tolist()
]
df_2024['PROVIDER_ID'] = prvdrs

# Only the first character of cnt_grp's string form is kept before it is made numeric.
if 'cnt_grp' in df_2024.columns:
    df_2024['cnt_grp'] = df_2024['cnt_grp'].astype(str).str[0]

# Every column except the identifier becomes numeric; unparseable values turn into NaN.
for col in [name for name in df_2024.columns if name != 'PROVIDER_ID']:
    df_2024[col] = pd.to_numeric(df_2024[col], errors='coerce')

# Added after the numeric conversion, so the release year stays a string label.
df_2024['Release year'] = '2024'
# Drop hospitals without a star value.
df_2024 = df_2024[df_2024['star'].notna()]
prvdrs_2024 = df_2024['PROVIDER_ID'].unique()

print(len(prvdrs_2024), 'hospitals with stars predictions for 2024')
df_2024.head()


2847 hospitals with stars predictions for 2024


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,2024
1,10005,-1.440587,0.720263,-0.08772,-0.255125,-0.489001,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.292577,6,7,9,8,12,5,2,1,3.0,2.0,2024
2,10006,-1.462748,-0.269475,-0.173331,-1.097088,-0.754912,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.751171,7,7,9,8,11,5,2,1,3.0,1.0,2024
3,10007,-3.527615,-0.470903,0.393999,0.199767,-1.804879,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.965631,3,2,7,8,7,4,1,1,2.0,1.0,2024
5,10011,-0.47672,-0.402191,0.346623,-0.030498,-1.046286,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.249367,7,7,9,8,8,5,2,1,3.0,3.0,2024


In [4]:
# 2023 -- actual (July 2023 release)

df_2023 = pd.read_csv(stars_dir + '2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv')
print(df_2023.shape)
df_2023['PROVIDER_ID'] = df_2023['PROVIDER_ID'].astype(str)
# Decode any bytes cells to str; every other value passes through unchanged.
df_2023 = df_2023.applymap(lambda cell: cell.decode() if isinstance(cell, bytes) else cell)
df_2023.columns = df_2023.columns.str.strip()
# Second shape print: confirms the cleanup above did not change the frame's dimensions.
print(df_2023.shape)

# IDs shorter than six characters receive a single leading zero.
prvdrs = [
    ('0' + pid) if len(pid) < 6 else pid
    for pid in df_2023['PROVIDER_ID'].astype(str).tolist()
]
df_2023['PROVIDER_ID'] = prvdrs

# Only the first character of cnt_grp's string form is kept before it is made numeric.
if 'cnt_grp' in df_2023.columns:
    df_2023['cnt_grp'] = df_2023['cnt_grp'].astype(str).str[0]

# Every column except the identifier becomes numeric; unparseable values turn into NaN.
for col in [name for name in df_2023.columns if name != 'PROVIDER_ID']:
    df_2023[col] = pd.to_numeric(df_2023[col], errors='coerce')

# Added after the numeric conversion, so the release year stays a string label.
df_2023['Release year'] = '2023'
# Drop hospitals without a star value.
df_2023 = df_2023[df_2023['star'].notna()]
prvdrs_2023 = df_2023['PROVIDER_ID'].unique()

print(len(prvdrs_2023), 'hospitals with a star ratings in 2023')
df_2023.head()


(4654, 27)
(4654, 27)
3076 hospitals with a star ratings in 2023


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,2023
1,10005,-1.564103,0.560369,-0.237844,-0.166838,-0.302742,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.346181,6,7,10,8,11,5,2,1,3.0,2.0,2023
2,10006,-1.694318,-0.554988,-0.089526,-1.241108,-0.17935,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.809109,7,7,9,8,11,5,2,1,3.0,1.0,2023
3,10007,-2.40715,-0.488553,0.022657,0.993806,-0.65976,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.492604,3,2,6,8,7,4,1,1,2.0,2.0,2023
5,10011,-0.517349,-0.624302,0.42877,0.134223,-2.385055,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.413511,7,7,9,8,8,5,2,1,3.0,2.0,2023


In [5]:
# 2022 -- actual (July 2022 release)

df_2022 = pd.read_csv(stars_dir + '2022/2022-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2022.csv')
# This file's IDs are passed through int first, then rendered as text.
df_2022['PROVIDER_ID'] = df_2022['PROVIDER_ID'].astype(int).astype(str)
# Decode any bytes cells to str; every other value passes through unchanged.
df_2022 = df_2022.applymap(lambda cell: cell.decode() if isinstance(cell, bytes) else cell)
df_2022.columns = df_2022.columns.str.strip()

# IDs shorter than six characters receive a single leading zero.
prvdrs = [
    ('0' + pid) if len(pid) < 6 else pid
    for pid in df_2022['PROVIDER_ID'].astype(str).tolist()
]
df_2022['PROVIDER_ID'] = prvdrs

# Only the first character of cnt_grp's string form is kept before it is made numeric.
if 'cnt_grp' in df_2022.columns:
    df_2022['cnt_grp'] = df_2022['cnt_grp'].astype(str).str[0]

# Every column except the identifier becomes numeric; unparseable values turn into NaN.
for col in [name for name in df_2022.columns if name != 'PROVIDER_ID']:
    df_2022[col] = pd.to_numeric(df_2022[col], errors='coerce')

# Added after the numeric conversion, so the release year stays a string label.
df_2022['Release year'] = '2022'
# Drop hospitals without a star value.
df_2022 = df_2022[df_2022['star'].notna()]
prvdrs_2022 = df_2022['PROVIDER_ID'].unique()

print(len(prvdrs_2022), 'hospitals with a star ratings in 2022')
df_2022.head()


3121 hospitals with a star ratings in 2022


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,-0.846744,-0.321758,0.591483,0.0721,-0.320038,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.149487,7,8,11,8,9,5,2,1,3.0,3.0,2022
1,10005,-1.354201,1.419195,-0.145974,0.056106,0.003965,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.004997,6,7,10,8,11,5,2,1,3.0,3.0,2022
2,10006,-1.645507,0.033444,-0.269957,-0.966379,0.02668,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.623446,7,7,9,8,9,5,2,1,3.0,2.0,2022
3,10007,-1.663574,-0.804531,1.163333,0.357169,0.841066,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.107545,3,2,6,8,5,4,1,1,2.0,3.0,2022
5,10011,-1.032452,-1.670115,0.164188,-0.31908,-1.17147,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.769217,7,7,9,8,5,5,2,1,3.0,2.0,2022


In [6]:
# 2021 -- actual (April 2021 release)

df_2021 = pd.read_csv(stars_dir + '2021/2021-04 Stars Release/SAS_CSV_output/CMS_Stars_Apr_2021.csv')
# This file's IDs are passed through int first, then rendered as text.
df_2021['PROVIDER_ID'] = df_2021['PROVIDER_ID'].astype(int).astype(str)
# Decode any bytes cells to str; every other value passes through unchanged.
df_2021 = df_2021.applymap(lambda cell: cell.decode() if isinstance(cell, bytes) else cell)
df_2021.columns = df_2021.columns.str.strip()

# IDs shorter than six characters receive a single leading zero.
prvdrs = [
    ('0' + pid) if len(pid) < 6 else pid
    for pid in df_2021['PROVIDER_ID'].astype(str).tolist()
]
df_2021['PROVIDER_ID'] = prvdrs

# Only the first character of cnt_grp's string form is kept before it is made numeric.
if 'cnt_grp' in df_2021.columns:
    df_2021['cnt_grp'] = df_2021['cnt_grp'].astype(str).str[0]

# Every column except the identifier becomes numeric; unparseable values turn into NaN.
for col in [name for name in df_2021.columns if name != 'PROVIDER_ID']:
    df_2021[col] = pd.to_numeric(df_2021[col], errors='coerce')

# Added after the numeric conversion, so the release year stays a string label.
df_2021['Release year'] = '2021'
# Drop hospitals without a star value.
df_2021 = df_2021[df_2021['star'].notna()]
prvdrs_2021 = df_2021['PROVIDER_ID'].unique()

print(len(prvdrs_2021), 'hospitals with a star ratings in 2021')
df_2021.head()

3355 hospitals with a star ratings in 2021


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,2021
1,10005,-2.799407,0.064559,-0.114854,0.055327,-0.071058,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.62329,6,7,10,8,14,5,2,1,3.0,2.0,2021
2,10006,-1.984738,-0.079649,0.302983,-0.966582,-0.462672,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.655678,7,7,9,8,11,5,2,1,3.0,2.0,2021
3,10007,-2.050643,-0.885254,1.200369,0.356272,-0.642649,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.380554,4,2,6,8,7,4,1,1,2.0,2.0,2021
4,10008,-1.612519,0.261221,-0.00278,,-0.867725,0.22,0.22,0.22,0.22,0.12,,0.282051,0.282051,0.282051,0.153846,-0.515416,3,1,4,0,8,3,1,1,1.0,2.0,2021


## Merge 2025 with 2024, 2023, 2022, and 2021

In [7]:
# Fold the earlier releases into the 2025 frame, newest first. No `on=` is
# passed, so each outer merge joins on whatever columns the two frames share.
main_df = df_2025
for earlier_release in (df_2024, df_2023, df_2022, df_2021):
    main_df = main_df.merge(earlier_release, how='outer')

# Per-year shapes (oldest to newest) followed by the combined shape.
for frame in (df_2021, df_2022, df_2023, df_2024, df_2025, main_df):
    print(frame.shape)

# Highest summary score first; cnt_grp and star break ties, also descending.
main_df = main_df.sort_values(by=['summary_score', 'cnt_grp', 'star'], ascending=False)
print(list(main_df))

prvdrs = main_df['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in main_df')

main_df.head()

(3355, 28)
(3121, 28)
(3076, 28)
(2847, 28)
(2900, 28)
(15299, 28)
['PROVIDER_ID', 'Std_Outcomes_Mortality_score', 'Std_Outcomes_Readmission_score', 'Std_Outcomes_Safety_score', 'Std_PatientExp_score', 'Std_Process_score', 'std_weight_PatientExperience', 'std_weight_Readmission', 'std_weight_Mortality', 'std_weight_safety', 'std_weight_Process', 'weight_PatientExperience', 'weight_Outcomes_Readmission', 'weight_Outcomes_Mortality', 'weight_Outcomes_Safety', 'weight_Process', 'summary_score', 'Outcomes_Mortality_cnt', 'Outcomes_safety_cnt', 'Outcomes_Readmission_cnt', 'Patient_Experience_cnt', 'Process_cnt', 'Total_measure_group_cnt', 'MortSafe_Group_cnt', 'report_indicator', 'cnt_grp', 'star', 'Release year']
3562 hospitals in main_df


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
6857,170183,,4.290477,2.032927,2.223623,0.873621,0.22,0.22,0.22,0.22,0.12,0.282051,0.282051,,0.282051,0.153846,2.545103,0,3,3,8,3,4,1,1,2.0,5.0,2023
7683,330270,0.546448,7.145042,1.162883,1.589132,1.830755,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,2.517262,1,5,3,8,3,4,1,1,2.0,5.0,2023
14053,330270,2.882665,5.122073,1.454899,1.353749,0.20903,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,2.404028,1,6,3,8,2,3,1,1,1.0,5.0,2021
14289,360351,,3.795795,1.824891,1.475751,2.547755,0.22,0.22,0.22,0.22,0.12,0.282051,0.282051,,0.282051,0.153846,2.393521,0,3,3,8,2,3,1,1,1.0,5.0,2021
9935,170183,,2.762484,2.093786,2.082586,1.056321,0.22,0.22,0.22,0.22,0.12,0.282051,0.282051,,0.282051,0.153846,2.119624,0,3,3,8,1,3,1,1,1.0,5.0,2022


## Load SAS pack secondary (domain-specific) files
These data files pertain to standardized scores for each component of a particular measure domain.

In [8]:
# For 2025 -- predicted
# NOTE: this cell rebinds df_2025 (previously the primary CSV output) to the
# merged domain-level frame.

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']


def _restore_vha_provider_id(raw_id):
    """Return the six-character provider ID string for a numeric PROVIDER_ID value.

    The value is converted to int and rendered as text. If that text contains
    the imputed '666666' marker, its last six characters are replaced by 'F'.
    The result is left-padded with zeros to six characters.
    """
    pid = str(int(raw_id))
    if '666666' in pid:
        pid = pid[:-6] + 'F'
    return pid.rjust(6, '0')


# The mortality file seeds the combined frame; reader output is captured, not shown.
with io.capture_output() as captured:
    df_2025 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2025/predicted/Oct2024/outcome_mortality.sas7bdat')
df_2025['PROVIDER_ID'] = [_restore_vha_provider_id(p) for p in df_2025['PROVIDER_ID'].tolist()]

# Columns from 'C1' onward are prefixed with the domain name; earlier columns keep their names.
cols = list(df_2025)
ind = cols.index('C1')
new_cols = cols[:ind] + ['mortality_' + c for c in cols[ind:]]
print(new_cols)
df_2025.columns = new_cols

# Remaining domains: read, normalise IDs, prefix columns, then outer-merge on PROVIDER_ID.
for prefix, f in zip(f_ext, fnames):
    with io.capture_output() as captured:
        tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2025/predicted/Oct2024/' + f + '.sas7bdat')
    tdf['PROVIDER_ID'] = [_restore_vha_provider_id(p) for p in tdf['PROVIDER_ID'].tolist()]

    cols = list(tdf)
    ind = cols.index('C1')
    new_cols = cols[:ind] + [prefix + c for c in cols[ind:]]
    print(new_cols)
    tdf.columns = new_cols

    df_2025 = df_2025.merge(tdf, on='PROVIDER_ID', how='outer')

df_2025['Release year'] = '2025'
# Decode any bytes cells to str; every other value passes through unchanged.
df_2025 = df_2025.applymap(lambda cell: cell.decode() if isinstance(cell, bytes) else cell)
# Keep only hospitals that have a 2025 star value in the primary output.
df_2025 = df_2025[df_2025['PROVIDER_ID'].isin(prvdrs_2025)]
prvdrs = df_2025['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2025')
df_2025.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,1.406667,-1.04537,1.012712,0.652001,-0.313538,0.023985,-0.732699,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,0.143394,-0.073481,0.730006,0.297087,1.021693,-0.36918,-0.44784,0.291152,0.607499,0.86006,0.191466,0.767466,-1.029383,0.627143,-0.259345,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.205521,0.013485,0.534354,0.359381,0.696915,0.303101,0.74229,-0.468871,,0.477107,-0.098662,0.103228,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.25073,-0.019735,0.781849,0.345929,-0.524696,1.04392,0.080311,0.781915,-1.064893,-1.1425,,-3.168749,0.589498,0.141249,0.446595,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.281735,0.034345,0.522385,-0.605072,-1.160819,0.560234,-0.240795,-0.264121,-0.32048,0.919364,0.711438,0.013951,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.027346,1.301665e-17,0.866926,0.031544,2025
1,10005,-1.493176,,0.151652,-0.365326,-1.763074,-0.0829,-0.576635,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-0.688243,-0.073481,0.730006,-0.842133,,-0.331102,0.548845,-1.034497,,2.449658,1.035812,1.652476,1.889286,-0.072125,-2.996117,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.349137,0.013485,0.534354,0.628147,1.155349,-0.333682,-1.066986,1.1482,,-2.746595,-0.418689,-0.057836,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.331463,-0.019735,0.781849,-0.398706,-0.395655,-0.264002,-1.616515,0.374613,0.300777,-0.272553,-0.094732,0.33368,-1.160754,0.346866,0.922113,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.138742,0.034345,0.522385,-0.331341,-0.252167,0.560234,-1.177965,0.752728,0.66872,-0.048276,-0.233316,0.522354,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.099039,1.301665e-17,0.866926,0.114241,2025
2,10006,-3.139032,-2.730472,-0.510702,-0.134116,-0.624153,-0.243229,-1.004953,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.198094,-0.073481,0.730006,-1.540552,-0.543063,0.368574,-0.017553,1.106936,-0.951609,0.86006,0.191466,-0.117544,,,-1.285635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.043152,0.013485,0.534354,-0.10599,-0.831198,0.977982,1.031569,1.1482,1.108414,0.595968,0.964283,-0.863156,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.516508,-0.019735,0.781849,0.685865,-0.524696,-0.590982,-1.065046,0.316427,-0.316581,0.597395,,-0.45258,,0.552484,-0.266681,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,9.0,0.111111,-0.194473,0.034345,0.522385,-0.438027,-1.160819,-1.449464,-1.177965,-1.28097,-1.30968,-1.015916,-1.178071,-0.494452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-1.133417,1.301665e-17,0.866926,-1.307398,2025
3,10007,,,-2.69647,-1.012717,-3.799328,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-2.502838,-0.073481,0.730006,-3.327859,,-0.859429,-0.763969,-0.218713,,-0.431487,0.191466,-0.707551,,,-0.601442,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,7.0,0.142857,-0.484446,0.013485,0.534354,-0.931837,0.08567,,,,,,-1.269045,0.371668,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.333333,-0.270569,-0.019735,0.781849,-0.320822,-0.524696,-2.770851,-0.322685,,0.581394,-0.707526,,-4.884224,,,-2.168752,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,7.0,0.142857,-1.542477,0.034345,0.522385,-3.018509,1.565137,1.565082,0.696376,1.769578,1.657921,0.919364,0.711438,0.013951,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,1.112356,1.301665e-17,0.866926,1.283103,2025
5,10011,-0.160816,1.000826,-0.775644,-0.966475,-1.555997,0.451529,-1.333802,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.477197,-0.073481,0.730006,-0.553031,0.941154,-1.292562,-0.096585,0.393125,-1.431334,-0.133438,0.360335,-1.887565,,,0.424848,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.302447,0.013485,0.534354,-0.59124,-0.525576,-0.03434,0.519768,1.1482,,0.27151,-0.114664,0.264292,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.218456,-0.019735,0.781849,0.30465,-0.484991,,-0.428737,,-0.559782,-1.1425,-1.45485,0.619592,,,-0.028923,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,7.0,0.142857,-0.49717,0.034345,0.522385,-1.017479,-0.252167,0.560234,-0.240795,-0.264121,0.66872,-0.048276,-0.233316,-0.494452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.038022,1.301665e-17,0.866926,-0.043858,2025


In [9]:
# For 2024 -- actual
# NOTE: this cell rebinds df_2024 (previously the primary CSV output) to the
# merged domain-level frame.

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

# The mortality file seeds the combined frame; reader output is captured, not shown.
with io.capture_output() as captured:
    df_2024 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2024/actual/outcome_mortality.sas7bdat')

# IDs shorter than six characters receive a single leading zero.
# NOTE(review): '0' + p requires str IDs; if read_sas yields bytes IDs shorter
# than six characters this raises TypeError -- confirm against the data.
df_2024['PROVIDER_ID'] = [
    ('0' + p) if len(p) < 6 else p
    for p in df_2024['PROVIDER_ID'].tolist()
]

# Columns from 'C1' onward are prefixed with the domain name; earlier columns keep their names.
cols = list(df_2024)
ind = cols.index('C1')
new_cols = cols[:ind] + ['mortality_' + c for c in cols[ind:]]
print(new_cols)
df_2024.columns = new_cols

# Remaining domains: read, normalise IDs, prefix columns, then outer-merge on PROVIDER_ID.
for j, f in enumerate(fnames):
    with io.capture_output() as captured:
        tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2024/actual/' + f + '.sas7bdat')

    # Pad the IDs of the frame just read (tdf) so its merge key is normalised
    # the same way as df_2024's; padding df_2024 again here would leave tdf's
    # key untouched.
    tdf['PROVIDER_ID'] = [
        ('0' + p) if len(p) < 6 else p
        for p in tdf['PROVIDER_ID'].tolist()
    ]

    cols = list(tdf)
    ind = cols.index('C1')
    new_cols = cols[:ind] + [f_ext[j] + c for c in cols[ind:]]
    print(new_cols)
    tdf.columns = new_cols

    df_2024 = df_2024.merge(tdf, on='PROVIDER_ID', how='outer')

df_2024['Release year'] = ['2024']*df_2024.shape[0]
# Decode any bytes cells to str; every other value passes through unchanged.
df_2024 = df_2024.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
# Keep only hospitals that have a 2024 star value in the primary output.
df_2024 = df_2024[df_2024['PROVIDER_ID'].isin(prvdrs_2024)]
prvdrs = df_2024['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2024')
df_2024.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_3B,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,0.423838,-1.03233,0.626949,-0.252865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,-0.431525,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,,0.000412,0.583665,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,2024
1,10005,-0.914388,,-0.524963,-1.584524,-1.953144,-0.829076,-0.692504,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-1.0831,-0.068391,0.704371,-1.440587,,-0.736899,0.486741,-1.03689,,2.375902,1.641861,2.033114,1.890125,-0.073504,-2.967381,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.401452,0.014038,0.537879,0.720263,1.609414,-2.431638,-0.539793,0.114598,,0.421669,0.024071,0.176149,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.089361,-0.021526,0.773315,-0.08772,-0.722505,0.123623,-1.210843,-0.383371,0.310092,-0.273613,-0.62181,0.33593,0.277803,-1.342585,0.090196,0.605497,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,12.0,0.083333,-0.234299,0.029039,0.538522,-0.489001,-0.373358,0.659449,-1.990779,0.124487,0.479704,-0.124536,-0.390422,-0.160629,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.222011,1.093644e-15,0.870204,-0.255125,2024
2,10006,-3.424522,-0.831956,-0.524963,-0.346527,-0.4597,-1.873369,-0.229927,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.098709,-0.068391,0.704371,-1.462748,-1.118873,0.39047,0.268626,1.109892,-1.462457,1.508362,-1.946801,1.343425,,,-1.270808,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.130907,0.014038,0.537879,-0.269475,-2.585604,0.836443,0.4962,1.213392,,-1.028257,0.918161,-0.939297,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.155566,-0.021526,0.773315,-0.173331,-0.832877,-0.583976,-0.830344,-0.555383,-0.107517,0.592956,0.242409,-0.44207,,-1.134905,-0.403273,-0.097497,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.377498,0.029039,0.538522,-0.754912,-1.363769,-0.400063,-1.117075,-0.950959,-0.558423,-1.125468,-1.439013,-0.682755,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.954691,1.093644e-15,0.870204,-1.097088,2024
3,10007,,,-3.316094,-0.346527,-3.996805,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-2.553142,-0.068391,0.704371,-3.527615,,0.272686,0.607081,-0.219069,,-0.573736,0.130845,-1.300386,,,-0.592179,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,7.0,0.142857,-0.239251,0.014038,0.537879,-0.470903,,,,,,,0.278624,0.287694,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.283159,-0.021526,0.773315,0.393999,-2.789467,-1.346006,0.511414,,0.546132,-0.706898,,-4.827162,,,,2.011485,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,7.0,0.142857,-0.942929,0.029039,0.538522,-1.804879,-0.373358,1.718961,0.630332,0.124487,-0.558423,-0.124536,0.133873,-0.160629,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.173838,1.093644e-15,0.870204,0.199767,2024
5,10011,-0.568162,0.574279,-0.818766,-0.862359,-1.009916,0.819808,-0.964139,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.404179,-0.068391,0.704371,-0.47672,0.793358,-0.900956,-0.291705,0.394298,-1.677962,-0.313474,0.130845,-0.380799,,,0.425764,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.202292,0.014038,0.537879,-0.402191,-0.579291,-0.201752,0.219719,0.83415,,0.15493,-0.272556,1.570457,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.246522,-0.021526,0.773315,0.346623,0.100266,0.178053,0.05081,,-0.924578,-1.140182,-2.299412,0.61884,,,,-0.859074,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,8.0,0.125,-0.534409,0.029039,0.538522,-1.046286,-0.373358,-0.400063,0.630332,0.124487,0.479704,-0.124536,0.133873,-0.682755,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.02654,1.093644e-15,0.870204,-0.030498,2024


In [10]:
# For 2023
#
# Assemble the 2023 measure-level table: start from the mortality group file,
# then outer-merge the readmission, safety, process and patient-experience
# group files onto it by PROVIDER_ID.

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']


def _clean_provider_id(p):
    """Return a provider ID as text, left-padded with zeros to six characters.

    ``pd.read_sas`` is called below without ``encoding``, so character values
    may arrive as ``bytes``. They are decoded before padding because prepending
    the text ``'0'`` to a ``bytes`` value raises ``TypeError``. Values that are
    neither ``bytes`` nor ``str`` (e.g. a missing value) are returned unchanged.
    """
    if isinstance(p, bytes):
        p = p.decode()
    if isinstance(p, str):
        # Pad all the way to six characters (not just one), in line with the
        # `while len(p) < 6` padding used for the 2025 predictions.
        p = p.rjust(6, '0')
    return p


# NOTE(review): the capture_output wrapper presumably silences messages
# emitted while the SAS file is read -- confirm.
with io.capture_output() as captured:
    df_2023 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2023/actual/outcome_mortality.sas7bdat')
    df_2023['PROVIDER_ID'] = df_2023['PROVIDER_ID'].map(_clean_provider_id)


# Columns from 'C1' onward carry the same names in every group file, so they
# are prefixed with the group name to stay distinct after merging.
cols = list(df_2023)
ind = cols.index('C1')
new_cols = cols[:ind] + ['mortality_' + c for c in cols[ind:]]
print(new_cols)
df_2023.columns = new_cols

for j, f in enumerate(fnames):
    with io.capture_output() as captured:
        tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2023/actual/' + f + '.sas7bdat')

    # Normalise the merge key the same way as in the mortality table so the
    # keys on both sides of the merge have the same type and width.
    tdf['PROVIDER_ID'] = tdf['PROVIDER_ID'].map(_clean_provider_id)

    cols = list(tdf)
    ind = cols.index('C1')
    new_cols = cols[:ind] + [f_ext[j] + c for c in cols[ind:]]
    print(new_cols)
    tdf.columns = new_cols

    df_2023 = df_2023.merge(tdf, on='PROVIDER_ID', how='outer')


df_2023['Release year'] = '2023'
# Decode any remaining bytes values left over from read_sas.
df_2023 = df_2023.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
# Keep only the providers listed in prvdrs_2023 (defined earlier in the notebook).
df_2023 = df_2023[df_2023['PROVIDER_ID'].isin(prvdrs_2023)]
prvdrs = df_2023['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2023')
df_2023.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_3B,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.924456,0.159253,0.872692,0.062022,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,-1.039363,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,,0.488322,-1.763346,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,2023
1,10005,-0.22706,,0.327558,-3.058794,-2.178369,-1.652062,0.037145,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-1.125264,-0.05126,0.686658,-1.564103,0.084469,-0.194954,0.13891,-0.342026,,2.081928,0.212564,1.264919,-0.422406,-0.324483,0.6838,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,0.318272,0.020822,0.530811,0.560369,1.323856,-2.537332,-0.430824,-0.543966,,1.269319,-0.733262,0.400536,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.17881,-0.013939,0.693192,-0.237844,-0.329039,0.635389,-1.352864,-0.401411,0.265629,-0.237088,0.091268,0.562534,,-1.240294,0.350919,0.143403,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.137414,0.030917,0.556023,-0.302742,-0.12019,0.826626,-1.051153,-0.306002,-0.421465,0.037272,-0.445032,0.339427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.142565,2.611408e-16,0.85451,-0.166838,2023
2,10006,-3.843277,-0.631715,0.587885,-0.453288,-0.453103,-2.898752,-0.810482,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.214676,-0.05126,0.686658,-1.694318,-0.849561,0.276217,-1.455655,1.537615,-2.896632,1.989296,-1.429926,0.924456,,,-0.559756,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.273772,0.020822,0.530811,-0.554988,-2.141884,0.349265,0.881622,0.601683,,-0.349052,0.89047,-0.764089,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.075998,-0.013939,0.693192,-0.089526,-1.7666,-0.692541,-0.85,1.263743,0.305281,0.493077,-0.717089,-0.168974,,0.675589,0.615202,0.085452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.068805,0.030917,0.556023,-0.17935,-1.07585,-0.175531,-2.019165,-1.420603,-0.421465,-1.119948,-1.527722,-0.724034,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-1.06054,2.611408e-16,0.85451,-1.241108,2023
3,10007,,,-1.581501,-1.395705,-2.135237,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-1.704148,-0.05126,0.686658,-2.40715,,-1.291644,0.727429,-0.968573,,0.136655,,-0.096931,,,0.062022,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,6.0,0.166667,-0.238507,0.020822,0.530811,-0.488553,,,,,,,0.093366,-0.089833,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.001767,-0.013939,0.693192,0.022657,-2.41773,-0.845764,0.065931,,0.800939,-0.237088,,-1.831492,,,,2.113732,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,7.0,0.142857,-0.335924,0.030917,0.556023,-0.65976,-0.12019,1.828782,0.884869,0.808598,0.68837,1.194493,0.637658,0.871157,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.849217,2.611408e-16,0.85451,0.993806,2023
5,10011,-1.339742,-0.631715,0.240783,-0.453288,-0.151182,-0.893207,0.382842,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.406501,-0.05126,0.686658,-0.517349,-0.281212,-0.54427,-0.079796,0.553041,-1.458502,-0.697033,-0.198058,0.470506,,,-0.559756,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.310564,0.020822,0.530811,-0.624302,0.024204,0.483894,-0.046884,1.170201,,-0.369268,-0.231381,0.952201,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.283281,-0.013939,0.693192,0.42877,-0.075352,-0.130725,0.173687,,-0.646382,-0.967252,-2.586416,-4.824023,,,,-1.305368,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,8.0,0.125,-1.295229,0.030917,0.556023,-2.385055,-0.12019,-0.175531,0.884869,0.808598,-0.421465,0.037272,0.096313,-0.192304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.114695,2.611408e-16,0.85451,0.134223,2023


In [11]:
# For 2022
#
# Assemble the 2022 measure-level table: read the mortality group file, then
# outer-merge each remaining group file onto it by PROVIDER_ID. Shapes are
# printed along the way to trace how each step changes the table.

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured:
    df_2022 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2022/actual/outcome_mortality.sas7bdat')

# Every column from 'C1' onward gets the group prefix; earlier columns keep
# their names.
cols = list(df_2022)
ind = cols.index('C1')
new_cols = cols[:ind] + ['mortality_' + c for c in cols[ind:]]
print(new_cols)
df_2022.columns = new_cols

print(df_2022.shape)
for f, prefix in zip(fnames, f_ext):
    with io.capture_output() as captured:
        tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2022/actual/' + f + '.sas7bdat')

    cols = list(tdf)
    ind = cols.index('C1')
    new_cols = cols[:ind] + [prefix + c for c in cols[ind:]]
    print(new_cols)
    tdf.columns = new_cols

    print(tdf.shape)

    df_2022 = df_2022.merge(tdf, on='PROVIDER_ID', how='outer')

print(df_2022.shape)
df_2022['Release year'] = '2022'
print(df_2022.shape)
# Turn any bytes values returned by read_sas into text.
df_2022 = df_2022.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
print(df_2022.shape)
# Restrict to the providers listed in prvdrs_2022 (defined earlier in the notebook).
keep_rows = df_2022['PROVIDER_ID'].isin(prvdrs_2022)
df_2022 = df_2022[keep_rows]
print(df_2022.shape)
prvdrs = df_2022['PROVIDER_ID'].unique()
print(df_2022.shape)

print(len(prvdrs), 'hospitals in 2022')
df_2022.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
(4489, 21)
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
(4489, 29)
['

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_22,std_OP_23,std_OP_29,std_OP_33,std_PC_01,std_SEP_1,std_OP_3B,std_OP_18B,std_OP_8,std_OP_10,std_OP_13,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,0.703271,-1.054647,-0.072635,1.355101,0.507204,-3.873239,-1.364075,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.542717,0.000879,0.641984,-0.846744,0.012064,-0.457136,0.497621,1.664542,-0.606328,-1.47008,-1.684933,-0.053004,0.60215,-0.703618,0.623486,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.143203,0.027686,0.531111,-0.321758,-0.427377,0.325583,0.722043,0.29595,1.045465,0.077072,-0.002603,1.040003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.384517,0.002796,0.645363,0.591483,0.643289,-0.863287,,0.009932,,0.442175,-0.264434,,-0.906488,-0.897899,0.376713,-0.520081,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,9.0,0.111111,-0.220009,0.040971,0.815466,-0.320038,-0.536306,-0.084743,-0.269007,-0.072623,0.801375,-0.116805,0.284308,0.481635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060979,-5.363828e-17,0.84576,0.0721,2022
1,10005,-0.113897,,-0.359269,-3.038802,-1.230466,-1.23433,0.765784,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-0.868497,0.000879,0.641984,-1.354201,1.031113,-0.432991,-0.296054,1.172588,,1.650104,0.258268,1.220694,1.951595,0.635548,0.623486,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,0.781435,0.027686,0.531111,1.419195,0.866214,0.354127,-0.227217,0.485385,,-0.692282,-0.320719,-1.105376,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.09141,0.002796,0.645363,-0.145974,0.125136,-0.329809,-0.478581,0.434506,0.689565,-0.218358,0.264906,,0.478618,-0.249889,-1.577127,1.347285,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.044205,0.040971,0.815466,0.003965,-0.536306,0.876653,-0.269007,-0.072623,0.801375,-0.116805,-0.254812,-0.048857,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.047452,-5.363828e-17,0.84576,0.056106,2022
2,10006,-3.484718,-0.531852,0.405087,-0.616522,-0.386455,-0.92748,-1.84663,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.05551,0.000879,0.641984,-1.645507,0.284947,0.516707,-1.339736,2.015937,-0.816654,1.549453,-1.490613,-0.371429,,,0.060418,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,0.045448,0.027686,0.531111,0.033444,-1.720968,0.341894,0.587157,0.587502,,-0.326058,0.435881,-1.105376,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.171424,0.002796,0.645363,-0.269957,0.297854,0.203668,,0.131239,,0.442175,-1.558378,,-0.634008,1.753052,-1.738453,1.667405,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.062728,0.040971,0.815466,0.02668,-0.536306,-0.084743,-2.195088,-1.06817,-1.291935,-1.058688,-0.254812,-0.048857,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.817325,-5.363828e-17,0.84576,-0.966379,2022
3,10007,,,-0.645902,-0.729187,-1.826238,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-1.067109,0.000879,0.641984,-1.663574,,-0.131181,-0.335737,-1.357461,,-0.262267,,-0.371429,,,0.060418,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,6.0,0.166667,-0.399609,0.027686,0.531111,-0.804531,,,,,,,1.201078,0.306057,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.753568,0.002796,0.645363,1.163333,,-0.329809,,0.131239,,,2.382268,,0.751098,,0.699366,,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,5.0,0.2,0.726832,0.040971,0.815466,0.841066,-0.536306,-0.084743,-0.269007,0.922923,0.801375,0.825078,-0.254812,1.012127,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.302079,-5.363828e-17,0.84576,0.357169,2022
5,10011,-2.054673,-1.446744,-0.645902,0.172127,-0.287159,-0.25241,-0.118808,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.661939,0.000879,0.641984,-1.032452,-1.1349,-2.416895,-0.871468,-0.092437,-1.868286,0.240989,-0.324692,-1.326703,,,0.060418,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.85933,0.027686,0.531111,-1.670115,-0.211778,0.411215,0.217907,-0.272356,,0.328319,0.659421,-0.371431,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.108757,0.002796,0.645363,0.164188,,-0.863287,,,-1.087459,,-1.499562,,-0.906488,,-0.214816,,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,5.0,0.2,-0.914322,0.040971,0.815466,-1.17147,-0.536306,-0.084743,-0.269007,-0.072623,-0.24528,-0.116805,-0.254812,-0.579349,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.269866,-5.363828e-17,0.84576,-0.31908,2022


In [12]:
# For 2021
#
# Assemble the 2021 measure-level table: read the mortality group file, then
# outer-merge each remaining group file onto it by PROVIDER_ID.

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured:
    df_2021 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2021/actual/outcome_mortality.sas7bdat')

# Every column from 'C1' onward gets the group prefix; earlier columns keep
# their names.
cols = list(df_2021)
ind = cols.index('C1')
new_cols = cols[:ind] + ['mortality_' + c for c in cols[ind:]]
print(new_cols)
df_2021.columns = new_cols

for f, prefix in zip(fnames, f_ext):
    with io.capture_output() as captured:
        tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2021/actual/' + f + '.sas7bdat')

    cols = list(tdf)
    ind = cols.index('C1')
    new_cols = cols[:ind] + [prefix + c for c in cols[ind:]]
    print(new_cols)
    tdf.columns = new_cols

    df_2021 = df_2021.merge(tdf, on='PROVIDER_ID', how='outer')


df_2021['Release year'] = '2021'
# Turn any bytes values returned by read_sas into text.
df_2021 = df_2021.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
# Restrict to the providers listed in prvdrs_2021 (defined earlier in the notebook).
keep_rows = df_2021['PROVIDER_ID'].isin(prvdrs_2021)
df_2021 = df_2021[keep_rows]
prvdrs = df_2021['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2021')
df_2021.head()

['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_22,std_OP_23,std_OP_29,std_OP_30,std_OP_33,std_PC_01,std_SEP_1,std_OP_3B,std_OP_18B,std_ED_2B,std_OP_8,std_OP_10,std_OP_13,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_C13,process_C14,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,-0.070634,1.1826,-1.18578,1.207127,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,0.644574,-0.979501,,-0.459111,0.466001,,0.473469,0.031191,,-0.761776,-0.054072,0.140655,-0.102729,0.684379,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,2021
1,10005,-1.095515,,-1.273727,-3.077512,-1.846949,-2.26162,-1.335906,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-1.815205,-0.004941,0.64666,-2.799407,0.539823,-0.24435,-0.744184,0.119295,,0.896062,0.210441,-0.420048,0.989506,-0.195267,-0.516355,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,0.063492,0.027901,0.551294,0.064559,0.303431,1.102714,-0.609029,0.4594,,-1.712026,0.039355,-0.043766,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.065703,0.009921,0.658437,-0.114854,0.12667,-0.346365,-0.150232,-0.40149,0.144736,0.716782,-0.34011,0.504799,0.076084,0.572659,0.228145,-0.410303,-1.202673,0.412738,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,14.0,0.071429,-0.004897,0.037755,0.600242,-0.071058,-0.537213,0.874962,-0.269656,-0.073296,0.801144,-0.115323,-0.254265,-0.052073,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.046785,-1.104271e-16,0.845613,0.055327,2021
2,10006,-2.921882,-1.298828,-0.915638,-0.590615,-1.318836,0.683952,-2.65689,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.288391,-0.004941,0.64666,-1.984738,0.479089,0.888725,-0.932855,0.04806,-0.33382,0.992741,-1.164917,0.395251,,,-0.516355,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.016009,0.027901,0.551294,-0.079649,-0.649202,1.102714,0.856593,-0.011884,,0.242305,0.332149,-0.406762,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.209416,0.009921,0.658437,0.302983,0.299305,0.28677,,-0.459111,-0.417479,,0.473469,-1.567236,,-0.211057,-0.148144,-0.4393,-1.249813,0.793035,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.23996,0.037755,0.600242,-0.462672,-0.537213,-0.085263,-2.19386,-1.067899,-1.292453,-1.055803,-0.254265,-0.052073,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.817354,-1.104271e-16,0.845613,-0.966582,2021
3,10007,,,-0.468027,-1.515972,-2.231032,-1.109004,,0.0,0.0,1.0,1.0,1.0,1.0,0.0,4.0,0.25,-1.331009,-0.004941,0.64666,-2.050643,,-1.009175,-0.498092,-0.877991,,0.219305,,-0.652991,,,0.058139,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,6.0,0.166667,-0.460134,0.027901,0.551294,-0.885254,,,,,,,1.229489,0.371087,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.800288,0.009921,0.658437,1.200369,,-0.979501,,-4.377296,-0.17653,,,2.280829,,0.720929,0.496922,,-0.401285,,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,7.0,0.142857,-0.34799,0.037755,0.600242,-0.642649,-0.537213,-0.085263,-0.269656,0.921307,0.801144,0.825157,-0.254265,1.008938,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.301269,-1.104271e-16,0.845613,0.356272,2021
4,10008,,,-1.81086,-0.301441,-1.030774,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-1.047692,-0.004941,0.64666,-1.612519,,,0.342723,-0.735521,,0.219305,,0.861136,,,,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,4.0,0.25,0.171911,0.027901,0.551294,0.261221,,,,,,,,0.008091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.008091,0.009921,0.658437,-0.00278,-3.584977,0.28677,,-1.726759,0.385685,,,-0.975226,,1.081015,0.174389,,0.494383,,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,8.0,0.125,-0.48309,0.037755,0.600242,-0.867725,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,-1.104271e-16,0.845613,,2021


In [13]:
# Print the (rows, columns) shape of each yearly table, newest first.
for yearly_df in (df_2025, df_2024, df_2023, df_2022, df_2021):
    print(yearly_df.shape)

(2900, 122)
(2847, 124)
(3076, 124)
(3121, 124)
(3355, 128)


In [14]:
# For each pair of consecutive release years, list the columns that are
# present in the newer table but absent from the older one.
column_checks = [
    (df_2025, df_2024, 'in 2025, not in 2024'),
    (df_2024, df_2023, 'in 2024, not in 2023'),
    (df_2023, df_2022, 'in 2023, not in 2022'),
    (df_2022, df_2021, 'in 2022, not in 2021'),
]
for newer_df, older_df, label in column_checks:
    ls = np.setdiff1d(list(newer_df), list(older_df))
    print(ls, label)


# Combine the yearly tables with successive outer merges, newest first. No
# `on` is given, so pandas joins on the columns the two frames have in common.
tdf = df_2025
for older_df in (df_2024, df_2023, df_2022, df_2021):
    tdf = tdf.merge(older_df, how='outer')
# Attach the combined measure-level columns to main_df by provider and year.
main_df = main_df.merge(tdf, how='outer', on=['PROVIDER_ID', 'Release year'])

print(main_df.shape)

print(len(main_df['PROVIDER_ID'].unique()))
main_df = main_df.sort_values(by=['PROVIDER_ID'], ascending=True)

# Spot-check the first few provider IDs after sorting.
ls = main_df['PROVIDER_ID'].unique().tolist()

print(ls[:10])

main_df.head()

[] in 2025, not in 2024
[] in 2024, not in 2023
['std_HCP_COVID_19'] in 2023, not in 2022
[] in 2022, not in 2021
(15299, 155)
3562
['010001', '010005', '010006', '010007', '010008', '010011', '010012', '010016', '010019', '010021']


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_sc
ore,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_OP_3B,process_C12,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14
9156,10001,-0.846744,-0.321758,0.591483,0.0721,-0.320038,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.149487,7,8,11,8,9,5,2,1,3.0,3.0,2022,0.703271,-1.054647,-0.072635,1.355101,0.507204,-3.873239,-1.364075,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.542717,0.000879,0.641984,-0.846744,0.012064,-0.457136,0.497621,1.664542,-0.606328,-1.47008,-1.684933,-0.053004,0.60215,-0.703618,0.623486,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.143203,0.027686,0.531111,-0.321758,-0.427377,0.325583,0.722043,0.29595,1.045465,0.077072,-0.002603,1.040003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.384517,0.002796,0.645363,0.591483,,0.643289,0.376713,-0.520081,-0.906488,-0.863287,,0.009932,-0.897899,0.442175,-0.264434,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,9.0,0.111111,-0.220009,0.040971,0.815466,-0.320038,-0.536306,-0.084743,-0.269007,-0.072623,0.801375,-0.116805,0.284308,0.481635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060979,-5.363828e-17,0.84576,0.0721,,1.0,,,,,
7879,10001,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,-0.070634,1.1826,-1.18578,1.207127,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,0.473469,0.031191,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,,1.0,,0.466001,-0.054072,1.0,1.0
7663,10001,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,0.423838,-1.03233,0.626949,-0.252865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,-0.431525,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,0.583665,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,,1.0,,,,,
5218,10001,0.297087,0.359381,0.345929,0.031544,-0.605072,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.154858,7,7,11,8,10,5,2,1,3.0,4.0,2025,1.406667,-1.04537,1.012712,0.652001,-0.313538,0.023985,-0.732699,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,0.143394,-0.073481,0.730006,0.297087,1.021693,-0.36918,-0.44784,0.291152,0.607499,0.86006,0.191466,0.767466,-1.029383,0.627143,-0.259345,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.205521,0.013485,0.534354,0.359381,0.696915,0.303101,0.74229,-0.468871,,0.477107,-0.098662,0.103228,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.25073,-0.019735,0.781849,0.345929,-0.524696,1.04392,0.080311,0.781915,-1.064893,-1.1425,,-3.168749,0.589498,0.141249,0.446595,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.281735,0.034345,0.522385,-0.605072,-1.160819,0.560234,-0.240795,-0.264121,-0.32048,0.919364,0.711438,0.013951,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.027346,1.301665e-17,0.866926,0.031544,,,,,,,
9313,10001,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.924456,0.159253,0.872692,0.062022,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,-1.039363,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,-1.763346,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,,1.0,,,,,


## Load SAS pack input files
These data originate from Care Compare

In [15]:
# Load the Care Compare measure values used as SAS pack input for the 2025
# prognostication and keep only hospitals present in `prvdrs_2025`.
# NOTE: this rebinds `df_2025`; the same name is used near the top of the
# notebook for the SAS pack predictions.
df_2025 = pd.read_csv(stars_dir + "Reproduce_Stars_Input/2025/Input_File/data_for_2025_prognostications_from_Oct2024.csv")
df_2025['Release year'] = '2025'  # scalar broadcasts to every row

# Decode any bytes values to str. Done column-by-column with Series.map because
# DataFrame.applymap is deprecated (pandas >= 2.1).
df_2025 = df_2025.apply(
    lambda col: col.map(lambda v: v.decode() if isinstance(v, bytes) else v)
)


def restore_provider_id(raw):
    """Return a provider ID as a zero-padded string of at least six characters.

    `raw` must be convertible with int(). A trailing '666666' (the numeric
    stand-in for the 'F' suffix of VHA hospitals) is replaced by 'F'. Only a
    true suffix is replaced, so an ID that merely contains the digits
    elsewhere is left intact.
    """
    pid = str(int(raw))
    if pid.endswith('666666'):
        pid = pid[:-6] + 'F'
    return pid.zfill(6)


df_2025['PROVIDER_ID'] = [restore_provider_id(p) for p in df_2025['PROVIDER_ID'].tolist()]
df_2025 = df_2025[df_2025['PROVIDER_ID'].isin(prvdrs_2025)]

print(df_2025.shape)
prvdrs = df_2025['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2025 prognostications')

df_2025.head()

(2900, 92)
2900 hospitals in 2025 prognostications


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,9538.0,16332.0,240.0,94.0,103195.0,103195.0,10.082,23.712,6.618,0.858,11.232,65.234,0.496,0.169,1.209,,0.445,0.491,0.141,0.044,10.6,0.177,-13.8,14.3,0.137,0.189,0.105,0.079,0.108,0.03,2924.0,34.0,679.0,130.0,296.0,490.0,414.0,489.0,610.0,122.0,291.0,32.0,0.333,,0.054,,0.021,,217.0,345.0,0.05,52960.0,,,0.47,17.0,194.78,125.0,0.98,4115.0,0.96,,0.0,0.02,46.0,0.68,131.0,17.0,2.0,4.0,3.0,3.0,3.0,4.0,4.0,3.0,643.0,,0.038,157.0,0.102,151.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,2025
1,10005,3410.0,7200.0,102.0,33.0,35424.0,33872.0,2.074,3.262,2.867,0.315,1.631,9.503,0.964,1.226,0.0,,2.452,0.631,0.135,0.039,9.8,0.161,,-8.4,0.139,0.231,0.127,0.092,0.145,0.027,1056.0,144.0,176.0,143.0,,305.0,97.0,301.0,162.0,133.0,28.0,138.0,0.459,,0.134,,0.028,,144.0,1154.0,0.03,56820.0,0.69,13.0,0.96,180.0,191.14,43.0,1.01,2407.0,0.72,,0.026,0.01,193.0,0.76,288.0,17.0,3.0,4.0,2.0,4.0,4.0,3.0,3.0,3.5,714.0,,,,,,14.2,739.0,7.9,107.0,5.5,107.0,1.9,406.0,2025
2,10006,4826.0,8700.0,73.0,116.0,68020.0,64728.0,5.279,11.164,1.935,1.086,5.397,37.771,0.0,0.0,0.0,0.0,0.371,0.026,0.147,0.044,-4.9,0.177,13.4,4.5,0.142,0.198,0.122,0.102,0.166,0.04,2560.0,90.0,508.0,154.0,315.0,621.0,271.0,616.0,455.0,141.0,292.0,84.0,,,0.108,,0.029,,177.0,349.0,0.01,42286.0,,,0.85,82.0,201.13,102.0,1.16,2560.0,0.66,,0.0,0.0,22.0,0.56,162.0,18.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.5,1620.0,,0.052,102.0,0.115,91.0,12.1,1355.0,,,,,1.4,484.0,2025
3,10007,235.0,1369.0,4.0,,5267.0,5267.0,0.144,0.747,0.096,,0.101,1.994,,,,,,1.003,0.151,0.044,20.9,0.19,,21.5,,0.29,0.141,0.135,,0.034,234.0,33.0,35.0,41.0,,85.0,,101.0,33.0,42.0,,33.0,,,0.073,,,,129.0,594.0,0.04,11202.0,,,0.23,111.0,,,0.93,345.0,0.26,,0.0,,,0.24,21.0,24.0,5.0,5.0,4.0,5.0,5.0,4.0,4.0,3.0,175.0,,,,,,13.4,109.0,,,,,1.2,59.0,2025
5,10011,9905.0,10399.0,117.0,19.0,80106.0,80106.0,10.751,13.381,3.261,0.178,8.719,42.14,0.744,0.299,0.0,,0.573,0.498,0.159,0.043,30.0,0.187,-12.4,6.3,0.129,0.225,0.14,0.106,0.128,0.038,1636.0,32.0,294.0,58.0,148.0,344.0,86.0,336.0,258.0,55.0,136.0,28.0,,,0.078,,,,190.0,333.0,0.05,41057.0,0.43,23.0,1.0,24.0,208.8,67.0,0.95,,,,0.008,,,0.6,205.0,26.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,2.5,1334.0,,0.021,70.0,0.119,70.0,12.8,68.0,,,,,0.9,249.0,2025


In [16]:
# Load the July 2024 release of the SAS pack input data and keep only the
# hospitals listed in `prvdrs_2024` (defined earlier in the notebook).
with io.capture_output():  # swallow anything written to stdout/stderr while reading
    df_2024 = pd.read_sas(stars_dir + '2024/2024-07 Stars Release/alldata_2024jul.sas7bdat')
df_2024['Release year'] = '2024'  # scalar broadcasts to every row

# read_sas is called without `encoding`, so text values may arrive as bytes;
# decode them to str. Done column-by-column with Series.map because
# DataFrame.applymap is deprecated (pandas >= 2.1).
df_2024 = df_2024.apply(
    lambda col: col.map(lambda v: v.decode() if isinstance(v, bytes) else v)
)
df_2024 = df_2024[df_2024['PROVIDER_ID'].isin(prvdrs_2024)]

print(df_2024.shape)
prvdrs = df_2024['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2024')

df_2024.head()

(2847, 96)
2847 hospitals in 2024


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,9149.0,17310.0,214.0,,104733.0,104733.0,9.597,24.766,5.994,,11.4,67.066,0.938,0.363,1.335,,0.965,0.507,0.142,0.038,23.4,0.19,-15.4,23.6,0.148,0.18,0.089,0.088,0.12,0.027,2912.0,49.0,614.0,117.0,274.0,403.0,398.0,400.0,549.0,107.0,278.0,49.0,,,,,0.38,79.0,0.061,1410.0,0.028,178.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,184.68,134.0,1.21,3905.0,0.95,2496.0,0.836,0.0,32.0,0.65,127.0,15.0,3.0,3.0,3.0,3.0,4.0,4.0,3.5,3.5,544.0,2542.0385,0.041,132.0,0.105,126.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,2024
1,10005,3194.0,8277.0,96.0,,36794.0,34887.0,1.989,4.019,2.626,,1.847,10.066,2.514,0.995,0.762,,0.541,0.497,0.128,0.034,22.1,0.166,,-6.2,0.153,0.233,0.149,0.099,0.136,0.023,1052.0,172.0,129.0,136.0,,285.0,81.0,289.0,121.0,126.0,27.0,155.0,,,57.0,16.0,0.477,130.0,0.12,1057.0,0.042,189.0,145.0,1074.0,0.03,56820.0,0.58,12.0,0.96,180.0,183.49,43.0,0.97,2700.0,0.8,2552.0,0.807,0.02,200.0,0.69,252.0,18.0,3.0,4.0,1.0,3.0,4.0,3.0,3.0,3.0,824.0,978.028994,,,,,14.2,739.0,7.9,107.0,5.5,107.0,1.9,406.0,2024
2,10006,5343.0,8715.0,111.0,,63727.0,60304.0,5.801,11.166,2.95,,5.283,27.805,0.172,0.358,0.0,,1.514,0.072,0.134,0.053,-4.7,0.176,28.1,-0.4,0.172,0.195,0.125,0.099,0.165,0.046,2310.0,138.0,441.0,158.0,273.0,472.0,227.0,469.0,388.0,148.0,254.0,145.0,,,,,0.462,39.0,0.101,978.0,0.045,221.0,168.0,360.0,0.01,42286.0,0.75,16.0,0.85,82.0,173.63,96.0,1.17,2536.0,0.67,1882.0,0.796,0.04,28.0,0.57,126.0,19.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,2.5,1503.0,1753.979899,0.036,95.0,0.124,89.0,12.1,1355.0,,,,,1.4,484.0,2024
3,10007,,,,,,5511.0,,,,,,2.66,,,,,,0.376,0.157,0.042,-1.9,0.2,,-9.4,,0.285,0.125,0.137,,,258.0,26.0,31.0,34.0,,72.0,,88.0,26.0,34.0,,,,,,,,,0.034,146.0,,,132.0,1275.0,0.04,11202.0,,,0.23,111.0,,,0.95,350.0,0.53,252.0,0.601,,,0.93,43.0,24.0,3.0,5.0,4.0,3.0,3.0,3.0,3.5,3.0,189.0,228.286193,,,,,13.4,109.0,,,,,1.2,59.0,2024
5,10011,12136.0,11795.0,142.0,,83101.0,83101.0,13.096,15.144,3.798,,6.946,59.529,0.916,0.528,0.263,,0.72,0.638,0.149,0.042,26.0,0.197,-9.0,14.5,0.123,0.209,0.135,0.103,0.132,0.035,1366.0,27.0,251.0,47.0,127.0,309.0,86.0,304.0,225.0,43.0,121.0,28.0,,,,,,,0.057,332.0,,,213.0,341.0,0.05,41057.0,0.25,16.0,1.0,24.0,189.28,83.0,0.72,2377.0,0.81,1487.0,0.889,,,0.44,201.0,33.0,3.0,3.0,4.0,3.0,4.0,3.0,3.5,2.5,2023.0,1293.230115,0.026,65.0,0.126,64.0,12.8,68.0,,,,,0.9,249.0,2024


In [17]:
# Load the July 2023 release of the SAS pack input data and keep only the
# hospitals listed in `prvdrs_2023` (defined earlier in the notebook).
with io.capture_output():  # swallow anything written to stdout/stderr while reading
    df_2023 = pd.read_sas(stars_dir + '2023/2023-07 Stars Release/alldata_2023jul.sas7bdat')
df_2023['Release year'] = '2023'  # scalar broadcasts to every row

# read_sas is called without `encoding`, so text values may arrive as bytes;
# decode them to str. Done column-by-column with Series.map because
# DataFrame.applymap is deprecated (pandas >= 2.1).
df_2023 = df_2023.apply(
    lambda col: col.map(lambda v: v.decode() if isinstance(v, bytes) else v)
)
df_2023 = df_2023[df_2023['PROVIDER_ID'].isin(prvdrs_2023)]

print(df_2023.shape)
prvdrs = df_2023['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2023')

df_2023.head()

(3076, 96)
3076 hospitals in 2023


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,10024.0,17731.0,154.0,200.0,101908.0,101451.0,10.597,26.63,4.548,1.845,9.412,72.686,0.661,0.3,1.099,0.0,0.85,0.66,0.142,0.042,21.8,0.199,1.9,-1.5,0.164,0.159,0.083,0.085,0.124,0.024,3058.0,98.0,755.0,202.0,319.0,436.0,489.0,407.0,630.0,182.0,317.0,102.0,,,,,0.425,146.0,0.057,1488.0,0.067,208.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,173.39,120.0,1.01,3795.0,0.97,2323.0,0.737,0.09,34.0,0.46,146.0,15.0,2.0,3.0,2.0,4.0,4.0,3.0,3.0,3.5,434.0,2046.895485,0.047,172.0,0.117,165.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,2023
1,10005,3713.0,8670.0,88.0,,38413.0,35686.0,2.45,4.995,2.512,,1.999,10.484,3.673,1.201,1.194,,0.0,0.858,0.139,0.04,9.3,0.176,4.7,2.2,0.166,0.218,0.169,0.081,0.126,0.018,1258.0,178.0,157.0,234.0,38.0,361.0,100.0,369.0,153.0,195.0,52.0,134.0,,,,,0.545,191.0,0.138,1214.0,0.043,208.0,146.0,1003.0,0.03,54503.0,0.73,15.0,0.99,108.0,142.88,35.0,0.91,2593.0,0.9,2026.0,0.821,0.01,194.0,0.59,242.0,16.0,3.0,4.0,2.0,3.0,3.0,3.0,3.0,3.5,717.0,819.043002,,,,,14.6,850.0,11.1,108.0,5.7,108.0,0.9,362.0,2023
2,10006,7318.0,11755.0,91.0,,62709.0,54159.0,7.924,15.296,2.523,,4.164,22.618,0.757,0.196,0.396,,1.441,0.088,0.142,0.048,-2.3,0.177,25.9,42.3,0.189,0.178,0.122,0.078,0.165,0.034,2555.0,246.0,550.0,235.0,312.0,538.0,261.0,528.0,468.0,209.0,295.0,234.0,,,,,0.412,97.0,0.11,1168.0,0.014,217.0,144.0,363.0,0.01,41137.0,0.57,14.0,0.88,75.0,157.42,84.0,1.1,2292.0,0.64,2694.0,0.651,0.0,37.0,0.58,142.0,17.0,2.0,3.0,1.0,2.0,3.0,2.0,2.0,2.5,1358.0,1487.163359,0.035,117.0,0.156,109.0,12.5,1505.0,,,,,1.1,468.0,2023
3,10007,,,,,,5413.0,,,,,,2.148,,,,,,0.466,0.151,,36.3,0.197,,-12.6,,0.217,0.139,0.103,,,272.0,,51.0,72.0,,99.0,,106.0,45.0,63.0,,,,,,,,,0.059,169.0,,,119.0,1202.0,0.03,11120.0,,,0.63,68.0,,,0.99,318.0,0.61,277.0,0.574,,,0.93,55.0,23.0,3.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,173.0,142.073902,,,,,15.3,118.0,,,,,1.0,56.0,2023
5,10011,11886.0,11988.0,154.0,,81837.0,81837.0,12.884,15.434,4.069,,8.226,56.451,0.621,0.907,0.0,,1.459,0.62,0.146,0.042,17.9,0.206,13.0,7.7,0.152,0.171,0.122,0.082,0.138,0.024,1524.0,36.0,298.0,101.0,139.0,346.0,101.0,330.0,265.0,90.0,127.0,35.0,,,,,,,0.053,361.0,,,192.0,339.0,0.05,42062.0,0.2,15.0,0.18,83.0,136.95,76.0,0.82,1944.0,0.75,1476.0,0.851,,,0.34,242.0,29.0,3.0,3.0,4.0,4.0,3.0,3.0,3.5,3.0,1949.0,1034.277124,0.035,64.0,0.138,61.0,13.6,72.0,,,,,1.1,131.0,2023


In [18]:
# Load the July 2022 release of the SAS pack input data and keep only the
# hospitals listed in `prvdrs_2022` (defined earlier in the notebook).
with io.capture_output():  # swallow anything written to stdout/stderr while reading
    df_2022 = pd.read_sas(stars_dir + '2022/2022-07 Stars Release/all_data_2022jul.sas7bdat')
df_2022['Release year'] = '2022'  # scalar broadcasts to every row

# read_sas is called without `encoding`, so text values may arrive as bytes;
# decode them to str. Done column-by-column with Series.map because
# DataFrame.applymap is deprecated (pandas >= 2.1).
df_2022 = df_2022.apply(
    lambda col: col.map(lambda v: v.decode() if isinstance(v, bytes) else v)
)
df_2022 = df_2022[df_2022['PROVIDER_ID'].isin(prvdrs_2022)]

prvdrs = df_2022['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2022')

df_2022.head()

3121 hospitals in 2022


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_33,OP_33_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,8391.0,13866.0,171.0,229.0,102470.0,102470.0,8.711,19.223,4.938,2.025,7.631,78.514,0.574,0.312,0.608,0.0,0.786,0.56,0.155,0.049,16.1,0.212,6.5,-7.1,0.198,0.145,0.089,0.083,0.116,0.026,1805.0,165.0,904.0,310.0,424.0,503.0,533.0,474.0,725.0,270.0,412.0,175.0,,,,,0.459,122.0,0.04,925.0,0.048,147.0,183.0,176.0,0.03,59762.0,,,0.9,52.0,184.28,115.0,0.81,4817.0,0.97,0.0,18.0,0.55,56.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,2828.228824,0.038,200.0,0.135,193.0,14.1,511.0,,,11.3,192.0,6.7,192.0,0.9,1003.0,2022
1,10005,2864.0,8199.0,79.0,,37761.0,35442.0,1.808,4.57,2.084,,1.506,11.292,0.553,0.875,0.48,,1.328,0.708,0.147,0.039,15.5,0.181,-17.4,12.9,0.155,0.18,0.167,0.086,0.124,0.02,764.0,168.0,223.0,378.0,36.0,468.0,122.0,477.0,213.0,314.0,57.0,166.0,,,,,0.415,94.0,0.149,739.0,0.013,154.0,122.0,588.0,0.02,68296.0,0.67,12.0,0.97,180.0,145.66,40.0,1.19,1915.0,0.91,0.03,105.0,0.64,145.0,30.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,3.0,568.0,1106.469176,,,,,14.8,1086.0,1.0,21.0,9.5,120.0,5.4,120.0,0.9,514.0,2022
2,10006,6011.0,10398.0,92.0,,63658.0,62016.0,5.334,10.217,2.432,,4.672,28.079,0.562,0.392,0.411,,1.07,0.356,0.157,0.048,-8.1,0.182,0.1,39.2,0.15,0.163,0.124,0.078,0.157,0.032,1563.0,306.0,615.0,381.0,363.0,663.0,286.0,634.0,528.0,331.0,344.0,306.0,,,,,0.279,61.0,0.158,728.0,0.007,152.0,171.0,183.0,0.01,47004.0,,,0.92,89.0,193.03,87.0,1.19,2456.0,0.93,0.0,15.0,0.33,54.0,23.0,3.0,3.0,1.0,2.0,2.0,2.0,3.0,3.0,1136.0,2131.116508,0.034,127.0,0.138,120.0,13.6,1908.0,,,,,,,1.0,502.0,2022
3,10007,,,,,,5230.0,,,,,,2.033,,,,,,0.0,0.157,,8.0,0.2,,13.9,,0.192,0.126,0.089,,,144.0,,70.0,103.0,,147.0,,152.0,67.0,90.0,,,,,,,,,0.022,90.0,,,110.0,586.0,0.02,12514.0,,,0.92,169.0,,,0.94,,,,,1.0,32.0,27.0,3.0,3.0,3.0,4.0,4.0,4.0,3.0,4.0,179.0,188.206619,,,,,18.4,184.0,,,,,,,1.0,31.0,2022
5,10011,10861.0,11435.0,151.0,,81783.0,81783.0,11.733,14.72,4.032,,8.211,95.085,0.511,0.611,0.992,,0.609,0.252,0.163,0.042,64.8,0.195,33.4,27.4,0.139,0.161,0.11,0.089,0.143,0.025,918.0,43.0,351.0,166.0,126.0,349.0,104.0,331.0,305.0,150.0,123.0,49.0,,,,,,,0.073,259.0,,,183.0,87.0,0.03,52482.0,,,,,161.7,82.0,1.06,,,,,0.34,47.0,31.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,2.5,2139.0,1327.131542,0.041,69.0,0.153,64.0,16.6,96.0,0.75,40.0,,,,,1.0,248.0,2022


In [20]:
# Load the April 2021 release of the SAS pack input data and keep only the
# hospitals listed in `prvdrs_2021` (defined earlier in the notebook).
with io.capture_output():  # swallow anything written to stdout/stderr while reading
    df_2021 = pd.read_sas(stars_dir + '2021/2021-04 Stars Release/all_data_2021apr.sas7bdat')
df_2021['Release year'] = '2021'  # scalar broadcasts to every row

# read_sas is called without `encoding`, so text values may arrive as bytes;
# decode them to str. Done column-by-column with Series.map because
# DataFrame.applymap is deprecated (pandas >= 2.1).
df_2021 = df_2021.apply(
    lambda col: col.map(lambda v: v.decode() if isinstance(v, bytes) else v)
)
df_2021 = df_2021[df_2021['PROVIDER_ID'].isin(prvdrs_2021)]

print(df_2021.shape)
prvdrs = df_2021['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2021')
df_2021.head()

(3355, 100)
3355 hospitals in 2021


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,OP_30,OP_30_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,ED_2B,ED_2B_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_33,OP_33_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,7846.0,13268.0,165.0,216.0,102765.0,102765.0,8.086,18.498,4.615,1.884,7.305,76.294,0.742,0.324,0.65,0.0,0.548,0.537,0.156,0.046,21.6,0.207,7.0,-20.6,0.166,0.157,0.113,0.084,0.119,0.023,4474.0,258.0,1106.0,443.0,620.0,594.0,644.0,554.0,858.0,374.0,586.0,250.0,,,,,0.389,211.0,0.072,2117.0,0.028,211.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,0.98,162.0,170.87,165.0,0.93,4817.0,0.97,0.0,24.0,0.6,102.0,103.0,655.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,3896.300852,0.047,281.0,0.149,268.0,13.4,606.0,,,10.8,190.0,7.1,190.0,0.8,993.0,2021
1,10005,3088.0,7928.0,74.0,,40143.0,37697.0,1.988,4.66,1.97,,1.548,12.386,0.0,1.073,0.508,,1.938,0.565,0.159,0.039,10.2,0.187,-7.0,22.9,0.171,0.195,0.168,0.099,0.138,0.023,2018.0,221.0,304.0,591.0,52.0,625.0,169.0,637.0,291.0,502.0,80.0,213.0,,,62.0,18.0,0.427,246.0,0.142,1504.0,0.033,273.0,115.0,1408.0,0.02,71631.0,0.71,24.0,0.82,204.0,0.94,413.0,190.88,51.0,1.0,1915.0,0.91,0.03,193.0,0.68,330.0,82.0,1034.0,30.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,3.0,568.0,1538.334998,,,,,16.3,1150.0,1.0,12.0,11.1,121.0,6.2,121.0,1.1,483.0,2021
2,10006,5874.0,10270.0,90.0,,64819.0,62418.0,4.781,8.992,2.439,,4.588,28.161,0.0,0.222,0.82,,0.654,0.426,0.152,0.046,-17.8,0.186,-5.5,27.5,0.125,0.184,0.125,0.095,0.157,0.028,3620.0,392.0,771.0,565.0,443.0,763.0,362.0,738.0,659.0,489.0,422.0,363.0,,,,,0.429,119.0,0.145,1386.0,0.026,265.0,152.0,362.0,0.01,41321.0,,,0.81,94.0,0.87,125.0,217.08,109.0,1.07,2456.0,0.93,0.0,35.0,0.33,105.0,110.0,552.0,23.0,3.0,3.0,1.0,2.0,2.0,2.0,3.0,3.0,1136.0,2816.123681,0.041,139.0,0.132,130.0,16.4,1948.0,,,,,,,1.1,454.0,2021
3,10007,,,,,,4783.0,,,,,,1.84,,,,,,0.0,0.161,,29.1,0.194,,16.9,0.153,0.203,0.141,0.09,,,443.0,,83.0,136.0,,200.0,29.0,205.0,78.0,116.0,,,,,,,,,0.091,208.0,,,108.0,1295.0,0.03,1116.0,,,0.13,82.0,0.9,40.0,,,0.92,,,,,0.98,52.0,62.0,525.0,27.0,3.0,3.0,3.0,4.0,4.0,4.0,3.0,4.0,179.0,277.574249,,,,,17.7,213.0,,,,,,,1.0,76.0,2021
4,10008,,,,,,,,,,,,,,,,,,,0.148,,,0.194,,-3.6,,0.178,0.12,0.105,,,127.0,,,30.0,,39.0,,43.0,27.0,32.0,,,,,,,,,0.034,148.0,,,91.0,335.0,0.01,7012.0,,,0.59,34.0,0.97,30.0,,,0.99,189.0,0.48,,,0.43,14.0,86.0,396.0,,,,,,,,,,,58.903206,,,,,17.5,77.0,,,,,,,,,2021


## Merge SAS pack output data with the input data

In [21]:
# Stack the five yearly input frames into one long table (one row per
# hospital per release year).
#
# The previous chain of `merge(..., how='outer')` calls had no `on=`, so pandas
# joined on every shared column. Each yearly frame carries its own constant
# 'Release year', so no two rows could ever match and the result was simply
# every row from every frame. An explicit concat states that intent, avoids a
# join over dozens of float columns, and gives a stable row order (2025 first,
# 2021 last). Columns missing from a given year are filled with NaN, and new
# columns are appended in order of first appearance.
yearly_inputs = [df_2025, df_2024, df_2023, df_2022, df_2021]
input_df = pd.concat(yearly_inputs, ignore_index=True, sort=False)

print(input_df.shape)
prvdrs = input_df['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in input df')
input_df.head()


(15299, 102)
3562 hospitals in input df


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN
0,10001,9538.0,16332.0,240.0,94.0,103195.0,103195.0,10.082,23.712,6.618,0.858,11.232,65.234,0.496,0.169,1.209,,0.445,0.491,0.141,0.044,10.6,0.177,-13.8,14.3,0.137,0.189,0.105,0.079,0.108,0.03,2924.0,34.0,679.0,130.0,296.0,490.0,414.0,489.0,610.0,122.0,291.0,32.0,0.333,,0.054,,0.021,,217.0,345.0,0.05,52960.0,,,0.47,17.0,194.78,125.0,0.98,4115.0,0.96,,0.0,0.02,46.0,0.68,131.0,17.0,2.0,4.0,3.0,3.0,3.0,4.0,4.0,3.0,643.0,,0.038,157.0,0.102,151.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,2025,,,,,,,,,,
1,10005,3410.0,7200.0,102.0,33.0,35424.0,33872.0,2.074,3.262,2.867,0.315,1.631,9.503,0.964,1.226,0.0,,2.452,0.631,0.135,0.039,9.8,0.161,,-8.4,0.139,0.231,0.127,0.092,0.145,0.027,1056.0,144.0,176.0,143.0,,305.0,97.0,301.0,162.0,133.0,28.0,138.0,0.459,,0.134,,0.028,,144.0,1154.0,0.03,56820.0,0.69,13.0,0.96,180.0,191.14,43.0,1.01,2407.0,0.72,,0.026,0.01,193.0,0.76,288.0,17.0,3.0,4.0,2.0,4.0,4.0,3.0,3.0,3.5,714.0,,,,,,14.2,739.0,7.9,107.0,5.5,107.0,1.9,406.0,2025,,,,,,,,,,
2,10006,4826.0,8700.0,73.0,116.0,68020.0,64728.0,5.279,11.164,1.935,1.086,5.397,37.771,0.0,0.0,0.0,0.0,0.371,0.026,0.147,0.044,-4.9,0.177,13.4,4.5,0.142,0.198,0.122,0.102,0.166,0.04,2560.0,90.0,508.0,154.0,315.0,621.0,271.0,616.0,455.0,141.0,292.0,84.0,,,0.108,,0.029,,177.0,349.0,0.01,42286.0,,,0.85,82.0,201.13,102.0,1.16,2560.0,0.66,,0.0,0.0,22.0,0.56,162.0,18.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.5,1620.0,,0.052,102.0,0.115,91.0,12.1,1355.0,,,,,1.4,484.0,2025,,,,,,,,,,
3,10007,235.0,1369.0,4.0,,5267.0,5267.0,0.144,0.747,0.096,,0.101,1.994,,,,,,1.003,0.151,0.044,20.9,0.19,,21.5,,0.29,0.141,0.135,,0.034,234.0,33.0,35.0,41.0,,85.0,,101.0,33.0,42.0,,33.0,,,0.073,,,,129.0,594.0,0.04,11202.0,,,0.23,111.0,,,0.93,345.0,0.26,,0.0,,,0.24,21.0,24.0,5.0,5.0,4.0,5.0,5.0,4.0,4.0,3.0,175.0,,,,,,13.4,109.0,,,,,1.2,59.0,2025,,,,,,,,,,
4,10011,9905.0,10399.0,117.0,19.0,80106.0,80106.0,10.751,13.381,3.261,0.178,8.719,42.14,0.744,0.299,0.0,,0.573,0.498,0.159,0.043,30.0,0.187,-12.4,6.3,0.129,0.225,0.14,0.106,0.128,0.038,1636.0,32.0,294.0,58.0,148.0,344.0,86.0,336.0,258.0,55.0,136.0,28.0,,,0.078,,,,190.0,333.0,0.05,41057.0,0.43,23.0,1.0,24.0,208.8,67.0,0.95,,,,0.008,,,0.6,205.0,26.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,2.5,1334.0,,0.021,70.0,0.119,70.0,12.8,68.0,,,,,0.9,249.0,2025,,,,,,,,,,


In [22]:
# Join the measure inputs onto `main_df`, matching on hospital and release
# year, then expose the hospital key under the name 'Facility ID'.
# NOTE: this cell is not re-runnable on its own. `main_df` is rebuilt from
# itself, and after the rename it no longer has a 'PROVIDER_ID' column to
# merge on.
join_keys = ['PROVIDER_ID', 'Release year']
main_df = (
    main_df
    .merge(input_df, on=join_keys, how='outer')
    .rename(columns={'PROVIDER_ID': 'Facility ID'})
)
print(main_df.shape)

prvdrs = main_df['Facility ID'].unique()
print(len(prvdrs), 'hospitals in main_df')
print(main_df['Release year'].unique())

main_df.head()

(15299, 255)
3562 hospitals in main_df
['2022' '2021' '2024' '2025' '2023']


Unnamed: 0,Facility ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_sc
ore,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_OP_3B,process_C12,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN
0,10001,-0.846744,-0.321758,0.591483,0.0721,-0.320038,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.149487,7,8,11,8,9,5,2,1,3.0,3.0,2022,0.703271,-1.054647,-0.072635,1.355101,0.507204,-3.873239,-1.364075,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.542717,0.000879,0.641984,-0.846744,0.012064,-0.457136,0.497621,1.664542,-0.606328,-1.47008,-1.684933,-0.053004,0.60215,-0.703618,0.623486,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.143203,0.027686,0.531111,-0.321758,-0.427377,0.325583,0.722043,0.29595,1.045465,0.077072,-0.002603,1.040003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.384517,0.002796,0.645363,0.591483,,0.643289,0.376713,-0.520081,-0.906488,-0.863287,,0.009932,-0.897899,0.442175,-0.264434,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,9.0,0.111111,-0.220009,0.040971,0.815466,-0.320038,-0.536306,-0.084743,-0.269007,-0.072623,0.801375,-0.116805,0.284308,0.481635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060979,-5.363828e-17,0.84576,0.0721,,1.0,,,,,,8391.0,13866.0,171.0,229.0,102470.0,102470.0,8.711,19.223,4.938,2.025,7.631,78.514,0.574,0.312,0.608,0.0,0.786,0.56,0.155,0.049,16.1,0.212,6.5,-7.1,0.198,0.145,0.089,0.083,0.116,0.026,1805.0,165.0,904.0,310.0,424.0,503.0,533.0,474.0,725.0,270.0,412.0,175.0,0.459,122.0,0.04,925.0,0.048,147.0,183.0,176.0,0.03,59762.0,,,0.9,52.0,184.28,115.0,0.81,4817.0,0.97,,,0.0,18.0,0.55,56.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,2828.228824,0.038,200.0,0.135,193.0,14.1,511.0,11.3,192.0,6.7,192.0,0.9,1003.0,,,,,,,,,,
1,10001,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,-0.070634,1.1826,-1.18578,1.207127,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,0.473469,0.031191,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,,1.0,,0.466001,-0.054072,1.0,1.0,7846.0,13268.0,165.0,216.0,102765.0,102765.0,8.086,18.498,4.615,1.884,7.305,76.294,0.742,0.324,0.65,0.0,0.548,0.537,0.156,0.046,21.6,0.207,7.0,-20.6,0.166,0.157,0.113,0.084,0.119,0.023,4474.0,258.0,1106.0,443.0,620.0,594.0,644.0,554.0,858.0,374.0,586.0,250.0,0.389,211.0,0.072,2117.0,0.028,211.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,170.87,165.0,0.93,4817.0,0.97,,,0.0,24.0,0.6,102.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,3896.300852,0.047,281.0,0.149,268.0,13.4,606.0,10.8,190.0,7.1,190.0,0.8,993.0,,,,,,,0.98,162.0,103.0,655.0
2,10001,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,0.423838,-1.03233,0.626949,-0.252865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,-0.431525,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,0.583665,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,,1.0,,,,,,9149.0,17310.0,214.0,,104733.0,104733.0,9.597,24.766,5.994,,11.4,67.066,0.938,0.363,1.335,,0.965,0.507,0.142,0.038,23.4,0.19,-15.4,23.6,0.148,0.18,0.089,0.088,0.12,0.027,2912.0,49.0,614.0,117.0,274.0,403.0,398.0,400.0,549.0,107.0,278.0,49.0,0.38,79.0,0.061,1410.0,0.028,178.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,184.68,134.0,1.21,3905.0,0.95,2496.0,0.836,0.0,32.0,0.65,127.0,15.0,3.0,3.0,3.0,3.0,4.0,4.0,3.5,3.5,544.0,2542.0385,0.041,132.0,0.105,126.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,,,,,,,,,,
3,10001,0.297087,0.359381,0.345929,0.031544,-0.605072,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.154858,7,7,11,8,10,5,2,1,3.0,4.0,2025,1.406667,-1.04537,1.012712,0.652001,-0.313538,0.023985,-0.732699,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,0.143394,-0.073481,0.730006,0.297087,1.021693,-0.36918,-0.44784,0.291152,0.607499,0.86006,0.191466,0.767466,-1.029383,0.627143,-0.259345,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.205521,0.013485,0.534354,0.359381,0.696915,0.303101,0.74229,-0.468871,,0.477107,-0.098662,0.103228,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.25073,-0.019735,0.781849,0.345929,-0.524696,1.04392,0.080311,0.781915,-1.064893,-1.1425,,-3.168749,0.589498,0.141249,0.446595,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.281735,0.034345,0.522385,-0.605072,-1.160819,0.560234,-0.240795,-0.264121,-0.32048,0.919364,0.711438,0.013951,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.027346,1.301665e-17,0.866926,0.031544,,,,,,,,9538.0,16332.0,240.0,94.0,103195.0,103195.0,10.082,23.712,6.618,0.858,11.232,65.234,0.496,0.169,1.209,,0.445,0.491,0.141,0.044,10.6,0.177,-13.8,14.3,0.137,0.189,0.105,0.079,0.108,0.03,2924.0,34.0,679.0,130.0,296.0,490.0,414.0,489.0,610.0,122.0,291.0,32.0,0.333,,0.054,,0.021,,217.0,345.0,0.05,52960.0,,,0.47,17.0,194.78,125.0,0.98,4115.0,0.96,,0.0,0.02,46.0,0.68,131.0,17.0,2.0,4.0,3.0,3.0,3.0,4.0,4.0,3.0,643.0,,0.038,157.0,0.102,151.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,,,,,,,,,,
4,10001,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.924456,0.159253,0.872692,0.062022,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,-1.039363,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,-1.763346,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,,1.0,,,,,,10024.0,17731.0,154.0,200.0,101908.0,101451.0,10.597,26.63,4.548,1.845,9.412,72.686,0.661,0.3,1.099,0.0,0.85,0.66,0.142,0.042,21.8,0.199,1.9,-1.5,0.164,0.159,0.083,0.085,0.124,0.024,3058.0,98.0,755.0,202.0,319.0,436.0,489.0,407.0,630.0,182.0,317.0,102.0,0.425,146.0,0.057,1488.0,0.067,208.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,173.39,120.0,1.01,3795.0,0.97,2323.0,0.737,0.09,34.0,0.46,146.0,15.0,2.0,3.0,2.0,4.0,4.0,3.0,3.0,3.5,434.0,2046.895485,0.047,172.0,0.117,165.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,,,,,,,,,,


In [23]:
# Spot check: pull the 2025 release-year row(s) for facility 140119.
is_2025_release = main_df['Release year'] == '2025'
is_target_facility = main_df['Facility ID'] == '140119'
tdf = main_df[is_2025_release & is_target_facility]
print(tdf.shape)
tdf.head()

(1, 255)


Unnamed: 0,Facility ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_sc
ore,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_OP_3B,process_C12,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN
4252,140119,2.67408,0.275756,0.285037,0.65613,-0.243037,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.826856,7,8,11,8,10,5,2,1,3.0,5.0,2025,1.563415,0.519368,2.072479,2.825384,2.378459,1.627273,2.163914,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,1.878613,-0.073481,0.730006,2.67408,-0.84796,-0.911786,0.0044,1.718774,0.007842,2.350308,0.191466,-0.707551,-1.686084,1.909133,-0.259345,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.160836,0.013485,0.534354,0.275756,-0.98401,0.42692,0.709768,0.286832,0.728623,-0.648859,-0.178669,1.284363,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.203121,-0.019735,0.781849,0.285037,0.869932,1.152913,-0.449947,-0.439992,-3.066628,0.597395,,0.405158,1.56186,0.552484,-2.109312,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.092614,0.034345,0.522385,-0.243037,0.656485,0.560234,-0.240795,0.752728,0.66872,0.919364,0.711438,0.522354,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.568816,1.301665e-17,0.866926,0.65613,,,,,,,,30897.0,13142.0,283.0,322.0,158342.0,136271.0,34.535,21.285,7.766,2.723,9.601,110.369,0.405,0.188,0.644,0.367,1.146,0.526,0.151,0.044,22.0,0.162,18.7,4.0,0.107,0.111,0.058,0.063,0.106,0.041,4387.0,468.0,710.0,106.0,149.0,308.0,277.0,277.0,535.0,89.0,102.0,433.0,0.263,,0.079,,0.042,,324.0,335.0,0.01,60593.0,,,0.97,96.0,127.22,300.0,0.76,16232.0,0.98,,0.281,0.0,22.0,0.25,88.0,15.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,3.5,3147.0,,0.025,105.0,0.107,102.0,11.5,1720.0,12.8,779.0,3.8,779.0,1.1,1145.0,,,,,,,,,,


## Load general hospital information from hospitals-data-archive project

In [24]:
# CMS used July 2021 measure level results for July 2022 star rating reporting
# CMS used October 2020 measure level results for April 2021 star rating reporting

n_dir = stars_dir + 'CareCompare'

# General-information columns kept from each Hospital_General_Information.csv
ls = ['Facility ID', 'Facility Name',
      'Address', 'City', 'State', 'ZIP Code', 'County Name',
      'Hospital Type', 'Hospital Ownership', 'Emergency Services',
      'Meets criteria for promoting interoperability of EHRs',
      'Hospital overall rating',
      'Hospital overall rating footnote',
     ]

# Alternative column names (cols1) and the names they are normalised to (cols2);
# the two lists are aligned by position.
cols1 = ['Provider ID', "Measure Start Date", "Measure End Date", 'Hospital Name', 'Address 1', 'City/Town',
         'County/Parish', 'Telephone Number']

cols2 = ['Facility ID', "Start Date", "End Date", 'Facility Name', 'Address', 'City', 'County Name',
         'Phone Number']

# Release year -> (snapshot folder under n_dir, file month, file year)
gen_info_sources = {
    '2025': ('hospitals_10_2024', '10', '2024'),
    '2024': ('hospitals_01_2024', '01', '2024'),
    '2023': ('hospitals_01_2023', '01', '2023'),
    '2022': ('hospitals_07_2021', '07', '2021'),
    '2021': ('hospitals_10_2020', '10', '2020'),
}


def load_general_info(stars_year_df, facility_ids, folder, file_month, file_year):
    """Load one general-information snapshot and merge it with one year of stars data.

    Parameters
    ----------
    stars_year_df : pandas.DataFrame
        Rows of ``main_df`` for a single release year; must contain 'Facility ID'.
    facility_ids : array-like
        Facility IDs to keep from the general-information file.
    folder : str
        Snapshot folder under ``n_dir`` holding Hospital_General_Information.csv.
    file_month, file_year : str
        Recorded verbatim in the 'file_month' / 'file_year' columns.

    Returns
    -------
    pandas.DataFrame
        Outer merge (on 'Facility ID') of the filtered general information with
        ``stars_year_df``.
    """
    info = pd.read_csv(n_dir + '/' + folder + '/Hospital_General_Information.csv')

    # Normalise alternative column names for every snapshot, not just some years.
    # A name is only renamed when its target is absent, so no duplicate columns arise.
    renames = {old: new for old, new in zip(cols1, cols2)
               if old in info.columns and new not in info.columns}
    info = info.rename(columns=renames)

    info['Hospital overall rating footnote'] = info['Hospital overall rating footnote'].astype(str)
    info['Facility ID'] = info['Facility ID'].astype(str)

    # Filter on the IDs of the frame we are about to merge with, rather than on
    # prvdrs_20XX globals left behind by earlier cells.
    info = info[info['Facility ID'].isin(facility_ids)]
    info = info.filter(items=ls, axis=1).assign(file_month=file_month, file_year=file_year)

    # 'Facility ID' is the join key; stating it avoids silently joining on any
    # other column the two frames might happen to share.
    return info.merge(stars_year_df, how='outer', on='Facility ID')


general_info = {}
for release_year, (folder, file_month, file_year) in gen_info_sources.items():
    tdf = main_df[main_df['Release year'] == release_year]
    prvdrs = tdf['Facility ID'].unique()
    general_info[release_year] = load_general_info(tdf, prvdrs, folder, file_month, file_year)
    print('Release years:', general_info[release_year]['Release year'].unique())

# Keep the per-year names that later cells rely on.
df_2025 = general_info['2025']
df_2024 = general_info['2024']
df_2023 = general_info['2023']
df_2022 = general_info['2022']
df_2021 = general_info['2021']

Release years: ['2025']
Release years: ['2024']
Release years: ['2023']
Release years: ['2022']
Release years: ['2021']


In [26]:
# Report (rows, columns) of each per-year frame, newest release first.
for year_frame in (df_2025, df_2024, df_2023, df_2022, df_2021):
    print(year_frame.shape)


(2900, 269)
(2847, 269)
(3076, 269)
(3121, 269)
(3355, 269)


In [27]:
# Stack the per-year frames row-wise, newest release first. The frames share the
# same columns, so concatenation is the intended operation. An outer merge with
# no `on=` would join on every shared column (including float and NaN values),
# which is slow, can collapse or multiply duplicate rows, and yields a row order
# that depends on the pandas version.
mdf = pd.concat([df_2025, df_2024, df_2023, df_2022, df_2021], ignore_index=True)

# Drop general-information rows that have no stars record (no release year).
mdf = mdf[mdf['Release year'].notna()]
print(mdf.shape)
print('Release years:', mdf['Release year'].unique())
mdf.head()

(15299, 269)
Release years: ['2025' '2024' '2023' '2022' '2021']


Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Hospital overall rating,Hospital overall rating footnote,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01
,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_OP_3B,process_C12,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2
B_DEN
0,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301.0,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,Y,3,,10,2024,0.297087,0.359381,0.345929,0.031544,-0.605072,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.154858,7,7,11,8,10,5,2,1,3.0,4.0,2025,1.406667,-1.04537,1.012712,0.652001,-0.313538,0.023985,-0.732699,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,0.143394,-0.073481,0.730006,0.297087,1.021693,-0.36918,-0.44784,0.291152,0.607499,0.86006,0.191466,0.767466,-1.029383,0.627143,-0.259345,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.205521,0.013485,0.534354,0.359381,0.696915,0.303101,0.74229,-0.468871,,0.477107,-0.098662,0.103228,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.25073,-0.019735,0.781849,0.345929,-0.524696,1.04392,0.080311,0.781915,-1.064893,-1.1425,,-3.168749,0.589498,0.141249,0.446595,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.281735,0.034345,0.522385,-0.605072,-1.160819,0.560234,-0.240795,-0.264121,-0.32048,0.919364,0.711438,0.013951,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.027346,1.301665e-17,0.866926,0.031544,,,,,,,,9538.0,16332.0,240.0,94.0,103195.0,103195.0,10.082,23.712,6.618,0.858,11.232,65.234,0.496,0.169,1.209,,0.445,0.491,0.141,0.044,10.6,0.177,-13.8,14.3,0.137,0.189,0.105,0.079,0.108,0.03,2924.0,34.0,679.0,130.0,296.0,490.0,414.0,489.0,610.0,122.0,291.0,32.0,0.333,,0.054,,0.021,,217.0,345.0,0.05,52960.0,,,0.47,17.0,194.78,125.0,0.98,4115.0,0.96,,0.0,0.02,46.0,0.68,131.0,17.0,2.0,4.0,3.0,3.0,3.0,4.0,4.0,3.0,643.0,,0.038,157.0,0.102,151.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,,,,,,,,,,
1,10005,MARSHALL MEDICAL CENTERS,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957.0,MARSHALL,Acute Care Hospitals,Government - Hospital District or Authority,Yes,Y,2,,10,2024,-0.842133,0.628147,-0.398706,0.114241,-0.331341,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.14942,6,7,9,8,11,5,2,1,3.0,3.0,2025,-1.493176,,0.151652,-0.365326,-1.763074,-0.0829,-0.576635,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-0.688243,-0.073481,0.730006,-0.842133,,-0.331102,0.548845,-1.034497,,2.449658,1.035812,1.652476,1.889286,-0.072125,-2.996117,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.349137,0.013485,0.534354,0.628147,1.155349,-0.333682,-1.066986,1.1482,,-2.746595,-0.418689,-0.057836,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.331463,-0.019735,0.781849,-0.398706,-0.395655,-0.264002,-1.616515,0.374613,0.300777,-0.272553,-0.094732,0.33368,-1.160754,0.346866,0.922113,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.138742,0.034345,0.522385,-0.331341,-0.252167,0.560234,-1.177965,0.752728,0.66872,-0.048276,-0.233316,0.522354,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.099039,1.301665e-17,0.866926,0.114241,,,,,,,,3410.0,7200.0,102.0,33.0,35424.0,33872.0,2.074,3.262,2.867,0.315,1.631,9.503,0.964,1.226,0.0,,2.452,0.631,0.135,0.039,9.8,0.161,,-8.4,0.139,0.231,0.127,0.092,0.145,0.027,1056.0,144.0,176.0,143.0,,305.0,97.0,301.0,162.0,133.0,28.0,138.0,0.459,,0.134,,0.028,,144.0,1154.0,0.03,56820.0,0.69,13.0,0.96,180.0,191.14,43.0,1.01,2407.0,0.72,,0.026,0.01,193.0,0.76,288.0,17.0,3.0,4.0,2.0,4.0,4.0,3.0,3.0,3.5,714.0,,,,,,14.2,739.0,7.9,107.0,5.5,107.0,1.9,406.0,,,,,,,,,,
2,10006,NORTH ALABAMA MEDICAL CENTER,1701 VETERANS DRIVE,FLORENCE,AL,35630.0,LAUDERDALE,Acute Care Hospitals,Proprietary,Yes,Y,1,,10,2024,-1.540552,-0.10599,0.685865,-1.307398,-0.438027,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.55154,7,8,9,8,9,5,2,1,3.0,2.0,2025,-3.139032,-2.730472,-0.510702,-0.134116,-0.624153,-0.243229,-1.004953,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.198094,-0.073481,0.730006,-1.540552,-0.543063,0.368574,-0.017553,1.106936,-0.951609,0.86006,0.191466,-0.117544,,,-1.285635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.043152,0.013485,0.534354,-0.10599,-0.831198,0.977982,1.031569,1.1482,1.108414,0.595968,0.964283,-0.863156,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.516508,-0.019735,0.781849,0.685865,-0.524696,-0.590982,-1.065046,0.316427,-0.316581,0.597395,,-0.45258,,0.552484,-0.266681,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,9.0,0.111111,-0.194473,0.034345,0.522385,-0.438027,-1.160819,-1.449464,-1.177965,-1.28097,-1.30968,-1.015916,-1.178071,-0.494452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-1.133417,1.301665e-17,0.866926,-1.307398,,,,,,,,4826.0,8700.0,73.0,116.0,68020.0,64728.0,5.279,11.164,1.935,1.086,5.397,37.771,0.0,0.0,0.0,0.0,0.371,0.026,0.147,0.044,-4.9,0.177,13.4,4.5,0.142,0.198,0.122,0.102,0.166,0.04,2560.0,90.0,508.0,154.0,315.0,621.0,271.0,616.0,455.0,141.0,292.0,84.0,,,0.108,,0.029,,177.0,349.0,0.01,42286.0,,,0.85,82.0,201.13,102.0,1.16,2560.0,0.66,,0.0,0.0,22.0,0.56,162.0,18.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.5,1620.0,,0.052,102.0,0.115,91.0,12.1,1355.0,,,,,1.4,484.0,,,,,,,,,,
3,10007,MIZELL MEMORIAL HOSPITAL,702 N MAIN ST,OPP,AL,36467.0,COVINGTON,Acute Care Hospitals,Voluntary non-profit - Private,Yes,Y,1,,10,2024,-3.327859,-0.931837,-0.320822,1.283103,-3.018509,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-1.087652,3,3,7,8,7,5,2,1,3.0,1.0,2025,,,-2.69647,-1.012717,-3.799328,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-2.502838,-0.073481,0.730006,-3.327859,,-0.859429,-0.763969,-0.218713,,-0.431487,0.191466,-0.707551,,,-0.601442,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,7.0,0.142857,-0.484446,0.013485,0.534354,-0.931837,0.08567,,,,,,-1.269045,0.371668,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.333333,-0.270569,-0.019735,0.781849,-0.320822,-0.524696,-2.770851,-0.322685,,0.581394,-0.707526,,-4.884224,,,-2.168752,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,7.0,0.142857,-1.542477,0.034345,0.522385,-3.018509,1.565137,1.565082,0.696376,1.769578,1.657921,0.919364,0.711438,0.013951,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,1.112356,1.301665e-17,0.866926,1.283103,,,,,,,,235.0,1369.0,4.0,,5267.0,5267.0,0.144,0.747,0.096,,0.101,1.994,,,,,,1.003,0.151,0.044,20.9,0.19,,21.5,,0.29,0.141,0.135,,0.034,234.0,33.0,35.0,41.0,,85.0,,101.0,33.0,42.0,,33.0,,,0.073,,,,129.0,594.0,0.04,11202.0,,,0.23,111.0,,,0.93,345.0,0.26,,0.0,,,0.24,21.0,24.0,5.0,5.0,4.0,5.0,5.0,4.0,4.0,3.0,175.0,,,,,,13.4,109.0,,,,,1.2,59.0,,,,,,,,,,
4,10011,ST. VINCENT'S EAST,50 MEDICAL PARK EAST DRIVE,BIRMINGHAM,AL,35235.0,JEFFERSON,Acute Care Hospitals,Voluntary non-profit - Private,Yes,Y,3,,10,2024,-0.553031,-0.59124,0.30465,-0.043858,-1.017479,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.316463,7,7,9,8,7,5,2,1,3.0,2.0,2025,-0.160816,1.000826,-0.775644,-0.966475,-1.555997,0.451529,-1.333802,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.477197,-0.073481,0.730006,-0.553031,0.941154,-1.292562,-0.096585,0.393125,-1.431334,-0.133438,0.360335,-1.887565,,,0.424848,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.302447,0.013485,0.534354,-0.59124,-0.525576,-0.03434,0.519768,1.1482,,0.27151,-0.114664,0.264292,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.218456,-0.019735,0.781849,0.30465,-0.484991,,-0.428737,,-0.559782,-1.1425,-1.45485,0.619592,,,-0.028923,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,7.0,0.142857,-0.49717,0.034345,0.522385,-1.017479,-0.252167,0.560234,-0.240795,-0.264121,0.66872,-0.048276,-0.233316,-0.494452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.038022,1.301665e-17,0.866926,-0.043858,,,,,,,,9905.0,10399.0,117.0,19.0,80106.0,80106.0,10.751,13.381,3.261,0.178,8.719,42.14,0.744,0.299,0.0,,0.573,0.498,0.159,0.043,30.0,0.187,-12.4,6.3,0.129,0.225,0.14,0.106,0.128,0.038,1636.0,32.0,294.0,58.0,148.0,344.0,86.0,336.0,258.0,55.0,136.0,28.0,,,0.078,,,,190.0,333.0,0.05,41057.0,0.43,23.0,1.0,24.0,208.8,67.0,0.95,,,,0.008,,,0.6,205.0,26.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,2.5,1334.0,,0.021,70.0,0.119,70.0,12.8,68.0,,,,,0.9,249.0,,,,,,,,,,


In [28]:
# Bed counts and coordinates per facility, read from the hospitals-data-archive pickle.
# NOTE(review): read_pickle executes arbitrary code on load; only use with trusted files.
df = pd.read_pickle('~/GitHub/hospitals-data-archive/files_for_app/GenDat4App.pkl')

# Park the curated name/number column in the index while the three outer header
# levels are dropped from the remaining columns, then bring it back as a column.
df2 = df.set_index(
    ('Curated Name and Num', 'Curated Name and Num', 'Curated Name and Num', 'Curated Name and Num')
)
df2.columns = df2.columns.droplevel([0,1,2])
df2 = (
    df2.reset_index()
       .rename(columns={'Hospital Provider Number (PRVDR_NUM)': 'Facility ID',
                        'Total Facility (S3_1_C2_27)': 'Beds'})
       .filter(items=['Facility ID', 'Beds', 'Lat', 'Lon'], axis=1)
)

# Replace each row's bed count with its facility's mean, rounded to a whole number.
mean_beds = df2.groupby(['Facility ID'])['Beds'].transform('mean')
df2['Beds'] = np.round(mean_beds, 0)

# Restrict to facilities that appear in the stars data.
prvdrs = main_df['Facility ID'].unique()
df2 = df2[df2['Facility ID'].isin(prvdrs)]

print(len(prvdrs))
print(len(df2['Facility ID'].unique()))

df2 = df2.drop_duplicates()
print(df2.shape)
df2.head()


3562
3433
(3433, 4)


Unnamed: 0,Facility ID,Beds,Lat,Lon
0,10001,377.0,31.214058,-85.361725
12,10005,193.0,,
24,10006,314.0,34.802756,-87.652191
36,10007,64.0,31.291972,-86.255415
48,10008,49.0,31.692595,-86.266156


In [29]:
# Compare the facility IDs in main_df against those in the beds/coordinates frame.
hoarc = sorted(main_df['Facility ID'].unique())
hcris = sorted(df2['Facility ID'].unique())

if hoarc != hcris:
    # IDs present in main_df but missing from df2; only the first is shown as an example.
    ls1 = np.setdiff1d(hoarc, hcris)
    print(len(ls1), 'hospitals in hospitals data archive that are not in hcris:')
    if len(ls1) > 0:
        print(ls1[0], "and other 'F' hospitals")

    # IDs present in df2 but missing from main_df.
    ls2 = np.setdiff1d(hcris, hoarc)
    print(len(ls2), 'hospitals in hcris that are not in hospitals data archive')
else:
    print('same')


129 hospitals in hospitals data archive that are not in hcris:
01014F and other 'F' hospitals
0 hospitals in hcris that are not in hospitals data archive


In [30]:
# Attach Beds / Lat / Lon to the stacked stars + general-information frame,
# then discard any row left without a facility ID.
# NOTE(review): this rebinds main_df, which earlier cells read as their input,
# so re-running those cells afterwards will see this wider frame.
with_beds = mdf.merge(df2, on='Facility ID', how='outer')
has_no_facility_id = with_beds['Facility ID'].isin([np.nan])
main_df = with_beds[~has_no_facility_id]

In [31]:
def _zip_to_str(value):
    """Return a ZIP code as a zero-padded 5-character string, or NaN if unparseable.

    The ZIP column holds numeric values (e.g. 36301.0), so a plain
    ``str(int(value))`` turns ZIP codes that start with zero into 3- or 4-digit
    strings (02115 -> '2115'). ``zfill(5)`` restores the leading zeros.
    """
    try:
        return str(int(value)).zfill(5)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, non-numeric text and infinities all map to a missing value.
        return np.nan


zips1 = main_df['ZIP Code'].tolist()
zips2 = [_zip_to_str(z) for z in zips1]

main_df['ZIP Code'] = zips2
print(main_df.shape)
main_df.head()


(15299, 272)


Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Hospital overall rating,Hospital overall rating footnote,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01
,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_OP_3B,process_C12,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2
B_DEN,Beds,Lat,Lon
0,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,Y,3,,10,2024,0.297087,0.359381,0.345929,0.031544,-0.605072,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.154858,7,7,11,8,10,5,2,1,3.0,4.0,2025,1.406667,-1.04537,1.012712,0.652001,-0.313538,0.023985,-0.732699,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,0.143394,-0.073481,0.730006,0.297087,1.021693,-0.36918,-0.44784,0.291152,0.607499,0.86006,0.191466,0.767466,-1.029383,0.627143,-0.259345,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.205521,0.013485,0.534354,0.359381,0.696915,0.303101,0.74229,-0.468871,,0.477107,-0.098662,0.103228,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.25073,-0.019735,0.781849,0.345929,-0.524696,1.04392,0.080311,0.781915,-1.064893,-1.1425,,-3.168749,0.589498,0.141249,0.446595,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.281735,0.034345,0.522385,-0.605072,-1.160819,0.560234,-0.240795,-0.264121,-0.32048,0.919364,0.711438,0.013951,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.027346,1.301665e-17,0.866926,0.031544,,,,,,,,9538.0,16332.0,240.0,94.0,103195.0,103195.0,10.082,23.712,6.618,0.858,11.232,65.234,0.496,0.169,1.209,,0.445,0.491,0.141,0.044,10.6,0.177,-13.8,14.3,0.137,0.189,0.105,0.079,0.108,0.03,2924.0,34.0,679.0,130.0,296.0,490.0,414.0,489.0,610.0,122.0,291.0,32.0,0.333,,0.054,,0.021,,217.0,345.0,0.05,52960.0,,,0.47,17.0,194.78,125.0,0.98,4115.0,0.96,,0.0,0.02,46.0,0.68,131.0,17.0,2.0,4.0,3.0,3.0,3.0,4.0,4.0,3.0,643.0,,0.038,157.0,0.102,151.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,,,,,,,,,,,377.0,31.214058,-85.361725
1,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,Y,3,,1,2024,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,0.423838,-1.03233,0.626949,-0.252865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,-0.431525,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,0.583665,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,,1.0,,,,,,9149.0,17310.0,214.0,,104733.0,104733.0,9.597,24.766,5.994,,11.4,67.066,0.938,0.363,1.335,,0.965,0.507,0.142,0.038,23.4,0.19,-15.4,23.6,0.148,0.18,0.089,0.088,0.12,0.027,2912.0,49.0,614.0,117.0,274.0,403.0,398.0,400.0,549.0,107.0,278.0,49.0,0.38,79.0,0.061,1410.0,0.028,178.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,184.68,134.0,1.21,3905.0,0.95,2496.0,0.836,0.0,32.0,0.65,127.0,15.0,3.0,3.0,3.0,3.0,4.0,4.0,3.5,3.5,544.0,2542.0385,0.041,132.0,0.105,126.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,,,,,,,,,,,377.0,31.214058,-85.361725
2,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,Y,3,,1,2023,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.924456,0.159253,0.872692,0.062022,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,-1.039363,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,-1.763346,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,,1.0,,,,,,10024.0,17731.0,154.0,200.0,101908.0,101451.0,10.597,26.63,4.548,1.845,9.412,72.686,0.661,0.3,1.099,0.0,0.85,0.66,0.142,0.042,21.8,0.199,1.9,-1.5,0.164,0.159,0.083,0.085,0.124,0.024,3058.0,98.0,755.0,202.0,319.0,436.0,489.0,407.0,630.0,182.0,317.0,102.0,0.425,146.0,0.057,1488.0,0.067,208.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,173.39,120.0,1.01,3795.0,0.97,2323.0,0.737,0.09,34.0,0.46,146.0,15.0,2.0,3.0,2.0,4.0,4.0,3.0,3.0,3.5,434.0,2046.895485,0.047,172.0,0.117,165.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,,,,,,,,,,,377.0,31.214058,-85.361725
3,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Houston,Acute Care Hospitals,Government - Hospital District or Authority,Yes,,3,,7,2021,-0.846744,-0.321758,0.591483,0.0721,-0.320038,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.149487,7,8,11,8,9,5,2,1,3.0,3.0,2022,0.703271,-1.054647,-0.072635,1.355101,0.507204,-3.873239,-1.364075,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.542717,0.000879,0.641984,-0.846744,0.012064,-0.457136,0.497621,1.664542,-0.606328,-1.47008,-1.684933,-0.053004,0.60215,-0.703618,0.623486,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.143203,0.027686,0.531111,-0.321758,-0.427377,0.325583,0.722043,0.29595,1.045465,0.077072,-0.002603,1.040003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.384517,0.002796,0.645363,0.591483,,0.643289,0.376713,-0.520081,-0.906488,-0.863287,,0.009932,-0.897899,0.442175,-0.264434,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,9.0,0.111111,-0.220009,0.040971,0.815466,-0.320038,-0.536306,-0.084743,-0.269007,-0.072623,0.801375,-0.116805,0.284308,0.481635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060979,-5.363828e-17,0.84576,0.0721,,1.0,,,,,,8391.0,13866.0,171.0,229.0,102470.0,102470.0,8.711,19.223,4.938,2.025,7.631,78.514,0.574,0.312,0.608,0.0,0.786,0.56,0.155,0.049,16.1,0.212,6.5,-7.1,0.198,0.145,0.089,0.083,0.116,0.026,1805.0,165.0,904.0,310.0,424.0,503.0,533.0,474.0,725.0,270.0,412.0,175.0,0.459,122.0,0.04,925.0,0.048,147.0,183.0,176.0,0.03,59762.0,,,0.9,52.0,184.28,115.0,0.81,4817.0,0.97,,,0.0,18.0,0.55,56.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,2828.228824,0.038,200.0,0.135,193.0,14.1,511.0,11.3,192.0,6.7,192.0,0.9,1003.0,,,,,,,,,,,377.0,31.214058,-85.361725
4,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,Y,2,,10,2020,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,-0.070634,1.1826,-1.18578,1.207127,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,0.473469,0.031191,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,,1.0,,0.466001,-0.054072,1.0,1.0,7846.0,13268.0,165.0,216.0,102765.0,102765.0,8.086,18.498,4.615,1.884,7.305,76.294,0.742,0.324,0.65,0.0,0.548,0.537,0.156,0.046,21.6,0.207,7.0,-20.6,0.166,0.157,0.113,0.084,0.119,0.023,4474.0,258.0,1106.0,443.0,620.0,594.0,644.0,554.0,858.0,374.0,586.0,250.0,0.389,211.0,0.072,2117.0,0.028,211.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,170.87,165.0,0.93,4817.0,0.97,,,0.0,24.0,0.6,102.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,3896.300852,0.047,281.0,0.149,268.0,13.4,606.0,10.8,190.0,7.1,190.0,0.8,993.0,,,,,,,0.98,162.0,103.0,655.0,377.0,31.214058,-85.361725


In [32]:

# Tidy the hospital metadata on main_df. Every step returns a new frame, so no
# column is ever assigned on a filtered slice (the pattern that triggers
# SettingWithCopyWarning, which the global warnings filter would hide).

# Show Copley Memorial Hospital under the name 'RUSH COPLEY'.
main_df = main_df.replace({'Facility Name': {'COPLEY MEMORIAL HOSPITAL': 'RUSH COPLEY'}})

# Combined "<facility name> (<facility id>)" label.
main_df['Name and Num'] = main_df['Facility Name'] + ' (' + main_df['Facility ID'] + ')'

# The concatenation above yields NaN whenever the name or the ID is missing;
# drop those rows, then give the remaining missing descriptive fields an
# explicit placeholder.
main_df = (
    main_df
    .dropna(subset=['Name and Num'])
    .fillna({
        'State': 'Not given',
        'Hospital Type': 'Not given',
        'Hospital Ownership': 'Not given',
    })
)


In [33]:
# Spot check: rows for Facility ID 140119 in the 2024 release.
tdf = main_df.loc[
    (main_df['Release year'] == '2024') & (main_df['Facility ID'] == '140119')
]
print(tdf.shape)
tdf.head()

(1, 273)


Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Hospital overall rating,Hospital overall rating footnote,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01
,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_OP_3B,process_C12,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2
B_DEN,Beds,Lat,Lon,Name and Num
3961,140119,RUSH UNIVERSITY MEDICAL CENTER,1653 WEST CONGRESS PARKWAY,CHICAGO,IL,60612,COOK,Acute Care Hospitals,Voluntary non-profit - Private,Yes,Y,5,,1,2024,2.79693,0.201855,0.085723,0.657863,-0.396658,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.775723,7,8,11,8,10,5,2,1,3.0,5.0,2024,0.989852,0.99615,1.898914,3.264295,2.645093,1.644249,1.873249,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,1.901686,-0.068391,0.704371,2.79693,-0.010707,-0.732692,-0.178886,1.723258,-0.492687,1.248099,0.319722,-0.495748,-1.689883,1.911113,-0.252865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.122611,0.014038,0.537879,0.201855,-1.491252,0.131755,0.616551,0.629388,0.32231,-0.731715,-0.020107,0.90119,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.044765,-0.021526,0.773315,0.085723,1.123714,1.103375,-0.810318,-0.440709,-3.393917,0.592956,,0.406658,1.371101,0.583665,-2.382227,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.18457,0.029039,0.538522,-0.396658,0.617053,0.659449,-0.243371,0.124487,0.479704,0.876397,1.182464,0.883622,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.572475,1.093644e-15,0.870204,0.657863,,1.0,,,,,,30819.0,13216.0,267.0,350.0,158486.0,138471.0,33.976,21.149,7.412,2.912,7.606,108.008,0.677,0.284,0.405,0.687,1.315,0.518,0.15,0.041,22.0,0.179,6.6,11.5,0.108,0.116,0.055,0.066,0.114,0.04,4725.0,636.0,606.0,100.0,122.0,264.0,242.0,225.0,473.0,83.0,83.0,646.0,0.281,128.0,0.1,2072.0,0.043,399.0,349.0,331.0,0.01,60593.0,,,0.97,96.0,128.8,325.0,0.84,15349.0,0.98,14781.0,0.991,0.0,20.0,0.18,92.0,16.0,4.0,4.0,3.0,3.0,4.0,4.0,4.5,4.0,2954.0,5938.526338,0.023,97.0,0.115,95.0,11.5,1720.0,12.8,779.0,3.8,779.0,1.1,1145.0,,,,,,,,,,,677.0,41.875166,-87.66851,RUSH UNIVERSITY MEDICAL CENTER (140119)


In [34]:
## Persist main_df as a pickle under FilesForApp/ (path is relative to stars_dir)
main_df.to_pickle(f'{stars_dir}FilesForApp/hosp_stars_dat.pkl')

In [35]:
# Read the pickle back and display the same facility/year slice as a
# round-trip check of what was written.
main_df = pd.read_pickle(f'{stars_dir}FilesForApp/hosp_stars_dat.pkl')
tdf = main_df.loc[
    (main_df['Facility ID'] == '140119') & (main_df['Release year'] == '2024')
]
tdf.head()

Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Hospital overall rating,Hospital overall rating footnote,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01
,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_OP_3B,process_C12,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2
B_DEN,Beds,Lat,Lon,Name and Num
3961,140119,RUSH UNIVERSITY MEDICAL CENTER,1653 WEST CONGRESS PARKWAY,CHICAGO,IL,60612,COOK,Acute Care Hospitals,Voluntary non-profit - Private,Yes,Y,5,,1,2024,2.79693,0.201855,0.085723,0.657863,-0.396658,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.775723,7,8,11,8,10,5,2,1,3.0,5.0,2024,0.989852,0.99615,1.898914,3.264295,2.645093,1.644249,1.873249,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,1.901686,-0.068391,0.704371,2.79693,-0.010707,-0.732692,-0.178886,1.723258,-0.492687,1.248099,0.319722,-0.495748,-1.689883,1.911113,-0.252865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.122611,0.014038,0.537879,0.201855,-1.491252,0.131755,0.616551,0.629388,0.32231,-0.731715,-0.020107,0.90119,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.044765,-0.021526,0.773315,0.085723,1.123714,1.103375,-0.810318,-0.440709,-3.393917,0.592956,,0.406658,1.371101,0.583665,-2.382227,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,10.0,0.1,-0.18457,0.029039,0.538522,-0.396658,0.617053,0.659449,-0.243371,0.124487,0.479704,0.876397,1.182464,0.883622,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.572475,1.093644e-15,0.870204,0.657863,,1.0,,,,,,30819.0,13216.0,267.0,350.0,158486.0,138471.0,33.976,21.149,7.412,2.912,7.606,108.008,0.677,0.284,0.405,0.687,1.315,0.518,0.15,0.041,22.0,0.179,6.6,11.5,0.108,0.116,0.055,0.066,0.114,0.04,4725.0,636.0,606.0,100.0,122.0,264.0,242.0,225.0,473.0,83.0,83.0,646.0,0.281,128.0,0.1,2072.0,0.043,399.0,349.0,331.0,0.01,60593.0,,,0.97,96.0,128.8,325.0,0.84,15349.0,0.98,14781.0,0.991,0.0,20.0,0.18,92.0,16.0,4.0,4.0,3.0,3.0,4.0,4.0,4.5,4.0,2954.0,5938.526338,0.023,97.0,0.115,95.0,11.5,1720.0,12.8,779.0,3.8,779.0,1.1,1145.0,,,,,,,,,,,677.0,41.875166,-87.66851,RUSH UNIVERSITY MEDICAL CENTER (140119)


In [36]:
# Audit: list every CCN (Facility ID) that appears under more than one
# facility name across the rows of main_df.
main_df = pd.read_pickle(stars_dir + 'FilesForApp/hosp_stars_dat.pkl')

CCNs = main_df['Facility ID'].unique().tolist()

ct = 0

# A single groupby pass replaces re-filtering the whole frame once per CCN.
# sort=False keeps the groups in first-appearance order, i.e. the same order
# as CCNs, so the printed report is unchanged.
for ccn, tdf in main_df.groupby('Facility ID', sort=False):
    Names = tdf['Facility Name'].unique().tolist()

    if len(Names) > 1:
        print('CCN:', ccn)
        print('Names:', Names, '\n')
        ct += 1

print('No. of CCNs associated with more than one name:', ct)
    

CCN: 010001
Names: ['SOUTHEAST HEALTH MEDICAL CENTER', 'SOUTHEAST ALABAMA MEDICAL CENTER'] 

CCN: 010029
Names: ['THE EAST ALABAMA HEALTHCARE AUTHORITY', 'EAST ALABAMA MEDICAL CENTER AND SNF'] 

CCN: 010090
Names: ['USA HEALTH HCA PROVIDENCE HOSPITAL, LLC', 'PROVIDENCE HOSPITAL'] 

CCN: 010092
Names: ['DCH REGIONAL MEDICAL CENTER', 'D C H REGIONAL MEDICAL CENTER'] 

CCN: 030014
Names: ['HONOR HEALTH JOHN C. LINCOLN MEDICAL CENTER', 'JOHN C. LINCOLN NORTH MOUNTAIN HOSPITAL'] 

CCN: 030038
Names: ['HONORHEALTH SCOTTSDALE OSBORN MEDICAL CENTER', 'SCOTTSDALE OSBORN MEDICAL CENTER'] 

CCN: 030094
Names: ['ABRAZO ARROWHEAD HOSPITAL', 'ARROWHEAD HOSPITAL'] 

CCN: 030123
Names: ['HONORHEALTH SCOTTSDALE THOMPSON PEAK MED CTR', 'HONOR HEALTH THOMPSON PEAK MEDICAL CENTER', 'SCOTTSDALE THOMPSON PEAK MEDICAL CENTER'] 

CCN: 040029
Names: ['CONWAY REGIONAL MEDICAL CENTER, INC', 'CONWAY REGIONAL HEALTH SYSTEM'] 

CCN: 040051
Names: ['BAPTIST HEALTH MEDICAL CENTER-DREW COUNTY', 'DREW MEMORIAL HEALTH S

CCN: 110128
Names: ['MEMORIAL HEALTH MEADOWS HOSPITAL', 'MEADOWS REGIONAL MEDICAL CENTER'] 

CCN: 110143
Names: ['WELLSTAR COBB MEDICAL CENTER', 'WELLSTAR COBB HOSPITAL'] 

CCN: 110153
Names: ['HOUSTON HEALTHCARE', 'PERRY HOSPITAL'] 

CCN: 110168
Names: ['REDMOND REGIONAL MEDICAL CENTER', 'ADVENTHEALTH REDMOND'] 

CCN: 110184
Names: ['WELLSTAR DOUGLAS MEDICAL CENTER', 'WELLSTAR DOUGLAS HOSPITAL'] 

CCN: 110192
Names: ['PIEDMONT EASTSIDE MEDICAL CENTER', 'EASTSIDE MEDICAL CENTER'] 

CCN: 110233
Names: ['SOUTHEASTERN REGIONAL MEDICAL CENTER, INC', 'SOUTHEASTERN REGIONAL MEDICAL CENTER'] 

CCN: 111310
Names: ['ATRIUM HEALTH NAVICENT PEACH', 'MEDICAL CENTER OF PEACH COUNTY, NAVICENT HEALTH'] 

CCN: 120028
Names: ['NORTH HAWAII COMMUNITY HOSPITAL, INC', 'NORTH HAWAII COMMUNITY HOSPITAL'] 

CCN: 140008
Names: ['LOYOLA GOTTLIEB MEMORIAL HOSPITAL', 'GOTTLIEB MEMORIAL HOSPITAL'] 

CCN: 140011
Names: ['HERRIN HOSPITAL', 'SOUTHERN ILLINOIS HOSPITAL SERVICES DBA HERRIN HOSPITAL'] 

CCN: 140046
Nam

CCN: 230110
Names: ['COREWELL HEALTH LUDINGTON HOSPITAL', 'SPECTRUM HEALTH LUDINGTON HOSPITAL'] 

CCN: 230142
Names: ['COREWELL HEALTH WAYNE HOSPITAL', 'BEAUMONT HOSPITAL - WAYNE'] 

CCN: 230146
Names: ['HENRY FORD HEALTH WYANDOTTE HOSPITAL', 'HENRY FORD WYANDOTTE HOSPITAL'] 

CCN: 230151
Names: ['BEAUMONT HOSPITAL - FARMINGTON HILLS', 'BEAUMONT FARMINGTON HILLS'] 

CCN: 230156
Names: ['TRINITY HEALTH ANN ARBOR HOSPITAL', 'ST JOSEPH MERCY HOSPITAL'] 

CCN: 230176
Names: ['COREWELL HEALTH TRENTON HOSPITAL', 'BEAUMONT HOSPITAL - TRENTON'] 

CCN: 230197
Names: ['ASCENSION GENESYS HOSPITAL', 'GENESYS REGIONAL MEDICAL CENTER - HEALTH PARK'] 

CCN: 230222
Names: ['MYMICHIGAN MEDICAL CENTER MIDLAND', 'MIDMICHIGAN MEDICAL CENTER-MIDLAND'] 

CCN: 230236
Names: ['UNIVERSITY OF MICHIGAN HEALTH - WEST', 'METRO HEALTH HOSPITAL'] 

CCN: 230254
Names: ['ASCENSION PROVIDENCE ROCHESTER HOSPITAL', 'CRITTENTON HOSPITAL MEDICAL CENTER'] 

CCN: 230259
Names: ['CHELSEA HOSPITAL', 'ST JOSEPH MERCY CHELSEA'] 

CCN: 380021
Names: ['HILLSBORO MEDICAL CENTER', 'TUALITY COMMUNITY HOSPITAL'] 

CCN: 380052
Names: ['SAINT ALPHONSUS MEDICAL CENTER - ONTARIO', 'SAINT ALPHONSUS MEDICAL CENTER - ONTARIO, INC'] 

CCN: 380103
Names: ['KAISER FOUNDATION HOSPITAL WESTSIDE', 'KAISER FOUNDATION HOSPITAL - WESTSIDE'] 

CCN: 390004
Names: ['PENN STATE HEALTH HOLY SPIRIT MEDICAL CENTER', 'HOLY SPIRIT HOSPITAL'] 

CCN: 390012
Names: ['LANSDALE HOSPITAL', 'ABINGTON HEALTH LANSDALE HOSPITAL'] 

CCN: 390026
Names: ['TEMPLE HEALTH - CHESTNUT HILL HOSPITAL', 'CHESTNUT HILL HOSPITAL'] 

CCN: 390045
Names: ['UPMC WILLIAMSPORT', 'WILLIAMSPORT REGIONAL MEDICAL CENTER'] 

CCN: 390046
Names: ['WELLSPAN YORK HOSPITAL', 'YORK HOSPITAL'] 

CCN: 390057
Names: ['GRAND VIEW HEALTH', 'GRAND VIEW HOSPITAL'] 

CCN: 390058
Names: ['UPMC CARLISLE', 'CARLISLE REGIONAL MEDICAL CENTER'] 

CCN: 390062
Names: ['CONEMAUGH NASON MEDICAL CENTER', 'NASON MEDICAL CENTER, LLC'] 

CCN: 390063
Names: ['UPMC HAMOT', 'UPMC HAMOT HOSPITAL'] 

CCN: 3

CCN: 440137
Names: ['VANDERBILT BEDFORD HOSPITAL', 'TENNOVA HEALTHCARE-SHELBYVILLE'] 

CCN: 511317
Names: ['CAMC PLATEAU MEDICAL CENTER, INC', 'PLATEAU MEDICAL CENTER'] 

CCN: 010038
Names: ['STRINGFELLOW CAMPUS OF NORTHEAST RMC', 'STRINGFELLOW MEMORIAL HOSPITAL'] 

CCN: 041312
Names: ['BAPTIST HEALTH MEDICAL CENTER HEBER SPRINGS', 'BAPTIST HEALTH MEDICAL CENTER HEBER SPINGS'] 

CCN: 050747
Names: ['COASTAL COMMUNITIES HOSPITAL', 'SOUTH COAST GLOBAL MEDICAL CENTER'] 

CCN: 060118
Names: ['ST ANTHONY SUMMIT MEDICAL CENTER', 'ST. ANTHONY SUMMIT MEDICAL CENTER'] 

CCN: 140137
Names: ['HSHS HOLY FAMILY HOSPITAL INC', 'HSHS HOLY FAMILY HOSPIAL INC'] 

CCN: 141337
Names: ['OSF SAINT CLARE MEDICAL CENTER', 'PERRY MEMORIAL HOSPITAL'] 

CCN: 150004
Names: ['FRANCISCAN HEALTH HAMMOND', 'FRANCISCAN ST. MARGARET HEALTH HAMMOND'] 

CCN: 160008
Names: ['BLESSING HEALTH KEOKUK', 'KEOKUK AREA HOSPITAL', 'UNITYPOINT HEALTH - KEOKUK'] 

CCN: 231325
Names: ['MYMICHIGAN MEDICAL CENTER GLADWIN', 'MIDMICHIG