In [1]:
import pandas as pd
import warnings
from IPython.utils import io
import sys
import numpy as np
from functools import reduce

# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning).
warnings.filterwarnings('ignore')

# Never truncate DataFrame output: no cap on displayed columns or rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Base directories (relative to the user's home) used to build input paths.
stars_dir = '~/GitHub/stars-data-builder/'
hos_dir = '~/Desktop/Rush/CMS_HospitalArchives/'

# Empty frame with columns for a measure ID and its start/end dates;
# it is not populated in the cells visible here.
dates_df = pd.DataFrame(
    columns=['Measure ID', 'Start Date', 'End Date'],
)

# 
----

# Compare 2023 publicly released results to 2024 results

In [2]:
def _normalize_provider_id(provider_id):
    """Return a provider ID as a zero-padded string of at least 6 characters.

    A trailing '666666' (the imputed suffix used for VHA hospitals) is replaced
    with the original 'F' suffix before padding.
    """
    pid = str(provider_id)
    if pid.endswith('666666'):
        pid = pid[:-6] + 'F'
    return pid.rjust(6, '0')


def compare_2023_2024(df_2023, df_2024):
    """Print a comparison of overall star ratings between two yearly releases.

    Parameters
    ----------
    df_2023, df_2024 : pandas.DataFrame
        Frames with at least a 'PROVIDER_ID' column and a numeric 'star'
        column. Rows whose 'star' is missing are ignored. Neither input frame
        is modified.

    Prints
    ------
    * the number of rated hospitals in each year and how many appear in only
      one of the two years,
    * for each 2023 star level, the distribution of 2024 stars among the
      hospitals present in both years,
    * how many hospitals dropped / improved, grouped by the size of the change
      (change = 2024 star - 2023 star).

    Returns
    -------
    None
    """
    # Keep only hospitals that actually received a rating. Work on copies so
    # the column assignments below never write into the caller's frames.
    df_2023 = df_2023[df_2023['star'].notna()].copy()
    df_2024 = df_2024[df_2024['star'].notna()].copy()

    ## Replace the imputed 666666 suffixes of VHA hospitals with their original 'F' suffix
    # Both years are normalised the same way so IDs are comparable even when
    # one file was parsed as integers (losing leading zeros) and the other as text.
    df_2023['PROVIDER_ID'] = df_2023['PROVIDER_ID'].map(_normalize_provider_id)
    df_2024['PROVIDER_ID'] = df_2024['PROVIDER_ID'].map(_normalize_provider_id)

    prvdrs_2023 = set(df_2023['PROVIDER_ID'])
    print('Hospitals with overall star ratings in 2023:', df_2023.shape[0])

    prvdrs_2024 = set(df_2024['PROVIDER_ID'])
    print('Hospitals with overall star ratings in 2024:', df_2024.shape[0], '\n')

    print(len(prvdrs_2023 - prvdrs_2024), "hospitals that had star ratings in 2023 but not 2024")
    print(len(prvdrs_2024 - prvdrs_2023), "hospitals that had star ratings in 2024 but not 2023")

    # Restrict both frames to hospitals rated in both years, in the same order.
    common = prvdrs_2023 & prvdrs_2024
    df_2023 = df_2023[df_2023['PROVIDER_ID'].isin(common)].sort_values(by='PROVIDER_ID')
    df_2024 = df_2024[df_2024['PROVIDER_ID'].isin(common)].sort_values(by='PROVIDER_ID')

    if df_2024['PROVIDER_ID'].tolist() == df_2023['PROVIDER_ID'].tolist():
        print('After filtering, the ordered list of providers in pred and actual are the same')
    else:
        # Can only happen when a provider ID is duplicated within one year.
        print('After filtering, the ordered list of providers in pred and actual are NOT the same')

    print('df_2023.shape:', df_2023.shape)
    print('df_2024.shape:', df_2024.shape, '\n')

    # Transition table: where did each 2023 star group end up in 2024?
    for star in [1, 2, 3, 4, 5]:
        print('Changes for hospitals that were', star, 'star 2023:')

        prvdrs = df_2023.loc[df_2023['star'] == star, 'PROVIDER_ID'].tolist()
        tdf_2024 = df_2024[df_2024['PROVIDER_ID'].isin(prvdrs)]

        print(tdf_2024.shape[0], star, 'hospitals')
        stars_ls = tdf_2024['star'].tolist()

        print('1 star:', stars_ls.count(1))
        print('2 star:', stars_ls.count(2))
        print('3 star:', stars_ls.count(3))
        print('4 star:', stars_ls.count(4))
        print('5 star:', stars_ls.count(5), '\n')

    # NOTE(review): the header says "greater than 1" but every non-zero change
    # is listed below; confirm which of the two is intended.
    print('No. of hospitals where the change in the star rating was greater than 1')

    # First 2024 rating seen for each provider (dict lookup instead of a
    # linear list.index() search per hospital).
    star_2024_by_id = {}
    for pid, star_2024 in zip(df_2024['PROVIDER_ID'].tolist(), df_2024['star'].tolist()):
        star_2024_by_id.setdefault(pid, star_2024)

    # change > 0  -> rating went up from 2023 to 2024 (improved)
    # change < 0  -> rating went down (dropped)
    changes = [
        star_2024_by_id[pid] - star_2023
        for pid, star_2023 in zip(df_2023['PROVIDER_ID'].tolist(), df_2023['star'].tolist())
        if pid in star_2024_by_id
    ]

    print('Results for differences')
    # Sorted so the report order is stable from run to run.
    for change in sorted(set(changes)):
        if change < 0:
            print(changes.count(change), 'hospitals dropped in their star rating by', np.abs(change), 'stars')
        elif change > 0:
            print(changes.count(change), 'hospitals improved their star rating by', np.abs(change), 'stars')

        
        


## Load data

In [3]:
# Read the July 2023 star-ratings CSV from the stars-data-builder checkout.
path = f'{stars_dir}2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv'
df_2023 = pd.read_csv(path)

# Read the July 2024 star-ratings CSV from the same directory layout.
path = f'{stars_dir}2024/2024-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2024.csv'
df_2024 = pd.read_csv(path)

# Print the year-over-year comparison report.
compare_2023_2024(df_2023=df_2023, df_2024=df_2024)


Hospitals with overall star ratings in 2023: 3076
Hospitals with overall star ratings in 2024: 2847 

265 hospitals that had star ratings in 2023 but not 2024
36 hospitals that had star ratings in 2024 but not 2023
After filtering, the ordered list of providers in pred and actual are the same
df_2023.shape: (2811, 27)
df_2024.shape: (2811, 27) 

Changes for hospitals that were 1 star 2023:
225 1 hospitals
1 star: 139
2 star: 71
3 star: 12
4 star: 3
5 star: 0 

Changes for hospitals that were 2 star 2023:
613 2 hospitals
1 star: 117
2 star: 295
3 star: 175
4 star: 23
5 star: 3 

Changes for hospitals that were 3 star 2023:
788 3 hospitals
1 star: 19
2 star: 171
3 star: 372
4 star: 207
5 star: 19 

Changes for hospitals that were 4 star 2023:
740 4 hospitals
1 star: 2
2 star: 48
3 star: 226
4 star: 370
5 star: 94 

Changes for hospitals that were 5 star 2023:
445 5 hospitals
1 star: 0
2 star: 1
3 star: 37
4 star: 151
5 star: 256 

No. of hospitals where the change in the star rating was 