In [1]:
import pandas as pd
import warnings
from IPython.utils import io
import sys
import numpy as np
from functools import reduce

# Notebook-wide settings -------------------------------------------------------

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Let pandas render every column and every row of a displayed frame (no truncation).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Base directories that the file paths in later cells are built from.
stars_dir = '~/GitHub/stars-data-builder/'
hos_dir = '~/Desktop/Rush/CMS_HospitalArchives/'

# Empty frame with one column per field of a measure's reporting period.
dates_df = pd.DataFrame(columns=['Measure ID', 'Start Date', 'End Date'])

# 
----

# Compare 2023 publicly released results to 2024 predictions

In [2]:
def _normalize_provider_id(provider_id):
    """Return a provider ID as a left-zero-padded string of at least 6 characters.

    An over-long ID that *ends* in '666666' has that suffix replaced with 'F'
    (the code below treats the suffix as an imputed stand-in for the 'F' suffix
    of VHA hospitals). IDs are then left-padded with '0' to six characters, so
    integer-parsed IDs such as 10001 compare equal to '010001'.
    """
    pid = str(provider_id)
    # Match the marker only as a suffix of an over-long ID; a plain substring
    # test would also rewrite IDs that merely contain the digits 666666.
    if len(pid) > 6 and pid.endswith('666666'):
        pid = pid[:-6] + 'F'
    return pid.rjust(6, '0')


def compare_2023_2024(df_2023, df_2024):
    """Print how predicted 2024 star ratings differ from the 2023 star ratings.

    Parameters
    ----------
    df_2023 : pandas.DataFrame
        2023 results; must contain 'PROVIDER_ID' and 'star' columns.
    df_2024 : pandas.DataFrame
        2024 predictions; must contain 'PROVIDER_ID' and 'star' columns.

    Returns
    -------
    None
        Everything is reported through ``print``: the number of rated hospitals
        in each file, how many rated hospitals appear in only one file, the
        shapes of the matched frames, and, for each 2023 star level (1-5), the
        distribution of 2024 stars among the same hospitals.

    Notes
    -----
    The input frames are never modified; all work is done on copies. Provider
    IDs of *both* frames are normalized to the same string form before they are
    compared, so the match does not depend on how pandas parsed each CSV.
    """
    # Keep only hospitals that received a star rating. The explicit copy means
    # the column assignment below never writes into a view of the caller's data.
    df_2023 = df_2023[df_2023['star'].notna()].copy()
    df_2024 = df_2024[df_2024['star'].notna()].copy()

    ## Replace the imputed 666666 suffixes of VHA hospitals with their original 'F' suffix
    df_2023['PROVIDER_ID'] = df_2023['PROVIDER_ID'].map(_normalize_provider_id)
    df_2024['PROVIDER_ID'] = df_2024['PROVIDER_ID'].map(_normalize_provider_id)

    print('Hospitals with overall star ratings in 2023:', df_2023.shape[0])
    print('Hospitals with overall star ratings in the 2024 predictions:', df_2024.shape[0], '\n')

    ids_2023 = set(df_2023['PROVIDER_ID'])
    ids_2024 = set(df_2024['PROVIDER_ID'])

    print(len(ids_2023 - ids_2024), "hospitals in 2023 that had star ratings but don't in the 2024 file")
    print(len(ids_2024 - ids_2023), "hospitals in the 2024 file that have star ratings but didn't in 2023\n")

    # Restrict both frames to hospitals rated in both files and put them in the
    # same provider order (one filter and one sort per frame is sufficient).
    shared_ids = ids_2023 & ids_2024
    df_2023 = df_2023[df_2023['PROVIDER_ID'].isin(shared_ids)].sort_values(by='PROVIDER_ID')
    df_2024 = df_2024[df_2024['PROVIDER_ID'].isin(shared_ids)].sort_values(by='PROVIDER_ID')

    # The ordered ID lists differ only if a provider appears a different number
    # of times in the two frames.
    if df_2024['PROVIDER_ID'].tolist() == df_2023['PROVIDER_ID'].tolist():
        print('After filtering, the ordered list of providers in pred and actual are the same')
    else:
        print('After filtering, the ordered list of providers in pred and actual are NOT the same')

    print('df_2023.shape:', df_2023.shape)
    print('df_2024.shape:', df_2024.shape, '\n')

    star_levels = [1, 2, 3, 4, 5]

    for star in star_levels:
        print('Predicted changes for hospitals that were', star, 'star 2023:')

        # Hospitals at this level in 2023, looked up in the 2024 predictions.
        ids_at_level = df_2023.loc[df_2023['star'] == star, 'PROVIDER_ID']
        tdf_2024 = df_2024[df_2024['PROVIDER_ID'].isin(ids_at_level)]

        print(tdf_2024.shape[0], star, 'hospitals')

        predicted_stars = tdf_2024['star']
        counts = [int((predicted_stars == level).sum()) for level in star_levels]

        for level, count in zip(star_levels[:-1], counts[:-1]):
            print(str(level) + ' star:', count)
        # The last level carries the blank line that separates the sections.
        print(str(star_levels[-1]) + ' star:', counts[-1], '\n')


## Predictions based on April 2023 data

In [3]:
# Baseline: the July 2023 Stars release.
df_2023 = pd.read_csv(stars_dir + '2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv')

# 2024 predictions produced from April 2023 data.
path = stars_dir + 'Reproduce_Stars_Input/2024/SAS_output/CMS_Stars_2024_predictions_from_April_2023_data.csv'
df_2024 = pd.read_csv(path)

# Print the 2023 -> 2024 star-rating comparison.
compare_2023_2024(df_2023, df_2024)

# Spot-check three specific provider IDs: summary score, group count and star.
tdf = (
    df_2024
    .loc[df_2024['PROVIDER_ID'].isin([140119, 140029, 140063])]
    .filter(items=['summary_score', 'cnt_grp', 'star'])
)
tdf.head()

Hospitals with overall star ratings in 2023: 3076
Hospitals with overall star ratings in the 2024 predictions: 3068 

17 hospitals in 2023 that had star ratings but don't in the 2024 file
9 hospitals in the 2024 file that have star ratings but didn't in 2023

After filtering, the ordered list of providers in pred and actual are the same
df_2023.shape: (3059, 27)
df_2024.shape: (3059, 27) 

Predicted changes for hospitals that were 1 star 2023:
248 1 hospitals
1 star: 217
2 star: 30
3 star: 1
4 star: 0
5 star: 0 

Predicted changes for hospitals that were 2 star 2023:
665 2 hospitals
1 star: 43
2 star: 565
3 star: 56
4 star: 1
5 star: 0 

Predicted changes for hospitals that were 3 star 2023:
867 3 hospitals
1 star: 2
2 star: 106
3 star: 697
4 star: 62
5 star: 0 

Predicted changes for hospitals that were 4 star 2023:
799 4 hospitals
1 star: 1
2 star: 3
3 star: 154
4 star: 613
5 star: 28 

Predicted changes for hospitals that were 5 star 2023:
480 5 hospitals
1 star: 0
2 star: 0
3 star:

Unnamed: 0,summary_score,cnt_grp,star
1075,0.008017,3) # of groups=5,3.0
1089,0.483189,3) # of groups=5,5.0
1113,0.734652,3) # of groups=5,5.0


## Predictions based on July 2023 data

In [4]:
# Baseline: the July 2023 Stars release.
df_2023 = pd.read_csv(stars_dir + '2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv')

# 2024 predictions produced from July 2023 data.
path = stars_dir + 'Reproduce_Stars_Input/2024/SAS_output/CMS_Stars_2024_predictions_from_July_2023_data.csv'
df_2024 = pd.read_csv(path)

# Print the 2023 -> 2024 star-rating comparison.
compare_2023_2024(df_2023, df_2024)

# Spot-check three specific provider IDs: summary score, group count and star.
tdf = (
    df_2024
    .loc[df_2024['PROVIDER_ID'].isin([140119, 140029, 140063])]
    .filter(items=['summary_score', 'cnt_grp', 'star'])
)
tdf.head()

Hospitals with overall star ratings in 2023: 3076
Hospitals with overall star ratings in the 2024 predictions: 2874 

232 hospitals in 2023 that had star ratings but don't in the 2024 file
30 hospitals in the 2024 file that have star ratings but didn't in 2023

After filtering, the ordered list of providers in pred and actual are the same
df_2023.shape: (2844, 27)
df_2024.shape: (2844, 27) 

Predicted changes for hospitals that were 1 star 2023:
230 1 hospitals
1 star: 165
2 star: 58
3 star: 7
4 star: 0
5 star: 0 

Predicted changes for hospitals that were 2 star 2023:
620 2 hospitals
1 star: 100
2 star: 380
3 star: 129
4 star: 11
5 star: 0 

Predicted changes for hospitals that were 3 star 2023:
796 3 hospitals
1 star: 12
2 star: 179
3 star: 484
4 star: 119
5 star: 2 

Predicted changes for hospitals that were 4 star 2023:
748 4 hospitals
1 star: 3
2 star: 19
3 star: 270
4 star: 418
5 star: 38 

Predicted changes for hospitals that were 5 star 2023:
450 5 hospitals
1 star: 0
2 star: 0

Unnamed: 0,summary_score,cnt_grp,star
1073,0.269223,3) # of groups=5,4.0
1087,0.276828,3) # of groups=5,4.0
1111,0.800703,3) # of groups=5,5.0


## Predictions based on Oct 2023 data

In [5]:
# Baseline: the July 2023 Stars release.
df_2023 = pd.read_csv(stars_dir + '2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv')

# 2024 predictions produced from Oct 2023 data.
path = stars_dir + 'Reproduce_Stars_Input/2024/SAS_output/CMS_Stars_2024_predictions_from_Oct_2023_data.csv'
df_2024 = pd.read_csv(path)

# Print the 2023 -> 2024 star-rating comparison.
compare_2023_2024(df_2023, df_2024)

# Spot-check three specific provider IDs: summary score, group count and star.
tdf = (
    df_2024
    .loc[df_2024['PROVIDER_ID'].isin([140119, 140029, 140063])]
    .filter(items=['summary_score', 'cnt_grp', 'star'])
)
tdf.head()

Hospitals with overall star ratings in 2023: 3076
Hospitals with overall star ratings in the 2024 predictions: 2874 

232 hospitals in 2023 that had star ratings but don't in the 2024 file
30 hospitals in the 2024 file that have star ratings but didn't in 2023

After filtering, the ordered list of providers in pred and actual are the same
df_2023.shape: (2844, 27)
df_2024.shape: (2844, 27) 

Predicted changes for hospitals that were 1 star 2023:
230 1 hospitals
1 star: 165
2 star: 59
3 star: 6
4 star: 0
5 star: 0 

Predicted changes for hospitals that were 2 star 2023:
620 2 hospitals
1 star: 101
2 star: 379
3 star: 129
4 star: 11
5 star: 0 

Predicted changes for hospitals that were 3 star 2023:
796 3 hospitals
1 star: 12
2 star: 179
3 star: 486
4 star: 117
5 star: 2 

Predicted changes for hospitals that were 4 star 2023:
748 4 hospitals
1 star: 2
2 star: 20
3 star: 271
4 star: 417
5 star: 38 

Predicted changes for hospitals that were 5 star 2023:
450 5 hospitals
1 star: 0
2 star: 0

Unnamed: 0,summary_score,cnt_grp,star
1073,0.267916,3) # of groups=5,4.0
1087,0.27638,3) # of groups=5,4.0
1111,0.802513,3) # of groups=5,5.0


## Predictions based on Nov 2023 data

In [6]:
# Baseline: the July 2023 Stars release.
df_2023 = pd.read_csv(stars_dir + '2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv')

# 2024 predictions produced from Nov 2023 data.
path = stars_dir + 'Reproduce_Stars_Input/2024/SAS_output/CMS_Stars_2024_predictions_from_Nov_2023_data.csv'
df_2024 = pd.read_csv(path)

# Print the 2023 -> 2024 star-rating comparison.
compare_2023_2024(df_2023, df_2024)

# Spot-check three specific provider IDs: summary score, group count and star.
tdf = (
    df_2024
    .loc[df_2024['PROVIDER_ID'].isin([140119, 140029, 140063])]
    .filter(items=['summary_score', 'cnt_grp', 'star'])
)
tdf.head()

Hospitals with overall star ratings in 2023: 3076
Hospitals with overall star ratings in the 2024 predictions: 2862 

249 hospitals in 2023 that had star ratings but don't in the 2024 file
35 hospitals in the 2024 file that have star ratings but didn't in 2023

After filtering, the ordered list of providers in pred and actual are the same
df_2023.shape: (2827, 27)
df_2024.shape: (2827, 27) 

Predicted changes for hospitals that were 1 star 2023:
228 1 hospitals
1 star: 154
2 star: 65
3 star: 8
4 star: 1
5 star: 0 

Predicted changes for hospitals that were 2 star 2023:
619 2 hospitals
1 star: 105
2 star: 353
3 star: 145
4 star: 16
5 star: 0 

Predicted changes for hospitals that were 3 star 2023:
790 3 hospitals
1 star: 17
2 star: 189
3 star: 447
4 star: 131
5 star: 6 

Predicted changes for hospitals that were 4 star 2023:
744 4 hospitals
1 star: 3
2 star: 35
3 star: 284
4 star: 375
5 star: 47 

Predicted changes for hospitals that were 5 star 2023:
446 5 hospitals
1 star: 0
2 star: 4

Unnamed: 0,summary_score,cnt_grp,star
1071,0.258349,3) # of groups=5,4.0
1085,0.243672,3) # of groups=5,4.0
1109,0.781296,3) # of groups=5,5.0


## Predictions based on Jan 2024 data

In [7]:
# Baseline: the July 2023 Stars release.
df_2023 = pd.read_csv(stars_dir + '2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv')

# 2024 predictions produced from Jan 2024 data.
path = stars_dir + 'Reproduce_Stars_Input/2024/SAS_output/CMS_Stars_2024_predictions_from_Jan_2024_data.csv'
df_2024 = pd.read_csv(path)

# Print the 2023 -> 2024 star-rating comparison.
compare_2023_2024(df_2023, df_2024)

# Spot-check three specific provider IDs: summary score, group count and star.
tdf = (
    df_2024
    .loc[df_2024['PROVIDER_ID'].isin([140119, 140029, 140063])]
    .filter(items=['summary_score', 'cnt_grp', 'star'])
)
tdf.head()

Hospitals with overall star ratings in 2023: 3076
Hospitals with overall star ratings in the 2024 predictions: 2852 

265 hospitals in 2023 that had star ratings but don't in the 2024 file
41 hospitals in the 2024 file that have star ratings but didn't in 2023

After filtering, the ordered list of providers in pred and actual are the same
df_2023.shape: (2811, 27)
df_2024.shape: (2811, 27) 

Predicted changes for hospitals that were 1 star 2023:
225 1 hospitals
1 star: 139
2 star: 71
3 star: 12
4 star: 3
5 star: 0 

Predicted changes for hospitals that were 2 star 2023:
613 2 hospitals
1 star: 116
2 star: 292
3 star: 179
4 star: 23
5 star: 3 

Predicted changes for hospitals that were 3 star 2023:
788 3 hospitals
1 star: 19
2 star: 168
3 star: 375
4 star: 208
5 star: 18 

Predicted changes for hospitals that were 4 star 2023:
740 4 hospitals
1 star: 2
2 star: 48
3 star: 227
4 star: 369
5 star: 94 

Predicted changes for hospitals that were 5 star 2023:
445 5 hospitals
1 star: 0
2 star:

Unnamed: 0,summary_score,cnt_grp,star
1069,0.175615,3) # of groups=5,4.0
1082,0.154311,3) # of groups=5,4.0
1106,0.773013,3) # of groups=5,5.0
