In [1]:
# Reading the data and merging it
# Input:
#   two csv files (performance scores and patient experience) where each row corresponds to a (hospital, performance measure) pair
# Output:
#   (1) df: a pandas dataframe where each row corresponds to a hospital and
#   there is a column for each performance measure with the entry corresponding to the value for the measure
#   i,j entry is the performance rating of hospital i on measure j
#   (2) inverse: maps each Measure Identifier to whether it is an inverse measure or not
#   (3) states: maps each Group PAC ID (hospital identifier) to its state
#   (4) featurenames: maps each Measure Identifier to its Measure Title

import pandas as pd
import numpy as np

from matplotlib import pyplot as plt

from scipy import stats as ss

import collections
from collections import defaultdict


def recursively_default_dict():
    """Return an empty defaultdict whose missing keys are themselves filled
    with defaultdicts of the same kind.

    This lets callers assign to arbitrarily deep keys (``d[a][b][c] = v``)
    without creating the intermediate levels first.
    """
    nested = defaultdict(recursively_default_dict)
    return nested


# Patient-experience file: collect the distinct measure titles and
# identifiers, then show the titles.
df = pd.read_csv('Physician_Compare_2015_Group_Public_Reporting_-_Patient_Experience.csv')
measures = set(df['Measure Title'])
exp_mids = set(df['Measure Identifier'])
print('Experience measures \n{}'.format(measures))

# Performance-score file: same summary. Note that `df` and `measures` are
# rebound here, so afterwards they describe the performance file only.
df = pd.read_csv('Physician_Compare_2015_Group_Public_Reporting___Performance_Scores.csv')
measures = set(df['Measure Title'])
perf_mids = set(df['Measure Identifier'])
print('Performance measures \n{}'.format(measures))


def read_in_rows(records, fname, valcol='Measure Performance Rate', stratum=1):
    """Average one CSV's per-group measure values into ``records``.

    Parameters
    ----------
    records : dict-like
        Two-level mapping ``records[pacid][measure_id] -> mean value``.
        It is updated in place. A plain ``dict`` works as well as a
        defaultdict; missing group entries are created on demand.
    fname : str
        Path of a CSV file with the columns 'Group PAC ID',
        'Measure Identifier' and ``valcol``.
    valcol : str
        Name of the column holding the value to average.
    stratum : int
        When > 0, the last '_'-separated token of every Measure Identifier
        is replaced by '1', so rows whose identifiers differ only in that
        trailing token are pooled into a single measure before averaging.
        When <= 0 the identifiers are used unchanged.

    Returns
    -------
    The same ``records`` object, for convenient chaining.

    Notes
    -----
    Raw values are gathered in a local structure and only the finished
    means are written to ``records``. ``records`` therefore never holds a
    mix of lists and numbers, entries from earlier calls are not
    re-averaged, and a (group, measure) key that already exists is simply
    replaced by the mean computed from ``fname``.
    """
    perf = pd.read_csv(fname)

    # raw[pacid][mid] -> list of values seen in this file (insertion order
    # follows first appearance in the file, as does the final column order).
    raw = {}
    columns = zip(perf['Group PAC ID'], perf['Measure Identifier'], perf[valcol])
    for pacid, mid, val in columns:
        if stratum > 0:
            # e.g. 'PQRS_GRP_7_2' -> 'PQRS_GRP_7_1'
            mid = '_'.join(mid.rstrip().split('_')[:-1] + ['1'])
        raw.setdefault(pacid, {}).setdefault(mid, []).append(val)

    for pacid, by_measure in raw.items():
        try:
            group = records[pacid]  # a defaultdict creates the entry here
        except KeyError:
            group = records[pacid] = {}
        for mid, values in by_measure.items():
            group[mid] = np.mean(values)
    return records

# Merge both source files into one nested mapping
# grp_perf[pacid][measure_id] -> mean value. Performance scores have the
# trailing identifier token collapsed (stratum=1); patient-experience
# identifiers are kept as they are (stratum=0).
grp_perf = recursively_default_dict()
for csv_name, stratum_flag in (
        ('Physician_Compare_2015_Group_Public_Reporting___Performance_Scores.csv', 1),
        ('Physician_Compare_2015_Group_Public_Reporting_-_Patient_Experience.csv', 0)):
    grp_perf = read_in_rows(grp_perf, csv_name, stratum=stratum_flag)

# Outer keys (Group PAC IDs) become the rows, measure identifiers the columns.
df = pd.DataFrame.from_dict(grp_perf, orient='index')
print(list(df.columns))
print(df)
df.to_csv('group_perf.csv')

# Build three lookup tables from the raw CSVs:
#   inverse[mid]      -> 1 / 0 for an 'Inverse Measure' flag of 'Y' / 'N'
#                        (any other value is stored unchanged)
#   states[pacid]     -> 'State' column value for that Group PAC ID
#   featurenames[mid] -> 'Measure Title' for that Measure Identifier
# When a key occurs on several rows, the last row wins.
data = pd.read_csv('Physician_Compare_2015_Group_Public_Reporting___Performance_Scores.csv')
inverse = dict()
states = dict()
featurenames = dict()
inverse_codes = {'Y': 1, 'N': 0}
for pacid, mid, flag, state, title in zip(data['Group PAC ID'],
                                          data['Measure Identifier'],
                                          data['Inverse Measure'],
                                          data['State'],
                                          data['Measure Title']):
    # Replace the trailing '_'-separated token with '1' so the keys agree
    # with the identifiers produced by read_in_rows(..., stratum=1).
    mid = '_'.join(mid.rstrip().split('_')[:-1] + ['1'])
    inverse[mid] = inverse_codes.get(flag, flag)
    states[pacid] = state
    featurenames[mid] = title

# Patient-experience identifiers are used as-is (no suffix rewrite). Each
# title is stored only under the identifier of its own row, so it can never
# be written to an identifier left over from a previous row or loop.
data = pd.read_csv('Physician_Compare_2015_Group_Public_Reporting_-_Patient_Experience.csv')
for mid, title in zip(data['Measure Identifier'], data['Measure Title']):
    featurenames[mid] = title
print(inverse)
print(states)
print(featurenames)


Experience measures 
set(['Health promotion and education.', 'Getting timely care, appointments, and information.', 'How well clinicians communicate.', 'Clinicians working together for your care.', 'Attention to patient medication cost.', 'Courteous and helpful office staff.', 'Between visit communication.', "Patients' rating of clinicians."])
Performance measures 
set(['Coronary Artery Disease (CAD): Beta-Blocker Therapy \xc2\x96 Prior Myocardial Infarction (MI) or Left Ventricular Systolic Dysfunction (LVEF < 40%)', 'Falls: Plan of Care', 'Use of High-Risk Medications in the Elderly*', 'Adult Kidney Disease: Laboratory Testing (Lipid Profile)', 'Stroke and Stroke Rehabilitation: Discharged on Antithrombotic Therapy', 'Oncology: Cancer Stage Documented', 'Tuberculosis Prevention for Psoriasis, Psoriatic Arthritis and Rheumatoid Arthritis Patients on a Biological Immune Response Modifier', 'Hepatitis C: One-Time Screening for Hepatitis C Virus (HCV) for Patients at Risk', 'Rheumatoid A

['PQRS_GRP_173_1', 'PQRS_GRP_47_1', 'PQRS_GRP_137_1', 'PQRS_GRP_224_1', 'CAHPS_GRP_10', 'PQRS_GRP_7_1', 'CAHPS_GRP_12', 'PQRS_GRP_121_1', 'PQRS_GRP_6_1', 'CAHPS_GRP_5', 'PQRS_GRP_119_1', 'CAHPS_GRP_3', 'CAHPS_GRP_2', 'CAHPS_GRP_1', 'CAHPS_GRP_9', 'CAHPS_GRP_8', 'PQRS_GRP_334_1', 'PQRS_GRP_265_1', 'PQRS_GRP_128_1', 'PQRS_GRP_130_1', 'PQRS_GRP_8_1', 'PQRS_GRP_317_1', 'PQRS_GRP_318_1', 'PQRS_GRP_110_1', 'PQRS_GRP_117_1', 'PQRS_GRP_204_1', 'PQRS_GRP_226_1', 'PQRS_GRP_113_1', 'PQRS_GRP_118_1', 'PQRS_GRP_112_1', 'PQRS_GRP_134_1', 'PQRS_GRP_111_1', 'PQRS_GRP_54_1', 'PQRS_GRP_39_1', 'PQRS_GRP_163_1', 'PQRS_GRP_41_1', 'PQRS_GRP_146_1', 'PQRS_GRP_195_1', 'PQRS_GRP_147_1', 'PQRS_GRP_225_1', 'PQRS_GRP_145_1', 'PQRS_GRP_76_1', 'PQRS_GRP_21_1', 'PQRS_GRP_131_1', 'PQRS_GRP_182_1', 'PQRS_GRP_193_1', 'PQRS_GRP_194_1', 'PQRS_GRP_144_1', 'PQRS_GRP_155_1', 'PQRS_GRP_154_1', 'PQRS_GRP_238_1', 'PQRS_GRP_51_1', 'PQRS_GRP_122_1', 'PQRS_GRP_22_1', 'PQRS_GRP_23_1', 'PQRS_GRP_116_1', 'PQRS_GRP_326_1', 'PQRS_GRP_

{'PQRS_GRP_131_1': 0, 'PQRS_GRP_128_1': 0, 'PQRS_GRP_51_1': 0, 'PQRS_GRP_217_1': 0, 'PQRS_GRP_126_1': 0, 'PQRS_GRP_21_1': 0, 'PQRS_GRP_65_1': 0, 'PQRS_GRP_118_1': 0, 'PQRS_GRP_110_1': 0, 'PQRS_GRP_5_1': 0, 'PQRS_GRP_7_1': 0, 'PQRS_GRP_22_1': 0, 'PQRS_GRP_324_1': 1, 'PQRS_GRP_116_1': 0, 'PQRS_GRP_205_1': 0, 'PQRS_GRP_145_1': 0, 'PQRS_GRP_24_1': 0, 'PQRS_GRP_185_1': 0, 'PQRS_GRP_337_1': 0, 'PQRS_GRP_326_1': 0, 'PQRS_GRP_113_1': 0, 'PQRS_GRP_109_1': 0, 'PQRS_GRP_147_1': 0, 'PQRS_GRP_93_1': 0, 'PQRS_GRP_220_1': 0, 'PQRS_GRP_320_1': 0, 'PQRS_GRP_134_1': 0, 'PQRS_GRP_111_1': 0, 'PQRS_GRP_141_1': 0, 'PQRS_GRP_70_1': 0, 'PQRS_GRP_191_1': 0, 'PQRS_GRP_386_1': 0, 'PQRS_GRP_137_1': 0, 'PQRS_GRP_122_1': 0, 'PQRS_GRP_50_1': 0, 'PQRS_GRP_130_1': 0, 'PQRS_GRP_238_1': 1, 'PQRS_GRP_358_1': 0, 'PQRS_GRP_155_1': 0, 'PQRS_GRP_334_1': 1, 'PQRS_GRP_47_1': 0, 'PQRS_GRP_127_1': 0, 'PQRS_GRP_119_1': 0, 'PQRS_GRP_181_1': 0, 'PQRS_GRP_332_1': 0, 'PQRS_GRP_39_1': 0, 'PQRS_GRP_44_1': 0, 'PQRS_GRP_143_1': 0, 'PQRS_

In [2]:
-



FOR  CAHPS_GRP_1
------------------------------------------------------
PQRS_GRP_111_1 : 0.0596333308189 
 
PQRS_GRP_118_1 : 0.781021622807 
 
PQRS_GRP_131_1 : 0.0560754744862 
 
PQRS_GRP_47_1 : 0.0510174652879 
 
PQRS_GRP_8_1 : 0.0408169970106 
 
PQRS_GRP_7_1 : 0.0114351095892 
 
---------------------------------------------------------
(DecisionTreeRegressor(criterion='mse', max_depth=3, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=False, random_state=0,
           splitter='best'), ['PQRS_GRP_100_1', 'PQRS_GRP_102_1', 'PQRS_GRP_109_1', 'PQRS_GRP_110_1', 'PQRS_GRP_112_1', 'PQRS_GRP_113_1', 'PQRS_GRP_116_1', 'PQRS_GRP_117_1', 'PQRS_GRP_119_1', 'PQRS_GRP_121_1', 'PQRS_GRP_126_1', 'PQRS_GRP_127_1', 'PQRS_GRP_128_1', 'PQRS_GRP_12_1', 'PQRS_GRP_130_1', 'PQRS_GRP_134_1', 'PQRS_GRP_137_1', 'PQRS_GRP_138_1', 'PQRS_GRP_140_1', 'PQRS_GRP_141_1', 'PQRS_GRP_143_1',

FOR  CAHPS_GRP_3
------------------------------------------------------
PQRS_GRP_111_1 : 0.033828829923 
 
PQRS_GRP_131_1 : 0.0549088216593 
 
PQRS_GRP_134_1 : 0.0357758129402 
 
PQRS_GRP_47_1 : 0.0490210855209 
 
PQRS_GRP_8_1 : 0.812469275806 
 
PQRS_GRP_7_1 : 0.013996174151 
 
---------------------------------------------------------
(DecisionTreeRegressor(criterion='mse', max_depth=3, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=False, random_state=0,
           splitter='best'), ['PQRS_GRP_100_1', 'PQRS_GRP_102_1', 'PQRS_GRP_109_1', 'PQRS_GRP_110_1', 'PQRS_GRP_112_1', 'PQRS_GRP_113_1', 'PQRS_GRP_116_1', 'PQRS_GRP_117_1', 'PQRS_GRP_118_1', 'PQRS_GRP_119_1', 'PQRS_GRP_121_1', 'PQRS_GRP_126_1', 'PQRS_GRP_127_1', 'PQRS_GRP_128_1', 'PQRS_GRP_12_1', 'PQRS_GRP_130_1', 'PQRS_GRP_137_1', 'PQRS_GRP_138_1', 'PQRS_GRP_140_1', 'PQRS_GRP_141_1', 'PQRS_GRP_143_1', '

In [3]:
# Load the measure table stored in Book1.csv and preview its first rows.
# NOTE(review): the 'Dummy' column appears to flag "operational" measures
# (it is filtered on == 1 in a later cell) -- confirm against the file's origin.
book1_df = pd.read_csv('Book1.csv')
book1_df.head(n=5)

Unnamed: 0,Measure.Identifier,Dummy
0,PQRS_GRP_100_1,0
1,PQRS_GRP_102_1,0
2,PQRS_GRP_109_1,0
3,PQRS_GRP_110_1,1
4,PQRS_GRP_111_1,0


In [4]:
# Keep only the rows of book1_df whose 'Dummy' flag equals 1.
book1_df_operational = book1_df.loc[book1_df['Dummy'].eq(1)]
