In [90]:
import polars as pl

# Load the data
facility_df = pl.read_parquet('../../003_data/001_raw-data/2017-2024_national_cms_dialysis-facility_data.parquet')
cahps_df = pl.read_parquet('../../003_data/001_raw-data/2017-2024_national_cms_dialysis-facility_cahps-data.parquet')

# Display the first few rows of the cahps data
cahps_df.head()

# Filter the data for California
cahps_df = cahps_df.filter(pl.col('state').is_in(['CA']))


In [61]:
# Storing the initial number of rows to compare after cleaning
initial_row_count = cahps_df.shape[0]

# Check missing values before renaming columns
print("Missing values before renaming:")
print(f"'county': {cahps_df['county'].null_count()}")
print(f"'countyparish': {cahps_df['countyparish'].null_count()}")

# Merge duplicate columns
cahps_df = cahps_df.with_columns(pl.coalesce('city', 'citytown').alias('city'))
cahps_df = cahps_df.with_columns(pl.coalesce('county', 'countyparish').alias('county'))
cahps_df = cahps_df.with_columns(pl.coalesce('zip_code', 'zip').alias('zip_code'))
cahps_df = cahps_df.with_columns(pl.coalesce('phone_number', 'telephone_number').alias('phone_number'))
cahps_df = cahps_df.with_columns(pl.coalesce('facility', 'facility_name').alias('facility_name'))
cahps_df = cahps_df.with_columns(pl.coalesce('facility_name', 'facility_name_').alias('facility_name'))
cahps_df = cahps_df.with_columns(pl.coalesce('state', 'state_').alias('state'))
cahps_df = cahps_df.with_columns(pl.coalesce('chain_organization', 'chain_organization_').alias('chain_organization'))
cahps_df = cahps_df.with_columns(pl.coalesce('ich_cahps_survey_of_patients_experiences_star_rating', 'ich_cahps_survey_of_patients_experiences_star_rating_').alias('ich_cahps_survey_of_patients_experiences_star_rating'))
cahps_df = cahps_df.with_columns(pl.coalesce('overall_ich_cahps_survey_of_patients_experiences_star_rating', 'ich_cahps_survey_of_patients_experiences_star_rating').alias('ich_cahps_survey_of_patients_experiences_star_rating'))

# Changing the name of columns with the string patientsrating to patients_rating
for col in cahps_df.columns:
    if 'patientsrating' in col:
        new_col = col.replace('patientsrating', 'patients_rating')
        if new_col in cahps_df.columns:
            # Merge columns if the new name already exists
            cahps_df = cahps_df.with_columns(pl.coalesce(new_col, col).alias(new_col))
            cahps_df = cahps_df.drop(col)
        else:
            cahps_df = cahps_df.rename({col: new_col})

# Drop the original, changed column names
cahps_df = cahps_df.drop('citytown', 'zip', 'telephone_number', 'countyparish', 'facility_name_', 'facility', 'state_', 'chain_organization_', 'ich_cahps_survey_of_patients_experiences_star_rating_', 'citytown')

# Check missing values after processing
print("\nMissing values after processing:")
print(f"'county': {cahps_df['county'].null_count()}")

# Check if the number of rows has changed
final_row_count = cahps_df.shape[0]
if initial_row_count == final_row_count:
    print(f"\nNo rows were dropped. Row count remains {final_row_count}.")
else:
    print(f"\nWarning: Row count changed from {initial_row_count} to {final_row_count}.")

# Some data validation using null values in county column
if cahps_df['county'].null_count() > 0:
    print("\nInvestigating remaining missing values in 'county':")
    missing_county = cahps_df.filter(pl.col('county').is_null())
    print(missing_county.select(['facility_name', 'city', 'state', 'county']))

Missing values before renaming:
'county': 1478
'countyparish': 3567

Missing values after processing:
'county': 0

No rows were dropped. Row count remains 5045.


In [62]:
cahps_df.describe()

statistic,provider_number,network,facility_name,address_line_1,address_line_2,state,zip_code,profit_or_nonprofit,chain_owned,chain_organization,ichcahps_date,ichcahps_data_availability_code,lower_box_percent_of_patientsnephrologists_communication_and_caring,middle_box_percent_of_patientsnephrologists_communication_and_caring,top_box_percent_of_patientsnephrologists_communication_and_caring,linearized_score_of_nephrologists_communication_and_caring,star_rating_of_nephrologists_communication_and_caring,lower_box_percent_of_patientsquality_of_dialysis_center_care_and_operations,middle_box_percent_of_patientsquality_of_dialysis_center_care_and_operations,top_box_percent_of_patientsquality_of_dialysis_center_care_and_operations,linearized_score_of_quality_of_dialysis_center_care_and_operations,star_rating_of_quality_of_dialysis_center_care_and_operations,lower_box_percent_of_patientsproviding_information_to_patients,top_box_percent_of_patients_providing_information_to_patients,linearized_score_of_providing_information_to_patients,star_rating_of_providing_information_to_patients,lower_box_percent_of_patients_rating_of_the_nephrologist,middle_box_percent_of_patients_rating_of_the_nephrologist,top_box_percent_of_patients_rating_of_the_nephrologist,linearized_score_of_rating_of_the_nephrologist,star_rating_of_the_nephrologist,lower_box_percent_of_patients_rating_of_the_dialysis_center_staff,middle_box_percent_of_patients_rating_of_the_dialysis_center_staff,top_box_percent_of_patients_rating_of_the_dialysis_center_staff,linearized_score_of_rating_of_the_dialysis_center_staff,star_rating_of_the_dialysis_center_staff,lower_box_percent_of_patients_rating_of_the_dialysis_facility,middle_box_percent_of_patients_rating_of_the_dialysis_facility,top_box_percent_of_patients_rating_of_the_dialysis_facility,linearized_score_of_rating_of_the_dialysis_facility,star_rating_of_the_dialysis_facility,total_number_of_completed_interviews_from_the_fall_and_spring_surveys,ich_cahps_survey_of_patients_experiences_star_rating,survey_response_rate,year,month,city,county,phone_number,lower_box_percent_of_patientsproviding_information_to_patients_,lower_box_percent_of_patients_rating_of_the_nephrologist_,lower_box_percent_of_patients_rating_of_the_dialysis_center_staff_,top_box_percent_of_patients_rating_of_the_dialysis_center_staff_,middle_box_percent_of_patients_rating_of_the_dialysis_facility_,ichcahps_survey_response_rate,lower_box_percent_of_patients_nephrologists_communication_and_caring,middle_box_percent_of_patients_nephrologists_communication_and_caring,top_box_percent_of_patients_nephrologists_communication_and_caring,overall_ich_cahps_survey_of_patients_experiences_star_rating,ich_cahps_quality_of_patient_care_star_rating
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""5045""","""5045""","""5045""","""5045""","""2216""","""5045""","""5045""","""5045""","""5045""","""5045""","""5045""","""5045""","""2071""","""2071""","""2071""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""2190""","""3426""","""3426""","""3426""","""2190""","""3426""","""3426""","""3426""","""3426""","""2190""","""3426""","""2190""","""3426""","""3426""","""3426""","""2190""","""3426""","""3426""","""3426""","""3426""","""2763""","""835""","""5045""","""5045""","""5045""","""5045""","""5045""","""1236""","""1236""","""1236""","""1236""","""1236""","""1928""","""1355""","""1355""","""1355""","""692""","""663"""
"""null_count""","""0""","""0""","""0""","""0""","""2829""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""2974""","""2974""","""2974""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""2855""","""1619""","""1619""","""1619""","""2855""","""1619""","""1619""","""1619""","""1619""","""2855""","""1619""","""2855""","""1619""","""1619""","""1619""","""2855""","""1619""","""1619""","""1619""","""1619""","""2282""","""4210""","""0""","""0""","""0""","""0""","""0""","""3809""","""3809""","""3809""","""3809""","""3809""","""3117""","""3690""","""3690""","""3690""","""4353""","""4382"""
"""mean""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""std""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""min""","""50038""","""17""","""ABORN DIALYSIS CENTER""","""100 SOUTH SAN MATEO DRIVE""","""""","""CA""","""90003""","""Non-Profit""","""No""","""""","""04/19/2019-01/10/2020""","""1""","""1""","""0""","""40""","""60""","""1""","""10""","""10""","""39""","""67""","""1""","""10""","""53""","""53""","""1""","""0""","""10""","""26""","""64""","""1""","""0""","""10""","""35""","""70""","""1""","""0""","""0""","""100""","""72""","""1""","""100""","""1""","""12""","""2018""","""10""","""ALHAMBRA""","""""","""(209) 205-1126""","""10""","""0""","""0""","""40""","""0""","""10""","""10""","""10""","""47""","""2""","""1"""
"""25%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""50%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""75%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""max""","""752592""","""18""","""YUBA SUTTER DIALYSIS""","""UCSD Dialysis Center - 200 W. …","""Suite B""","""CA""","""96080""","""Profit""","""Yes""","""US Renal Care, Inc.""","""31MAY2022-13JAN2023""","""270""","""9""","""9""","""90""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""45""","""2024""","""7""","""Yucca Valley""","""Yuba""","""(951) 977-9466""","""9""","""9""","""9""","""95""","""9""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available"""


In [63]:
# Select columns of interest
cahps_df = cahps_df.select([
    'year',
    'provider_number',
    'network',
    'facility_name',
    'address_line_1',
    'address_line_2',
    'city',
    'state',
    'zip_code',
    'county',
    'profit_or_nonprofit',
    'phone_number',
    'chain_owned',
    'chain_organization',
    'ichcahps_date',
    'ichcahps_data_availability_code',
    'linearized_score_of_nephrologists_communication_and_caring', 
    'star_rating_of_nephrologists_communication_and_caring',
    'linearized_score_of_quality_of_dialysis_center_care_and_operations',
    'star_rating_of_quality_of_dialysis_center_care_and_operations',
    'linearized_score_of_providing_information_to_patients',
    'star_rating_of_providing_information_to_patients',
    'linearized_score_of_rating_of_the_nephrologist',
    'star_rating_of_the_nephrologist',
    'linearized_score_of_rating_of_the_dialysis_center_staff', 
    'star_rating_of_the_dialysis_center_staff',
    'linearized_score_of_rating_of_the_dialysis_facility',
    'star_rating_of_the_dialysis_facility',
    'total_number_of_completed_interviews_from_the_fall_and_spring_surveys',
    'ich_cahps_survey_of_patients_experiences_star_rating',
    'survey_response_rate',
    'ichcahps_survey_response_rate',
    'ich_cahps_quality_of_patient_care_star_rating'
])



In [64]:
cahps_df.describe()


statistic,year,provider_number,network,facility_name,address_line_1,address_line_2,city,state,zip_code,county,profit_or_nonprofit,phone_number,chain_owned,chain_organization,ichcahps_date,ichcahps_data_availability_code,linearized_score_of_nephrologists_communication_and_caring,star_rating_of_nephrologists_communication_and_caring,linearized_score_of_quality_of_dialysis_center_care_and_operations,star_rating_of_quality_of_dialysis_center_care_and_operations,linearized_score_of_providing_information_to_patients,star_rating_of_providing_information_to_patients,linearized_score_of_rating_of_the_nephrologist,star_rating_of_the_nephrologist,linearized_score_of_rating_of_the_dialysis_center_staff,star_rating_of_the_dialysis_center_staff,linearized_score_of_rating_of_the_dialysis_facility,star_rating_of_the_dialysis_facility,total_number_of_completed_interviews_from_the_fall_and_spring_surveys,ich_cahps_survey_of_patients_experiences_star_rating,survey_response_rate,ichcahps_survey_response_rate,ich_cahps_quality_of_patient_care_star_rating
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""5045""","""5045""","""5045""","""5045""","""5045""","""2216""","""5045""","""5045""","""5045""","""5045""","""5045""","""5045""","""5045""","""5045""","""5045""","""5045""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""3426""","""2763""","""835""","""1928""","""663"""
"""null_count""","""0""","""0""","""0""","""0""","""0""","""2829""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""1619""","""2282""","""4210""","""3117""","""4382"""
"""mean""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""std""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""min""","""2018""","""50038""","""17""","""ABORN DIALYSIS CENTER""","""100 SOUTH SAN MATEO DRIVE""","""""","""ALHAMBRA""","""CA""","""90003""","""""","""Non-Profit""","""(209) 205-1126""","""No""","""""","""04/19/2019-01/10/2020""","""1""","""60""","""1""","""67""","""1""","""53""","""1""","""64""","""1""","""70""","""1""","""72""","""1""","""100""","""1""","""12""","""10""","""1"""
"""25%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""50%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""75%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""max""","""2024""","""752592""","""18""","""YUBA SUTTER DIALYSIS""","""UCSD Dialysis Center - 200 W. …","""Suite B""","""Yucca Valley""","""CA""","""96080""","""Yuba""","""Profit""","""(951) 977-9466""","""Yes""","""US Renal Care, Inc.""","""31MAY2022-13JAN2023""","""270""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""45""","""Not Available""","""Not Available"""


In [65]:
facility_df.describe()

statistic,provider_number,network,facility_name,five_star_date,five_star,five_star_data_availability_code,address_line_1,address_line_2,citytown,state,zip_code,countyparish,telephone_number,profit_or_nonprofit,chain_owned,chain_organization,late_shift,_of_dialysis_stations,offers_incenter_hemodialysis,offers_peritoneal_dialysis,offers_home_hemodialysis_training,certification_date,claims_date,eqrs_date,smr_date,patient_survival_category_text,patient_survival_data_availability_code,number_of_patients_included_in_survival_summary,mortality_rate_facility,mortality_rate_upper_confidence_limit_975,mortality_rate_lower_confidence_limit_25,shr_date,patient_hospitalization_category_text,patient_hospitalization_data_availability_code,number_of_patients_included_in_hospitalization_summary,hospitalization_rate_facility,…,number_of_patients_in_long_term_catheter_summary,number_of_patient_months_in_long_term_catheter_summary,percentage_of_adult_patients_with_long_term_catheter_in_use,npcr_data_availability_code,number_of_patients_in_npcr_summary,number_of_patientmonths_in_npcr_summary,percentage_of_pediatric_hd_patients_with_npcr,year,month,city,zip,county,phone_number,certification_or_recertification_date,crownweb_date,number_of_patientmonths_in_serum_phosphorus_summary_,serum_phosphorus_data_availability_code_,dateswr,offers_incenter_peritoneal_dialysis,rate_of_hospital_readmission_category_text,percentage_of_medicare_patients_with_hgb_10_gdl,number_of_patients_included_in_transfusion_summary,standard_infection_ratio_,crownweb__date_,percentage_of_adult_hd_patients_with_ktv_12,percentage_of_adult_pd_patients_with_ktv17,percentage_of_pediatric_hd_patents_with_ktv12,percentage_of_pediatric_pd_patents_with_ktv18,number_of_adult_patients_included_in_arterial_venous_fistula_and_catheter_summaries,number_of_adult_patientmonths_included_in_arterial_venous_fistula_and_catheter_summaries,arteriovenous_fistulae_in_use_data_availability_code,percentage_of_patients_with_arteriovenous_fistulae_in_use,vascular_catheter_data_availability_code,percentage_of_patients_with_vascular_catheter_in_use_for_90_days_or_longer,hospitalization_rate_facility_,patient_hospital_readmission_category_text,readmission_rate_facility_
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""60476""","""60476""","""60476""","""60476""","""56790""","""60476""","""60476""","""34634""","""15215""","""60476""","""15215""","""15214""","""15215""","""60476""","""60476""","""60475""","""60474""","""60476""","""60476""","""46190""","""60474""","""15215""","""60476""","""15215""","""60476""","""60476""","""60476""","""60093""","""57271""","""57271""","""57271""","""60476""","""60476""","""60476""","""60093""","""50873""",…,"""53193""","""51911""","""49982""","""53471""","""53193""","""15874""","""14983""","""60476""","""60476""","""45261""","""45261""","""45261""","""45261""","""45261""","""38256""","""22856""","""23409""","""7566""","""14286""","""7281""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005"""
"""null_count""","""0""","""0""","""0""","""0""","""3686""","""0""","""0""","""25842""","""45261""","""0""","""45261""","""45262""","""45261""","""0""","""0""","""1""","""2""","""0""","""0""","""14286""","""2""","""45261""","""0""","""45261""","""0""","""0""","""0""","""383""","""3205""","""3205""","""3205""","""0""","""0""","""0""","""383""","""9603""",…,"""7283""","""8565""","""10494""","""7005""","""7283""","""44602""","""45493""","""0""","""0""","""15215""","""15215""","""15215""","""15215""","""15215""","""22220""","""37620""","""37067""","""52910""","""46190""","""53195""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471"""
"""mean""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""std""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""min""","""12500""","""1""","""- LIBERTY ALBUQUERQUE DIALYSIS…","""01/01/2014-12/31/2017""","""""","""1""","""# 7 PROFESSIONAL DRIVE""","""""","""ABBEVILLE""","""AK""","""10003""","""Acadia""","""(201) 262-0429""","""""","""N""","""""","""0""","""0""","""0""","""0""","""0""","""01APR1971""","""01/01/2017-12/31/2017""","""01JAN2022-31DEC2022""","""01/01/2014-12/31/2017""",""" ""","""1""","""""","""""","""""","""""","""01/01/2017-12/31/2017""",""" ""","""1""","""""","""""",…,"""""","""""","""""","""1""","""""","""""","""""","""2017""","""10""","""ABBEVILLE""","""10003""","""""","""(201) 262-0429""","""01/01/1968""","""01/01/2017-12/31/2017""","""0""","""1""","""01/01/2015 - 12/31/2017""","""0""","""As Expected""","""""","""""","""""","""01JAN2016-31DEC2016""","""""","""""","""""","""""","""""","""""","""1""","""""","""1""","""""","""""",""" """,""""""
"""25%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""50%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""75%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""max""","""92531""","""9""","""zz_closed_Texas Care Dialysis""","""01Jan2019-31Dec2022""","""Not Available""","""270""","""road 174 km 6.9""","""suite B""","""pennsauken""","""WY""","""99801""","""Zavala""","""(989) 921-2170""","""Profit""","""Yes""","""Wake Forest University""","""Yes""","""9""","""Yes""","""Yes""","""Yes""","""31OCT2022""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""Worse than Expected""","""270""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""01Jan2022-31Dec2022""","""Worse than Expected""","""270""","""Not Available""","""Not Available""",…,"""Not Available""","""Not Available""","""Not Available""","""270""","""Not Available""","""Not Available""","""Not Available""","""2024""","""7""","""pennsauken""","""99801""","""Zavala""","""9899212170""","""9/9/2016""","""01JAN2020-31DEC2020""","""999""","""258""","""01/01/2015 - 12/31/2017""","""Y""","""Worse than Expected""","""93""","""99""","""7""","""01JAN2016-31DEC2016""","""99""","""99""","""99""","""96""","""99""","""997""","""258""","""99""","""258""","""9""","""99""","""Worse than Expected""","""9"""


In [40]:
print("Missing values before renaming:")
print(f"'county': {facility_df['county'].null_count()}")

facility_df = merge_columns(facility_df)

# Check missing values after processing
print("\nMissing values after processing:")

# Check if the number of rows has changed
final_row_count = facility_df.shape[0]
if initial_row_count == final_row_count:
    print(f"\nNo rows were dropped. Row count remains {final_row_count}.")
else:
    print(f"\nWarning: Row count changed from {initial_row_count} to {final_row_count}.")


Missing values before renaming:
'county': 1

Missing values after processing:



In [41]:
facility_df.describe()

statistic,provider_number,network,facility_name,five_star_date,five_star,five_star_data_availability_code,address_line_1,address_line_2,citytown,state,zip_code,countyparish,telephone_number,profit_or_nonprofit,chain_owned,chain_organization,late_shift,_of_dialysis_stations,offers_incenter_hemodialysis,offers_peritoneal_dialysis,offers_home_hemodialysis_training,certification_date,claims_date,eqrs_date,smr_date,patient_survival_category_text,patient_survival_data_availability_code,number_of_patients_included_in_survival_summary,mortality_rate_facility,mortality_rate_upper_confidence_limit_975,mortality_rate_lower_confidence_limit_25,shr_date,patient_hospitalization_category_text,patient_hospitalization_data_availability_code,number_of_patients_included_in_hospitalization_summary,hospitalization_rate_facility,…,number_of_patients_in_long_term_catheter_summary,number_of_patient_months_in_long_term_catheter_summary,percentage_of_adult_patients_with_long_term_catheter_in_use,npcr_data_availability_code,number_of_patients_in_npcr_summary,number_of_patientmonths_in_npcr_summary,percentage_of_pediatric_hd_patients_with_npcr,year,month,city,zip,county,phone_number,certification_or_recertification_date,crownweb_date,number_of_patientmonths_in_serum_phosphorus_summary_,serum_phosphorus_data_availability_code_,dateswr,offers_incenter_peritoneal_dialysis,rate_of_hospital_readmission_category_text,percentage_of_medicare_patients_with_hgb_10_gdl,number_of_patients_included_in_transfusion_summary,standard_infection_ratio_,crownweb__date_,percentage_of_adult_hd_patients_with_ktv_12,percentage_of_adult_pd_patients_with_ktv17,percentage_of_pediatric_hd_patents_with_ktv12,percentage_of_pediatric_pd_patents_with_ktv18,number_of_adult_patients_included_in_arterial_venous_fistula_and_catheter_summaries,number_of_adult_patientmonths_included_in_arterial_venous_fistula_and_catheter_summaries,arteriovenous_fistulae_in_use_data_availability_code,percentage_of_patients_with_arteriovenous_fistulae_in_use,vascular_catheter_data_availability_code,percentage_of_patients_with_vascular_catheter_in_use_for_90_days_or_longer,hospitalization_rate_facility_,patient_hospital_readmission_category_text,readmission_rate_facility_
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""60476""","""60476""","""60476""","""60476""","""56790""","""60476""","""60476""","""34634""","""15215""","""60476""","""60476""","""15214""","""15215""","""60476""","""60476""","""60475""","""60474""","""60476""","""60476""","""46190""","""60474""","""15215""","""60476""","""15215""","""60476""","""60476""","""60476""","""60093""","""57271""","""57271""","""57271""","""60476""","""60476""","""60476""","""60093""","""50873""",…,"""53193""","""51911""","""49982""","""53471""","""53193""","""15874""","""14983""","""60476""","""60476""","""45261""","""45261""","""60475""","""60476""","""45261""","""38256""","""22856""","""23409""","""7566""","""14286""","""7281""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005"""
"""null_count""","""0""","""0""","""0""","""0""","""3686""","""0""","""0""","""25842""","""45261""","""0""","""0""","""45262""","""45261""","""0""","""0""","""1""","""2""","""0""","""0""","""14286""","""2""","""45261""","""0""","""45261""","""0""","""0""","""0""","""383""","""3205""","""3205""","""3205""","""0""","""0""","""0""","""383""","""9603""",…,"""7283""","""8565""","""10494""","""7005""","""7283""","""44602""","""45493""","""0""","""0""","""15215""","""15215""","""1""","""0""","""15215""","""22220""","""37620""","""37067""","""52910""","""46190""","""53195""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471"""
"""mean""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""std""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""min""","""12500""","""1""","""- LIBERTY ALBUQUERQUE DIALYSIS…","""01/01/2014-12/31/2017""","""""","""1""","""# 7 PROFESSIONAL DRIVE""","""""","""ABBEVILLE""","""AK""","""10003""","""Acadia""","""(201) 262-0429""","""""","""N""","""""","""0""","""0""","""0""","""0""","""0""","""01APR1971""","""01/01/2017-12/31/2017""","""01JAN2022-31DEC2022""","""01/01/2014-12/31/2017""",""" ""","""1""","""""","""""","""""","""""","""01/01/2017-12/31/2017""",""" ""","""1""","""""","""""",…,"""""","""""","""""","""1""","""""","""""","""""","""2017""","""10""","""ABBEVILLE""","""10003""","""""","""(201) 262-0429""","""01/01/1968""","""01/01/2017-12/31/2017""","""0""","""1""","""01/01/2015 - 12/31/2017""","""0""","""As Expected""","""""","""""","""""","""01JAN2016-31DEC2016""","""""","""""","""""","""""","""""","""""","""1""","""""","""1""","""""","""""",""" """,""""""
"""25%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""50%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""75%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""max""","""92531""","""9""","""zz_closed_Texas Care Dialysis""","""01Jan2019-31Dec2022""","""Not Available""","""270""","""road 174 km 6.9""","""suite B""","""pennsauken""","""WY""","""99801""","""Zavala""","""(989) 921-2170""","""Profit""","""Yes""","""Wake Forest University""","""Yes""","""9""","""Yes""","""Yes""","""Yes""","""31OCT2022""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""Worse than Expected""","""270""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""01Jan2022-31Dec2022""","""Worse than Expected""","""270""","""Not Available""","""Not Available""",…,"""Not Available""","""Not Available""","""Not Available""","""270""","""Not Available""","""Not Available""","""Not Available""","""2024""","""7""","""pennsauken""","""99801""","""Zavala""","""9899212170""","""9/9/2016""","""01JAN2020-31DEC2020""","""999""","""258""","""01/01/2015 - 12/31/2017""","""Y""","""Worse than Expected""","""93""","""99""","""7""","""01JAN2016-31DEC2016""","""99""","""99""","""99""","""96""","""99""","""997""","""258""","""99""","""258""","""9""","""99""","""Worse than Expected""","""9"""


In [91]:
import janitor.polars

facility_df = facility_df.with_columns(pl.coalesce('city', 'citytown').alias('city'))
facility_df = facility_df.with_columns(pl.coalesce('county', 'countyparish').alias('county'))
facility_df = facility_df.with_columns(pl.coalesce('zip_code', 'zip').alias('zip_code'))
facility_df = facility_df.with_columns(pl.coalesce('phone_number', 'telephone_number').alias('phone_number'))
facility_df = facility_df.with_columns(pl.coalesce('hospitalization_rate_facility_', 'hospitalization_rate_facility').alias('hospitalization_rate_facility'))
facility_df = facility_df.with_columns(pl.coalesce('crownweb__date_', 'crownweb_date').alias('crownweb_date'))
facility_df = facility_df.with_columns(pl.coalesce('number_of_patientmonths_in_serum_phosphorus_summary_', 'number_of_patientmonths_in_serum_phosphorus_summary').alias('number_of_patientmonths_in_serum_phosphorus_summary'))
facility_df = facility_df.with_columns(pl.coalesce('serum_phosphorus_data_availability_code_', 'serum_phosphorus_data_availability_code').alias('serum_phosphorus_data_availability_code'))
facility_df = facility_df.with_columns(pl.coalesce('standard_infection_ratio_', 'standard_infection_ratio').alias('standard_infection_ratio'))
facility_df = facility_df.with_columns(pl.coalesce('hospitalization_rate_facility_', 'hospitalization_rate_facility').alias('hospitalization_rate_facility'))
facility_df = facility_df.with_columns(pl.coalesce('readmission_rate_facility', 'readmission_rate_facility_').alias('readmission_rate_facility'))

facility_df = facility_df.drop('readmission_rate_facility','hospitalization_rate_facility','standard_infection_ratio', 'number_of_patientmonths_in_serum_phosphorus_summary_', 'crownweb__date_', 'serum_phosphorus_data_availability_code', 'citytown', 'zip', 'telephone_number', 'countyparish')

facility_df.clean_names(strip_underscores=True)

provider_number,network,facility_name,five_star_date,five_star,five_star_data_availability_code,address_line_1,address_line_2,state,zip_code,profit_or_nonprofit,chain_owned,chain_organization,late_shift,of_dialysis_stations,offers_incenter_hemodialysis,offers_peritoneal_dialysis,offers_home_hemodialysis_training,certification_date,claims_date,eqrs_date,smr_date,patient_survival_category_text,patient_survival_data_availability_code,number_of_patients_included_in_survival_summary,mortality_rate_facility,mortality_rate_upper_confidence_limit_975,mortality_rate_lower_confidence_limit_25,shr_date,patient_hospitalization_category_text,patient_hospitalization_data_availability_code,number_of_patients_included_in_hospitalization_summary,hospitalization_rate_upper_confidence_limit_975,hospitalization_rate_lower_confidence_limit_25,srr_date,patient_hospital_readmission_category,patient_hospital_readmission_data_availability_code,…,percentage_of_adult_patients_with_serum_phosphorus_between_5670_mgdl,percentage_of_adult_patients_with_serum_phosphorus_greater_than_70_mgdl,long_term_catheter_data_availability_code,number_of_patients_in_long_term_catheter_summary,number_of_patient_months_in_long_term_catheter_summary,percentage_of_adult_patients_with_long_term_catheter_in_use,npcr_data_availability_code,number_of_patients_in_npcr_summary,number_of_patientmonths_in_npcr_summary,percentage_of_pediatric_hd_patients_with_npcr,year,month,city,county,phone_number,certification_or_recertification_date,crownweb_date,serum_phosphorus_data_availability_code,dateswr,offers_incenter_peritoneal_dialysis,rate_of_hospital_readmission_category_text,percentage_of_medicare_patients_with_hgb_10_gdl,number_of_patients_included_in_transfusion_summary,standard_infection_ratio,percentage_of_adult_hd_patients_with_ktv_12,percentage_of_adult_pd_patients_with_ktv17,percentage_of_pediatric_hd_patents_with_ktv12,percentage_of_pediatric_pd_patents_with_ktv18,number_of_adult_patients_included_in_arterial_venous_fistula_and_catheter_summaries,number_of_adult_patientmonths_included_in_arterial_venous_fistula_and_catheter_summaries,arteriovenous_fistulae_in_use_data_availability_code,percentage_of_patients_with_arteriovenous_fistulae_in_use,vascular_catheter_data_availability_code,percentage_of_patients_with_vascular_catheter_in_use_for_90_days_or_longer,hospitalization_rate_facility,patient_hospital_readmission_category_text,readmission_rate_facility
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""12306""","""8""","""CHILDRENS HOSPITAL DIALYSIS""","""01Jan2019-31Dec2022""",,"""260""","""1600 7TH AVENUE SOUTH""",,"""AL""","""35233""","""Non-profit""","""No""","""Independent""","""No""","""7""","""Yes""","""Yes""","""Yes""","""17NOV1982""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""Not Available""","""199""","""59""",,,,"""01Jan2022-31Dec2022""","""Worse than Expected""","""1""","""15""","""479.2""","""143.6""","""01Jan2022-31Dec2022""","""As Expected""","""1""",…,,,"""199""","""1""","""11""",,"""1""","""24""","""177""","""96""","""2024""","""7""","""BIRMINGHAM""","""Jefferson""","""(205) 638-9275""",,,,,,,,,,,,,,,,,,,,,,
"""12500""","""8""","""FMC CAPITOL CITY""","""01Jan2019-31Dec2022""","""1""","""1""","""255 S JACKSON STREET""",,"""AL""","""36104""","""Profit""","""Yes""","""Fresenius Medical Care""","""No""","""28""","""Yes""","""Yes""","""Yes""","""01SEP1976""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""As Expected""","""1""","""462""","""23.3""","""36.3""","""15.4""","""01Jan2022-31Dec2022""","""As Expected""","""1""","""120""","""188.0""","""83.5""","""01Jan2022-31Dec2022""","""As Expected""","""1""",…,"""24""","""16""","""1""","""128""","""1142""","""18""","""259""","""0""",,,"""2024""","""7""","""MONTGOMERY""","""Montgomery""","""(334) 263-1028""",,,,,,,,,,,,,,,,,,,,,,
"""12501""","""8""","""DaVita Gadsden Dialysis""","""01Jan2019-31Dec2022""","""3""","""1""","""409 SOUTH FIRST STREET""",,"""AL""","""35901""","""Profit""","""Yes""","""DaVita""","""No""","""24""","""Yes""","""No""","""No""","""01SEP1976""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""As Expected""","""1""","""157""","""32.0""","""51.0""","""19.3""","""01Jan2022-31Dec2022""","""As Expected""","""1""","""47""","""271.8""","""95.9""","""01Jan2022-31Dec2022""","""As Expected""","""1""",…,"""27""","""18""","""1""","""89""","""546""","""10""","""259""","""0""",,,"""2024""","""7""","""GADSDEN""","""Etowah""","""(256) 547-2511""",,,,,,,,,,,,,,,,,,,,,,
"""12502""","""8""","""DaVita Tuscaloosa University D…","""01Jan2019-31Dec2022""","""1""","""1""","""220 15TH STREET""",,"""AL""","""35401""","""Profit""","""Yes""","""DaVita""","""No""","""23""","""Yes""","""Yes""","""No""","""21OCT1977""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""As Expected""","""1""","""415""","""23.7""","""36.7""","""15.8""","""01Jan2022-31Dec2022""","""As Expected""","""1""","""95""","""218.2""","""93.6""","""01Jan2022-31Dec2022""","""As Expected""","""1""",…,"""22""","""22""","""1""","""110""","""965""","""18""","""259""","""0""",,,"""2024""","""7""","""TUSCALOOSA""","""Tuscaloosa""","""(205) 345-6004""",,,,,,,,,,,,,,,,,,,,,,
"""12505""","""8""","""DaVita PDI-Montgomery""","""01Jan2019-31Dec2022""","""3""","""1""","""1001 FOREST AVENUE""",,"""AL""","""36106""","""Profit""","""Yes""","""DaVita""","""No""","""18""","""Yes""","""Yes""","""Yes""","""14DEC1977""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""As Expected""","""1""","""356""","""26.4""","""39.9""","""17.5""","""01Jan2022-31Dec2022""","""As Expected""","""1""","""89""","""191.1""","""75.3""","""01Jan2022-31Dec2022""","""As Expected""","""1""",…,"""32""","""15""","""1""","""82""","""642""","""13""","""259""","""0""",,,"""2024""","""7""","""MONTGOMERY""","""Montgomery""","""(334) 269-9416""",,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""672605""","""14""","""BAYLOR COLLEGE OF MEDICINE-SCO…","""01Jan2012-31Dec2015""","""4""","""1""","""6120 SCOTT STREET, SUITE F""","""""","""TX""","""77021""","""1""","""Y""","""US RENAL CARE, INC.""","""N""","""24""","""Y""",,"""N""",,"""01JAN2016-31DEC2016""",,"""01Jan2013-31Dec2016""","""As Expected""","""1""","""446""","""19""","""25.6""","""13.8""","""01Jan2016-31Dec2016""","""As Expected""","""1""","""80""","""320.5""","""121.6""","""01Jan2016-31Dec2016""",,"""1""",…,"""21""","""14""",,,,,,,,,"""2017""","""10""","""HOUSTON""","""HARRIS""","""7137417059""","""24-Jul-09""","""01JAN2016-31DEC2016""",,,"""N""",,"""9""","""66""","""0.76""","""93""","""""","""""","""""","""81""","""710""","""1""","""74""","""1""","""7""","""194.8""","""As Expected""","""30.7"""
"""672606""","""14""","""SEALY DIALYSIS""","""01Jan2012-31Dec2015""","""5""","""1""","""2242 CHAMPIONSHIP DRIVE""","""""","""TX""","""77474""","""1""","""Y""","""DAVITA""","""N""","""12""","""Y""",,"""N""",,"""01JAN2016-31DEC2016""",,"""01Jan2013-31Dec2016""","""As Expected""","""1""","""269""","""15.5""","""22.7""","""10.1""","""01Jan2016-31Dec2016""","""As Expected""","""1""","""60""","""292.6""","""86.7""","""01Jan2016-31Dec2016""",,"""1""",…,"""11""","""7""",,,,,,,,,"""2017""","""10""","""SEALY""","""AUSTIN""","""9796270300""","""13-Jul-09""","""01JAN2016-31DEC2016""",,,"""N""",,"""27""","""57""","""0.31""","""97""","""""","""""","""""","""54""","""526""","""1""","""75""","""1""","""6""","""156.6""","""As Expected""","""17.9"""
"""672607""","""14""","""US RENAL CARE CANTON DIALYSIS""","""01Jan2012-31Dec2015""","""5""","""1""","""400 EAST STATE HIGHWAY 243, SU…","""""","""TX""","""75103""","""1""","""Y""","""US RENAL CARE, INC.""","""N""","""13""","""Y""",,"""N""",,"""01JAN2016-31DEC2016""",,"""01Jan2013-31Dec2016""","""As Expected""","""1""","""139""","""18.3""","""27.5""","""11.6""","""01Jan2016-31Dec2016""","""As Expected""","""1""","""34""","""377.9""","""108.4""","""01Jan2016-31Dec2016""",,"""1""",…,"""22""","""10""",,,,,,,,,"""2017""","""10""","""CANTON""","""VAN ZANDT""","""9035672250""","""10-Aug-09""","""01JAN2016-31DEC2016""",,,"""N""",,"""20""","""27""","""2.05""","""98""","""""","""""","""""","""32""","""257""","""1""","""70""","""1""","""2""","""197.3""","""As Expected""","""23.6"""
"""672608""","""14""","""LIBERTY DIALYSIS BRYAN""","""01Jan2012-31Dec2015""","""4""","""1""","""2390 E 29TH STREET""","""""","""TX""","""77802""","""1""","""Y""","""FRESENIUS MEDICAL CARE""","""N""","""21""","""Y""",,"""N""",,"""01JAN2016-31DEC2016""",,"""01Jan2013-31Dec2016""","""As Expected""","""1""","""635""","""18.8""","""23.5""","""14.8""","""01Jan2016-31Dec2016""","""As Expected""","""1""","""124""","""289.3""","""127.7""","""01Jan2016-31Dec2016""",,"""1""",…,"""20""","""14""",,,,,,,,,"""2017""","""10""","""BRYAN""","""BRAZOS""","""9793141550""","""20-Aug-09""","""01JAN2016-31DEC2016""",,,"""Y""",,"""10""","""103""","""0.8""","""98""","""99""","""""","""""","""104""","""843""","""1""","""73""","""1""","""7""","""185.9""","""As Expected""","""27.7"""


In [48]:
import re
from collections import defaultdict

def to_snake_case_and_merge(df):
    # Function to convert a string to snake case and remove leading/trailing underscores
    def to_snake_case(string):
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', string)
        s2 = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
        return s2.strip('_')  # Remove leading/trailing underscores

    # Convert all column names to snake case
    new_names = [to_snake_case(col) for col in df.columns]

    # Create a dictionary to store columns with the same snake case name
    name_groups = defaultdict(list)
    for old_name, new_name in zip(df.columns, new_names):
        name_groups[new_name].append(old_name)

    # Create a list to store the expressions for the new DataFrame
    new_columns = []
    columns_to_drop = []

    # Iterate through the grouped names
    for new_name, old_names in name_groups.items():
        if len(old_names) == 1:
            # If there's only one column with this name, just rename it
            new_columns.append(pl.col(old_names[0]).alias(new_name))
            if old_names[0] != new_name:
                columns_to_drop.append(old_names[0])
        else:
            # If there are multiple columns, use coalesce to merge them
            new_columns.append(pl.coalesce([pl.col(name) for name in old_names]).alias(new_name))
            columns_to_drop.extend(old_names)

    # Create a new DataFrame with the updated column names and merged columns
    df = df.select(new_columns)
    
    # Drop the original columns that were merged or renamed, but only if they exist
    columns_to_drop = [col for col in columns_to_drop if col in df.columns]
    if columns_to_drop:
        df = df.drop(columns_to_drop)

    return df

# Apply the function to your DataFrame
facility_df = to_snake_case_and_merge(facility_df)

# Display the first few rows and the new column names
print(facility_df.head())
print("\nNew column names:")
print(facility_df.columns)

shape: (5, 155)
┌────────────┬─────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐
│ provider_n ┆ network ┆ facility_ ┆ five_star ┆ … ┆ percentag ┆ vascular_ ┆ percentag ┆ patient_h │
│ umber      ┆ ---     ┆ name      ┆ _date     ┆   ┆ e_of_pati ┆ catheter_ ┆ e_of_pati ┆ ospital_r │
│ ---        ┆ str     ┆ ---       ┆ ---       ┆   ┆ ents_with ┆ data_avai ┆ ents_with ┆ eadmissio │
│ str        ┆         ┆ str       ┆ str       ┆   ┆ _ar…      ┆ lab…      ┆ _va…      ┆ n_c…      │
│            ┆         ┆           ┆           ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---       │
│            ┆         ┆           ┆           ┆   ┆ str       ┆ str       ┆ str       ┆ str       │
╞════════════╪═════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡
│ 12306      ┆ 8       ┆ CHILDRENS ┆ 01Jan2019 ┆ … ┆ null      ┆ null      ┆ null      ┆ null      │
│            ┆         ┆ HOSPITAL  ┆ -31Dec202 ┆   ┆           ┆           

In [69]:
facility_df.describe()

statistic,provider_number,network,facility_name,five_star_date,five_star,five_star_data_availability_code,address_line_1,address_line_2,citytown,state,zip_code,countyparish,telephone_number,profit_or_nonprofit,chain_owned,chain_organization,late_shift,_of_dialysis_stations,offers_incenter_hemodialysis,offers_peritoneal_dialysis,offers_home_hemodialysis_training,certification_date,claims_date,eqrs_date,smr_date,patient_survival_category_text,patient_survival_data_availability_code,number_of_patients_included_in_survival_summary,mortality_rate_facility,mortality_rate_upper_confidence_limit_975,mortality_rate_lower_confidence_limit_25,shr_date,patient_hospitalization_category_text,patient_hospitalization_data_availability_code,number_of_patients_included_in_hospitalization_summary,hospitalization_rate_facility,…,number_of_patients_in_long_term_catheter_summary,number_of_patient_months_in_long_term_catheter_summary,percentage_of_adult_patients_with_long_term_catheter_in_use,npcr_data_availability_code,number_of_patients_in_npcr_summary,number_of_patientmonths_in_npcr_summary,percentage_of_pediatric_hd_patients_with_npcr,year,month,city,zip,county,phone_number,certification_or_recertification_date,crownweb_date,number_of_patientmonths_in_serum_phosphorus_summary_,serum_phosphorus_data_availability_code_,dateswr,offers_incenter_peritoneal_dialysis,rate_of_hospital_readmission_category_text,percentage_of_medicare_patients_with_hgb_10_gdl,number_of_patients_included_in_transfusion_summary,standard_infection_ratio_,crownweb__date_,percentage_of_adult_hd_patients_with_ktv_12,percentage_of_adult_pd_patients_with_ktv17,percentage_of_pediatric_hd_patents_with_ktv12,percentage_of_pediatric_pd_patents_with_ktv18,number_of_adult_patients_included_in_arterial_venous_fistula_and_catheter_summaries,number_of_adult_patientmonths_included_in_arterial_venous_fistula_and_catheter_summaries,arteriovenous_fistulae_in_use_data_availability_code,percentage_of_patients_with_arteriovenous_fistulae_in_use,vascular_catheter_data_availability_code,percentage_of_patients_with_vascular_catheter_in_use_for_90_days_or_longer,hospitalization_rate_facility_,patient_hospital_readmission_category_text,readmission_rate_facility_
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""60476""","""60476""","""60476""","""60476""","""56790""","""60476""","""60476""","""34634""","""15215""","""60476""","""60476""","""15214""","""15215""","""60476""","""60476""","""60475""","""60474""","""60476""","""60476""","""46190""","""60474""","""15215""","""60476""","""15215""","""60476""","""60476""","""60476""","""60093""","""57271""","""57271""","""57271""","""60476""","""60476""","""60476""","""60093""","""57878""",…,"""53193""","""51911""","""49982""","""53471""","""53193""","""15874""","""14983""","""60476""","""60476""","""60476""","""45261""","""60475""","""60476""","""45261""","""38256""","""22856""","""23409""","""7566""","""14286""","""7281""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005""","""7005"""
"""null_count""","""0""","""0""","""0""","""0""","""3686""","""0""","""0""","""25842""","""45261""","""0""","""0""","""45262""","""45261""","""0""","""0""","""1""","""2""","""0""","""0""","""14286""","""2""","""45261""","""0""","""45261""","""0""","""0""","""0""","""383""","""3205""","""3205""","""3205""","""0""","""0""","""0""","""383""","""2598""",…,"""7283""","""8565""","""10494""","""7005""","""7283""","""44602""","""45493""","""0""","""0""","""0""","""15215""","""1""","""0""","""15215""","""22220""","""37620""","""37067""","""52910""","""46190""","""53195""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471""","""53471"""
"""mean""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""std""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""min""","""12500""","""1""","""- LIBERTY ALBUQUERQUE DIALYSIS…","""01/01/2014-12/31/2017""","""""","""1""","""# 7 PROFESSIONAL DRIVE""","""""","""ABBEVILLE""","""AK""","""10003""","""Acadia""","""(201) 262-0429""","""""","""N""","""""","""0""","""0""","""0""","""0""","""0""","""01APR1971""","""01/01/2017-12/31/2017""","""01JAN2022-31DEC2022""","""01/01/2014-12/31/2017""",""" ""","""1""","""""","""""","""""","""""","""01/01/2017-12/31/2017""",""" ""","""1""","""""","""""",…,"""""","""""","""""","""1""","""""","""""","""""","""2017""","""10""","""ABBEVILLE""","""10003""","""""","""(201) 262-0429""","""01/01/1968""","""01/01/2017-12/31/2017""","""0""","""1""","""01/01/2015 - 12/31/2017""","""0""","""As Expected""","""""","""""","""""","""01JAN2016-31DEC2016""","""""","""""","""""","""""","""""","""""","""1""","""""","""1""","""""","""""",""" """,""""""
"""25%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""50%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""75%""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""max""","""92531""","""9""","""zz_closed_Texas Care Dialysis""","""01Jan2019-31Dec2022""","""Not Available""","""270""","""road 174 km 6.9""","""suite B""","""pennsauken""","""WY""","""99801""","""Zavala""","""(989) 921-2170""","""Profit""","""Yes""","""Wake Forest University""","""Yes""","""9""","""Yes""","""Yes""","""Yes""","""31OCT2022""","""01OCT2022-30SEP2023""","""01OCT2022-30SEP2023""","""01Jan2019-31Dec2022""","""Worse than Expected""","""270""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""01Jan2022-31Dec2022""","""Worse than Expected""","""270""","""Not Available""","""Not Available""",…,"""Not Available""","""Not Available""","""Not Available""","""270""","""Not Available""","""Not Available""","""Not Available""","""2024""","""7""","""pennsauken""","""99801""","""Zavala""","""9899212170""","""9/9/2016""","""01JAN2020-31DEC2020""","""999""","""258""","""01/01/2015 - 12/31/2017""","""Y""","""Worse than Expected""","""93""","""99""","""7""","""01JAN2016-31DEC2016""","""99""","""99""","""99""","""96""","""99""","""997""","""258""","""99""","""258""","""9""","""99""","""Worse than Expected""","""9"""


In [None]:
year,
provider_number,
network,
facility_name,
five_star_date,
five_star,
five_star_data_availability_code,
address_line_1,
address_line_2,
city,
state,
zip_code,
county,
profit_or_nonprofit,
phone_number,
chain_owned,
chain_organization,
late_shift,
_of_dialysis_stations,
offers_incenter_hemodialysis,
offers_peritoneal_dialysis,
offers_home_hemodialysis_training,
certification_date,
claims_date,
eqrs_date,
smr_date,
patient_survival_category_text,
patient_survival_data_availability_code,
number_of_patients_included_in_survival_summary,
mortality_rate_facility,
patient_hospitalization_category_text,
patient_hospitalization_data_availability_code,
number_of_patients_included_in_hospitalization_summary,
hospitalization_rate_facility,
number_of_patients_in_long_term_catheter_summary,
number_of_patient_months_in_long_term_catheter_summary,
percentage_of_adult_patients_with_long_term_catheter_in_use,
npcr_data_availability_code,
number_of_patients_in_npcr_summary,
number_of_patientmonths_in_npcr_summary,
percentage_of_pediatric_hd_patients_with_npcr,
certification_or_recertification_date,
crownweb_date,