In [2]:
import numpy as np
import pandas as pd
import requests
import time
from scipy.stats import linregress
from pprint import pprint
import json
from census import Census
from config import (census_key, gkey)
import gmaps

# Census API Key
c = Census(census_key)

### Census Information API on poverty and race per zipcode

In [None]:
# Run Census Search to retrieve data on all zip codes (latest available Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(("B01003_001E", "B17001_002E", "B19013_001E", "B02001_002E", "B02001_003E", "B02001_004E", "B02001_005E", "B02001_006E", "B02001_008E", "B03001_003E"
                         ), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B17001_002E": "Poverty Count",
                                      "B19013_001E": "Median Income",
                                      "B02001_002E": "Caucasian",
                                      "B02001_003E": "African American",
                                      "B02001_004E": "American Indian",
                                      "B02001_005E": "Asian",
                                      "B02001_006E": "Native Hawaiian",
                                      "B02001_008E": "Two or more races",
                                      "B03001_003E": "Hispanic",
                                      "zip code tabulation area": "Zipcode"})

In [None]:
census_pd.sample(5)

In [None]:
census_pd["Poverty Rate"] = 100 * census_pd["Poverty Count"]/ census_pd["Population"]

In [None]:
census_pd.sample(5)

In [None]:
census_pd.columns = census_pd.columns.str.replace(' ', '_').str.lower()
census_pd.tail(5)

In [None]:
census_pd["caucasian_%"] = 100 * census_pd["caucasian"]/ census_pd["population"]
census_pd["african_american_%"] = 100 * census_pd["african_american"]/ census_pd["population"]
census_pd["american_indian_%"] = 100 * census_pd["american_indian"]/ census_pd["population"]
census_pd["asian_%"] = 100 * census_pd["asian"]/ census_pd["population"]
census_pd["native_hawaiian_%"] = 100 * census_pd["native_hawaiian"]/ census_pd["population"]
census_pd["two_or_more_races_%"] = 100 * census_pd["two_or_more_races"]/ census_pd["population"]
census_pd["hispanic_%"] = 100 * census_pd["hispanic"]/ census_pd["population"]
clean_census_df=census_pd[["zipcode", "population", "poverty_rate", "median_income", "caucasian_%", "african_american_%", "american_indian_%", "asian_%", "native_hawaiian_%", "two_or_more_races_%", "hispanic_%"]]

In [None]:
clean_census_df = clean_census_df.rename(columns={'zipcode': 'zip_code'})
clean_census_df['zip_code'].apply(type).value_counts()

In [None]:
clean_census_df

In [None]:
clean_census_df['median_income'].apply(type).value_counts()

In [None]:
(clean_census_df.median_income <= 0).value_counts()

In [None]:
updated_census_df = clean_census_df[(clean_census_df['median_income']>0)]
updated_census_df

In [None]:
final_census_df=updated_census_df[["zip_code", "population", "poverty_rate", "median_income"]]
final_census_df

In [None]:
final_census_df.to_csv("output_data/final_census_df.csv", index=False)

### CSV uploaded on Value of Care

In [5]:
value_df = pd.read_csv("data/Payment_and_Value_of_Care-Hospital.csv", low_memory=False, dtype=str)
value_df.sample(10)

Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Phone Number,Payment Measure ID,Payment Measure Name,...,Payment,Lower Estimate,Higher Estimate,Payment Footnote,Value of Care Display ID,Value of Care Display Name,Value of Care Category,Value of Care Footnote,Start Date,End Date
11370,330163,EASTERN NIAGARA HOSPITAL,521 EAST AVENUE,LOCKPORT,NY,14094,NIAGARA,(716) 514-5700,PAYM_30_PN,Payment for pneumonia patients,...,"$16,108","$14,341","$17,904",,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Lower Payment,,07/01/2016,06/30/2019
3374,100232,PUTNAM COMMUNITY MEDICAL CENTER,611 ZEAGLER DR,PALATKA,FL,32177,PUTNAM,(386) 326-8500,PAYM_30_PN,Payment for pneumonia patients,...,"$20,351","$18,970","$21,722",,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Higher Payment,,07/01/2016,06/30/2019
9072,250027,WINSTON MEDICAL CENTER & SWINGBED,17550 EAST MAIN STREET,LOUISVILLE,MS,39339,WINSTON,(662) 779-5124,PAYM_30_AMI,Payment for heart attack patients,...,Not Available,Not Available,Not Available,1.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Not Available,13.0,07/01/2016,06/30/2019
15631,450072,CHI ST. LUKES' BRAZOSPORT HOSPITAL,100 MEDICAL DRIVE,LAKE JACKSON,TX,77566,BRAZORIA,(979) 297-4411,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,"$20,928","$18,779","$23,461",,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Average Payment,,04/01/2016,03/31/2019
3015,100047,BAYFRONT HEALTH PUNTA GORDA,809 E MARION AVE,PUNTA GORDA,FL,33950,CHARLOTTE,(941) 639-3131,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,"$21,301","$20,127","$22,537",,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Average Payment,,04/01/2016,03/31/2019
12583,360096,EAST LIVERPOOL CITY HOSPITAL,425 WEST 5TH STREET,EAST LIVERPOOL,OH,43920,COLUMBIANA,(330) 385-7200,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,Not Available,Not Available,Not Available,1.0,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Not Available,13.0,04/01/2016,03/31/2019
16451,451305,CHI ST JOSEPH HEALTH BURLESON HOSPITAL,1101 WOODSON DRIVE,CALDWELL,TX,77836,BURLESON,(979) 567-3245,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,Not Available,Not Available,Not Available,5.0,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Not Available,13.0,04/01/2016,03/31/2019
8516,240001,NORTH MEMORIAL HEALTH,3300 OAKDALE NORTH,ROBBINSDALE,MN,55422,HENNEPIN,(763) 520-5200,PAYM_30_AMI,Payment for heart attack patients,...,"$24,491","$22,200","$26,992",,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment,,07/01/2016,06/30/2019
6871,181307,MORGAN COUNTY ARH HOSPITAL,476 LIBERTY ROAD,WEST LIBERTY,KY,41472,MORGAN,(606) 743-3186,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,Not Available,Not Available,Not Available,5.0,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Not Available,13.0,04/01/2016,03/31/2019
16647,451358,MEMORIAL HOSPITAL,209 NORTHWEST 8TH STREET,SEMINOLE,TX,79360,GAINES,(432) 758-4854,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,Not Available,Not Available,Not Available,19.0,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Not Available,19.0,04/01/2016,03/31/2019


In [6]:
only_nj_zips=value_df[value_df["State"] == "NJ"]
only_nj_zips

Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Phone Number,Payment Measure ID,Payment Measure Name,...,Payment,Lower Estimate,Higher Estimate,Payment Footnote,Value of Care Display ID,Value of Care Display Name,Value of Care Category,Value of Care Footnote,Start Date,End Date
10688,310001,HACKENSACK UNIVERSITY MEDICAL CENTER,30 PROSPECT AVE,HACKENSACK,NJ,07601,BERGEN,(551) 996-2000,PAYM_30_AMI,Payment for heart attack patients,...,"$28,471","$26,974","$30,113",,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Better Mortality and Higher Payment,,07/01/2016,06/30/2019
10689,310001,HACKENSACK UNIVERSITY MEDICAL CENTER,30 PROSPECT AVE,HACKENSACK,NJ,07601,BERGEN,(551) 996-2000,PAYM_30_HF,Payment for heart failure patients,...,"$22,201","$21,378","$23,089",,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Better Mortality and Higher Payment,,07/01/2016,06/30/2019
10690,310001,HACKENSACK UNIVERSITY MEDICAL CENTER,30 PROSPECT AVE,HACKENSACK,NJ,07601,BERGEN,(551) 996-2000,PAYM_30_PN,Payment for pneumonia patients,...,"$21,324","$20,648","$22,048",,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Better Mortality and Higher Payment,,07/01/2016,06/30/2019
10691,310001,HACKENSACK UNIVERSITY MEDICAL CENTER,30 PROSPECT AVE,HACKENSACK,NJ,07601,BERGEN,(551) 996-2000,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,"$20,870","$20,501","$21,259",,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Average Payment,,04/01/2016,03/31/2019
10692,310002,NEWARK BETH ISRAEL MEDICAL CENTER,201 LYONS AVE,NEWARK,NJ,07112,ESSEX,(973) 926-7850,PAYM_30_AMI,Payment for heart attack patients,...,"$26,155","$23,703","$28,847",,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment,,07/01/2016,06/30/2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10943,313300,CHILDREN'S SPECIALIZED HOSPITAL,200 SOMERSET STREET,NEW BRUNSWICK,NJ,08901,MIDDLESEX,(732) 258-7050,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,Not Available,Not Available,Not Available,19,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Not Available,19,04/01/2016,03/31/2019
10944,313302,WEISMAN CHILDRENS REHABILITATION HOSPITAL,"92 BRICK ROAD, 3RD FLOOR",MARLTON,NJ,08053,BURLINGTON,(856) 489-4520,PAYM_30_AMI,Payment for heart attack patients,...,Not Available,Not Available,Not Available,19,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Not Available,19,07/01/2016,06/30/2019
10945,313302,WEISMAN CHILDRENS REHABILITATION HOSPITAL,"92 BRICK ROAD, 3RD FLOOR",MARLTON,NJ,08053,BURLINGTON,(856) 489-4520,PAYM_30_HF,Payment for heart failure patients,...,Not Available,Not Available,Not Available,19,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Not Available,19,07/01/2016,06/30/2019
10946,313302,WEISMAN CHILDRENS REHABILITATION HOSPITAL,"92 BRICK ROAD, 3RD FLOOR",MARLTON,NJ,08053,BURLINGTON,(856) 489-4520,PAYM_30_PN,Payment for pneumonia patients,...,Not Available,Not Available,Not Available,19,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Not Available,19,07/01/2016,06/30/2019


In [7]:
value_df.columns = value_df.columns.str.replace(' ', '_').str.lower()
value_df.head()

Unnamed: 0,facility_id,facility_name,address,city,state,zip_code,county_name,phone_number,payment_measure_id,payment_measure_name,...,payment,lower_estimate,higher_estimate,payment_footnote,value_of_care_display_id,value_of_care_display_name,value_of_care_category,value_of_care_footnote,start_date,end_date
0,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,PAYM_30_AMI,Payment for heart attack patients,...,"$24,934","$23,490","$26,429",,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment,,07/01/2016,06/30/2019
1,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,PAYM_30_HF,Payment for heart failure patients,...,"$17,854","$17,061","$18,676",,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Average Payment,,07/01/2016,06/30/2019
2,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,PAYM_30_PN,Payment for pneumonia patients,...,"$20,216","$19,186","$21,271",,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Higher Payment,,07/01/2016,06/30/2019
3,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,PAYM_90_HIP_KNEE,Payment for hip/knee replacement patients,...,"$22,216","$21,191","$23,313",,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Higher Payment,,04/01/2016,03/31/2019
4,10005,MARSHALL MEDICAL CENTERS,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL,(256) 593-8310,PAYM_30_AMI,Payment for heart attack patients,...,"$24,742","$22,016","$27,967",,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment,,07/01/2016,06/30/2019


In [8]:
for column in value_df.columns:
    print(column)

facility_id
facility_name
address
city
state
zip_code
county_name
phone_number
payment_measure_id
payment_measure_name
payment_category
denominator
payment
lower_estimate
higher_estimate
payment_footnote
value_of_care_display_id
value_of_care_display_name
value_of_care_category
value_of_care_footnote
start_date
end_date


In [9]:
updated_df=value_df.drop(['county_name', 'phone_number', 'payment_footnote', 'value_of_care_footnote', 'lower_estimate', 'higher_estimate', 'start_date', 'end_date', 'payment_measure_id'], axis=1).reset_index(drop=True)
updated_df

Unnamed: 0,facility_id,facility_name,address,city,state,zip_code,payment_measure_name,payment_category,denominator,payment,value_of_care_display_id,value_of_care_display_name,value_of_care_category
0,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart attack patients,No Different Than the National Average Payment,574,"$24,934",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
1,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart failure patients,No Different Than the National Average Payment,823,"$17,854",MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Average Payment
2,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for pneumonia patients,Greater Than the National Average Payment,536,"$20,216",MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Higher Payment
3,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for hip/knee replacement patients,Greater Than the National Average Payment,245,"$22,216",COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Higher Payment
4,010005,MARSHALL MEDICAL CENTERS,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,Payment for heart attack patients,No Different Than the National Average Payment,71,"$24,742",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18835,670136,BAYLOR SCOTT & WHITE MEDICAL CENTER- AUSTIN,5245 W US 290,AUSTIN,TX,78735,Payment for hip/knee replacement patients,Not Available,Not Available,Not Available,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Not Available
18836,670143,ASCENSION SETON BASTROP,"630 HIGWAY 71 W,",BASTROP,TX,78602,Payment for heart attack patients,Not Available,Not Available,Not Available,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Not Available
18837,670143,ASCENSION SETON BASTROP,"630 HIGWAY 71 W,",BASTROP,TX,78602,Payment for heart failure patients,Not Available,Not Available,Not Available,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Not Available
18838,670143,ASCENSION SETON BASTROP,"630 HIGWAY 71 W,",BASTROP,TX,78602,Payment for pneumonia patients,Not Available,Not Available,Not Available,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Not Available


In [10]:
no_nans_df = updated_df[updated_df.payment != 'Not Available']
no_nans_df

Unnamed: 0,facility_id,facility_name,address,city,state,zip_code,payment_measure_name,payment_category,denominator,payment,value_of_care_display_id,value_of_care_display_name,value_of_care_category
0,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart attack patients,No Different Than the National Average Payment,574,"$24,934",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
1,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart failure patients,No Different Than the National Average Payment,823,"$17,854",MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Average Payment
2,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for pneumonia patients,Greater Than the National Average Payment,536,"$20,216",MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Higher Payment
3,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for hip/knee replacement patients,Greater Than the National Average Payment,245,"$22,216",COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Higher Payment
4,010005,MARSHALL MEDICAL CENTERS,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,Payment for heart attack patients,No Different Than the National Average Payment,71,"$24,742",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18786,670120,THE HOSPITALS OF PROVIDENCE TRANSMOUNTAIN CAMPUS,2000 TRANSMOUNTAIN RD,EL PASO,TX,79911,Payment for pneumonia patients,No Different Than the National Average Payment,143,"$20,221",MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Average Payment
18792,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart attack patients,No Different Than the National Average Payment,53,"$27,138",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
18793,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart failure patients,Greater Than the National Average Payment,155,"$21,647",MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Higher Payment
18794,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for pneumonia patients,No Different Than the National Average Payment,205,"$19,409",MORT_PAYM_30_PN,Value of Care Pneumonia measure,Better Mortality and Average Payment


In [11]:
no_nans_df.state.value_counts()

CA    912
TX    891
FL    625
IL    533
PA    500
NY    488
OH    483
MI    375
IN    338
GA    333
NC    333
WI    329
MO    295
VA    271
IA    259
TN    258
KY    248
KS    244
LA    241
OK    239
NJ    237
WA    230
AL    226
MA    214
MN    208
AZ    203
CO    200
MS    198
AR    189
SC    170
OR    170
MD    167
NE    165
WV    121
UT    110
ME    108
CT     99
NV     98
NM     93
ID     88
MT     88
NH     87
SD     82
ND     62
PR     59
WY     57
VT     46
HI     43
AK     36
RI     35
DE     24
DC     21
VI      6
GU      3
MP      3
Name: state, dtype: int64

In [12]:
no_nans_df.payment_measure_name.unique()

array(['Payment for heart attack patients',
       'Payment for heart failure patients',
       'Payment for pneumonia patients',
       'Payment for hip/knee replacement patients'], dtype=object)

In [13]:
no_nans_df.payment_category.unique()

array(['No Different Than the National Average Payment',
       'Greater Than the National Average Payment',
       'Less Than the National Average Payment'], dtype=object)

In [14]:
no_nans_df.value_of_care_category.unique()

array(['Average Mortality and Average Payment',
       'Average Mortality and Higher Payment',
       'Average Complications and Higher Payment',
       'Worse Mortality and Average Payment',
       'Worse Mortality and Lower Payment',
       'Average Complications and Average Payment',
       'Average Mortality and Lower Payment',
       'Average Complications and Lower Payment',
       'Better Mortality and Average Payment',
       'Worse Complications and Higher Payment',
       'Better Mortality and Lower Payment',
       'Worse Mortality and Higher Payment',
       'Worse Complications and Lower Payment',
       'Better Mortality and Higher Payment',
       'Better Complications and Lower Payment',
       'Better Complications and Average Payment',
       'Better Complications and Higher Payment',
       'Worse Complications and Average Payment', 'Not Available'],
      dtype=object)

In [15]:
clean_value_df=no_nans_df[no_nans_df["value_of_care_category"] != "Not Available"]
clean_value_df

Unnamed: 0,facility_id,facility_name,address,city,state,zip_code,payment_measure_name,payment_category,denominator,payment,value_of_care_display_id,value_of_care_display_name,value_of_care_category
0,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart attack patients,No Different Than the National Average Payment,574,"$24,934",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
1,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart failure patients,No Different Than the National Average Payment,823,"$17,854",MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Average Payment
2,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for pneumonia patients,Greater Than the National Average Payment,536,"$20,216",MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Higher Payment
3,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for hip/knee replacement patients,Greater Than the National Average Payment,245,"$22,216",COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Higher Payment
4,010005,MARSHALL MEDICAL CENTERS,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,Payment for heart attack patients,No Different Than the National Average Payment,71,"$24,742",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18786,670120,THE HOSPITALS OF PROVIDENCE TRANSMOUNTAIN CAMPUS,2000 TRANSMOUNTAIN RD,EL PASO,TX,79911,Payment for pneumonia patients,No Different Than the National Average Payment,143,"$20,221",MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Average Payment
18792,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart attack patients,No Different Than the National Average Payment,53,"$27,138",MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
18793,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart failure patients,Greater Than the National Average Payment,155,"$21,647",MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Higher Payment
18794,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for pneumonia patients,No Different Than the National Average Payment,205,"$19,409",MORT_PAYM_30_PN,Value of Care Pneumonia measure,Better Mortality and Average Payment


In [16]:
clean_value_df[clean_value_df.columns[9]] = clean_value_df[clean_value_df.columns[9]].replace('[\$,]', '', regex=True).astype(float)
clean_value_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_value_df[clean_value_df.columns[9]] = clean_value_df[clean_value_df.columns[9]].replace('[\$,]', '', regex=True).astype(float)


Unnamed: 0,facility_id,facility_name,address,city,state,zip_code,payment_measure_name,payment_category,denominator,payment,value_of_care_display_id,value_of_care_display_name,value_of_care_category
0,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart attack patients,No Different Than the National Average Payment,574,24934.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
1,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart failure patients,No Different Than the National Average Payment,823,17854.0,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Average Payment
2,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for pneumonia patients,Greater Than the National Average Payment,536,20216.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Higher Payment
3,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for hip/knee replacement patients,Greater Than the National Average Payment,245,22216.0,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Higher Payment
4,010005,MARSHALL MEDICAL CENTERS,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,Payment for heart attack patients,No Different Than the National Average Payment,71,24742.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18786,670120,THE HOSPITALS OF PROVIDENCE TRANSMOUNTAIN CAMPUS,2000 TRANSMOUNTAIN RD,EL PASO,TX,79911,Payment for pneumonia patients,No Different Than the National Average Payment,143,20221.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Average Payment
18792,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart attack patients,No Different Than the National Average Payment,53,27138.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
18793,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart failure patients,Greater Than the National Average Payment,155,21647.0,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Higher Payment
18794,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for pneumonia patients,No Different Than the National Average Payment,205,19409.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Better Mortality and Average Payment


In [17]:
clean_value_df['payment'].apply(type).value_counts()

<class 'float'>    12135
Name: payment, dtype: int64

In [18]:
clean_value_df['zip_code'].apply(type).value_counts()

<class 'str'>    12135
Name: zip_code, dtype: int64

In [19]:
clean_value_df['denominator'] = clean_value_df['denominator'].apply(lambda x: x.replace('$', '').replace(',', '')).astype(float)
clean_value_df.sample(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_value_df['denominator'] = clean_value_df['denominator'].apply(lambda x: x.replace('$', '').replace(',', '')).astype(float)


Unnamed: 0,facility_id,facility_name,address,city,state,zip_code,payment_measure_name,payment_category,denominator,payment,value_of_care_display_id,value_of_care_display_name,value_of_care_category
18497,530015,ST JOHNS MEDICAL CENTER,625 EAST BROADWAY,JACKSON,WY,83001,Payment for heart failure patients,Less Than the National Average Payment,52.0,15151.0,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Lower Payment
3666,110011,TANNER MEDICAL CENTER - CARROLLTON,705 DIXIE STREET,CARROLLTON,GA,30117,Payment for pneumonia patients,No Different Than the National Average Payment,412.0,17884.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Worse Mortality and Average Payment
12687,360170,BERGER HOSPITAL,600 NORTH PICKAWAY STREET,CIRCLEVILLE,OH,43113,Payment for hip/knee replacement patients,No Different Than the National Average Payment,125.0,21829.0,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Average Payment
17844,510047,FAIRMONT REGIONAL MEDICAL CENTER,1325 LOCUST AVENUE,FAIRMONT,WV,26554,Payment for heart attack patients,No Different Than the National Average Payment,63.0,26069.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment
3674,110016,WELLSTAR WEST GEORGIA MEDICAL CENTER,1514 VERNON ROAD,LAGRANGE,GA,30240,Payment for pneumonia patients,No Different Than the National Average Payment,294.0,19094.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Average Payment


In [20]:
clean_value_df['outcome'] = clean_value_df['value_of_care_display_id'].map(lambda v: v.split('_')[0])
clean_value_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_value_df['outcome'] = clean_value_df['value_of_care_display_id'].map(lambda v: v.split('_')[0])


Unnamed: 0,facility_id,facility_name,address,city,state,zip_code,payment_measure_name,payment_category,denominator,payment,value_of_care_display_id,value_of_care_display_name,value_of_care_category,outcome
0,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart attack patients,No Different Than the National Average Payment,574.0,24934.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment,MORT
1,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for heart failure patients,No Different Than the National Average Payment,823.0,17854.0,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Average Payment,MORT
2,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for pneumonia patients,Greater Than the National Average Payment,536.0,20216.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Higher Payment,MORT
3,010001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,Payment for hip/knee replacement patients,Greater Than the National Average Payment,245.0,22216.0,COMP_PAYM_90_HIP_KNEE,Value of Care hip/knee replacement,Average Complications and Higher Payment,COMP
4,010005,MARSHALL MEDICAL CENTERS,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,Payment for heart attack patients,No Different Than the National Average Payment,71.0,24742.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment,MORT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18786,670120,THE HOSPITALS OF PROVIDENCE TRANSMOUNTAIN CAMPUS,2000 TRANSMOUNTAIN RD,EL PASO,TX,79911,Payment for pneumonia patients,No Different Than the National Average Payment,143.0,20221.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Average Mortality and Average Payment,MORT
18792,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart attack patients,No Different Than the National Average Payment,53.0,27138.0,MORT_PAYM_30_AMI,Value of Care Heart Attack measure,Average Mortality and Average Payment,MORT
18793,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for heart failure patients,Greater Than the National Average Payment,155.0,21647.0,MORT_PAYM_30_HF,Value of Care Heart Failure measur,Average Mortality and Higher Payment,MORT
18794,670122,HOUSTON METHODIST THE WOODLANDS HOSPITAL,17201 INTERSTATE 45 SOUTH,THE WOODLANDS,TX,77385,Payment for pneumonia patients,No Different Than the National Average Payment,205.0,19409.0,MORT_PAYM_30_PN,Value of Care Pneumonia measure,Better Mortality and Average Payment,MORT


In [None]:
clean_value_df['value_code'] = clean_value_df['value_of_care_display_id'].map(lambda v: v.split('_')[-1])
clean_value_df

In [None]:
clean_value_df=clean_value_df.drop(['value_of_care_display_id'], axis=1).reset_index(drop=True)

In [None]:
clean_value_df[clean_value_df.columns[-1]] = clean_value_df[clean_value_df.columns[-1]].replace('KNEE', 'HIP_KNEE', regex=True)

In [None]:
clean_value_df['zip_code'] = clean_value_df['zip_code'].astype(str)

In [None]:
only_nj_zips=clean_value_df[clean_value_df["state"] == "NJ"]
only_nj_zips

In [None]:
clean_value_df.head()

In [None]:
clean_value_df.to_csv("output_data/clean_value_df.csv", index=False)

### CSV on readmission rates

In [None]:
readmissions_df = pd.read_csv("data/Readmissions_Reduction_Program.csv", low_memory=False)
updated_readmissions_df=readmissions_df.drop(['Footnote', 'Start Date', 'End Date', "State", "Predicted Readmission Rate", "Expected Readmission Rate"], axis=1).reset_index(drop=True)
updated_readmissions_df.columns = updated_readmissions_df.columns.str.replace(' ', '_').str.lower()
updated_readmissions_df

In [None]:
no_nans_readmissions=updated_readmissions_df.dropna()
no_nans_readmissions

In [None]:
no_nans_readmissions['excess_readmission_ratio'].apply(type).value_counts()

In [None]:
no_nans_readmissions['number_of_discharges'].apply(type).value_counts()

In [None]:
no_nans_readmissions['number_of_readmissions'].apply(type).value_counts()

In [None]:
no_nans_readmissions['number_of_readmissions'] = no_nans_readmissions['number_of_readmissions'].apply(pd.to_numeric)

In [None]:
no_nans_readmissions['facility_id'].apply(type).value_counts()

In [None]:
no_nans_readmissions.describe()

In [None]:
no_nans_readmissions.to_csv("output_data/readmissions_df.csv", index=False)

### Export to PostgreSQL

In [None]:
from sqlalchemy import create_engine

In [None]:
final_census_df