In [2]:
# Dependencies and Setup
from census import Census
from us import states
from config import (census_key, gkey)
import us
import gmaps
import os
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from sodapy import Socrata
import requests
import json

In [41]:
# Pull three 2016 ACS 5-year variables for every county
c = Census(census_key, year=2016)
acs_variables = ('B01003_001E', 'B17001_002E','B19013_001E')
census_data = c.acs5.get(acs_variables, {'for': 'county:*'})

# Readable labels for the ACS variable codes and the geography identifiers
acs_column_labels = {
    'B01003_001E': 'Population',
    'B17001_002E': 'Poverty Count',
    'B19013_001E': 'Median Household Income',
    'state': 'State',
    'county': 'County',
}


def _poverty_rate(frame):
    """Return the poverty rate in percent: 100 * Poverty Count / Population."""
    poverty_count = frame['Poverty Count'].astype(int)
    population = frame['Population'].astype(int)
    # Multiply first, then divide, so the floating-point result matches
    # the expression 100 * count / population exactly.
    return poverty_count.mul(100).div(population)


# Build the frame, rename the columns, then append the derived Poverty Rate column
census_df = (
    pd.DataFrame(census_data)
    .rename(columns=acs_column_labels)
    .assign(**{'Poverty Rate': _poverty_rate})
)

In [42]:
# Clean census data and attach the 2018 population density for every county.
# This version fetches all counties in ONE bulk request, so it is quick to run.
# NOTE: the cell overwrites census_df with a reduced column set (State/County are
# dropped at the end), so re-run the cell that builds census_df before re-running this one.

# Drop rows whose state code is '72' (presumably Puerto Rico's state FIPS code - confirm)
census_df=census_df[census_df.State != '72']
# drop=True: the old positional index is not needed and would only add an 'index' column
census_df=census_df.reset_index(drop=True)
# County key = state code + county code (string concatenation)
census_df['FIPS']=census_df['State']+census_df['County']

url='https://api.census.gov/data/2018/pep/population?get=DENSITY&for=county:*&in=state:*&key='+census_key
density_request = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error payload as data
density_request.raise_for_status()
response = density_request.json()

# The first row of the payload holds the column names; the remaining rows are the data
column_names = response.pop(0)
density_df=pd.DataFrame(response,columns=column_names)
density_df['FIPS']=density_df['state']+density_df['county']
# The API delivers DENSITY as text; store it as a float (unparseable values become NaN)
density_df['DENSITY']=pd.to_numeric(density_df['DENSITY'], errors='coerce')

# Inner join: only counties present in both sources are kept
merge_df = pd.merge(census_df,density_df, on="FIPS")
merge_df= merge_df.rename(columns={'DENSITY':'Population Density'})
census_df=merge_df[['FIPS','Population','Population Density','Median Household Income','Poverty Rate']]

Unnamed: 0,FIPS,Population,Population Density,Median Household Income,Poverty Rate
0,05015,27690.0,44.800928174,38145.0,17.295052
1,05017,11189.0,16.199614005,29628.0,29.064260
2,05019,22684.0,25.472316857,35595.0,20.992770
3,05021,15202.0,23.217736871,32404.0,21.970793
4,05023,25587.0,45.084706902,41717.0,15.246023
...,...,...,...,...,...
3137,56037,44812.0,4.1288079711,68233.0,10.863162
3138,56039,22623.0,5.7748028059,75594.0,7.293462
3139,56041,20893.0,9.7510677126,53323.0,16.570143
3140,56043,8351.0,3.5221743077,46212.0,14.345587


In [None]:
# Clean Census Data - Dont run unless you have some time
# Slow variant: issues ONE API request per county to look up its population density.
# Requires census_df to still carry the 'State' and 'County' columns.
census_df=census_df[census_df.State != '72']
# drop=True keeps labels 0..n-1 (needed for state[i] / county[i] below)
# without adding a stray 'index' column
census_df=census_df.reset_index(drop=True)
state=census_df['State']
county=census_df['County']
popden=[]
baseurl='https://api.census.gov/data/2018/pep/population?get=DENSITY&for=county:'
for i in range(len(state)):
    try:
        url=baseurl+county[i]+'&in=state:'+state[i]+'&key='+census_key
        # timeout so a single stalled connection cannot hang the whole loop
        response = requests.get(url, timeout=30).json()
        # Row 0 of the payload is the header; row 1, column 0 is the DENSITY value
        response = float(response[1][0])
        popden.append(response)
    except (requests.RequestException, ValueError, TypeError, IndexError, KeyError):
        # Best effort: a failed request, an unparseable body or a missing value
        # is recorded as NaN. Only these errors are caught, so interrupting the
        # kernel (KeyboardInterrupt) still stops the loop.
        popden.append(np.nan)
census_df['Population Density']=popden
census_df['FIPS']=census_df['State']+census_df['County']
census_df=census_df[['FIPS','Population','Population Density','Median Household Income','Poverty Rate']]
census_df

In [43]:
# Fetch the Medicare.gov hospital compare dataset through Socrata
# (https://data.medicare.gov/resource/xubh-q36u.json), at most 6000 records
dataset='xubh-q36u'
client = Socrata('data.medicare.gov', None)
hospitals = client.get(dataset,limit=6000)

# Keep the descriptive columns plus the overall rating
hospital_columns = ['hospital_name','city','state','county_name','hospital_overall_rating']
hospitals_df = pd.DataFrame(hospitals)[hospital_columns]

# Discard hospitals whose rating is the placeholder text, then renumber rows from 0
has_rating = hospitals_df['hospital_overall_rating'] != 'Not Available'
hospitals_df = hospitals_df.loc[has_rating].reset_index(drop=True)



Unnamed: 0,hospital_name,city,state,county_name,hospital_overall_rating
0,GOODLAND REGIONAL MEDICAL CENTER,GOODLAND,KS,SHERMAN,3
1,MARIA PARHAM MEDICAL CENTER,HENDERSON,NC,VANCE,2
2,ALLEGHANY COUNTY MEMORIAL HOSPITAL,SPARTA,NC,ALLEGHANY,2
3,ADVENTIST HEALTHCARE WHITE OAK MEDICAL CENTER,SILVER SPRING,MD,MONTGOMERY,3
4,ADVENTHEALTH LAKE WALES,LAKE WALES,FL,POLK,1
...,...,...,...,...,...
3573,VANDERBILT UNIVERSITY MEDICAL CENTER,NASHVILLE,TN,DAVIDSON,3
3574,JENNERSVILLE HOSPITAL,WEST GROVE,PA,CHESTER,3
3575,MARINERS HOSPITAL,TAVERNIER,FL,MONROE,5
3576,JEWISH HOSPITAL - SHELBYVILLE,SHELBYVILLE,KY,SHELBY,4


In [44]:
# Define Urls for the Johns Hopkins Data
confirm_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
death_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'


def _read_csv_skipping_bad_lines(url):
    """Read a CSV into a DataFrame, skipping malformed lines instead of failing.

    `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0,
    where passing it raises TypeError. The current spelling
    (`on_bad_lines='skip'`) is tried first; pandas versions that predate it
    reject the unknown keyword with TypeError, in which case the legacy
    keyword is used.
    """
    try:
        return pd.read_csv(url, on_bad_lines='skip')
    except TypeError:
        return pd.read_csv(url, error_bad_lines=False)


# Read in COVID-19 Files
confirm_df=_read_csv_skipping_bad_lines(confirm_url)
death_df=_read_csv_skipping_bad_lines(death_url)

In [45]:
def _drop_non_county_rows(frame):
    """Remove rows that do not describe a usable county and renumber the rest.

    Steps, in order:
      1. drop rows whose Admin2 is exactly 'Unassigned';
      2. drop rows containing any missing value;
      3. drop rows whose Admin2 starts with 'Out of'.

    The result is re-indexed with a plain reset_index(), which inserts the old
    index as a leading 'index' column. That extra column is kept ON PURPOSE:
    later code addresses columns by position, so removing it would shift
    every positional lookup by one.

    (An earlier extra filter `Admin2 != 'Out of*'` was removed: it compared
    against the literal text 'Out of*', not a wildcard, and could never match
    anything once step 3 had run.)
    """
    frame = frame[frame.Admin2 != 'Unassigned']
    frame = frame.dropna()
    frame = frame[~frame['Admin2'].astype(str).str.startswith('Out of')]
    return frame.reset_index()


# Clean Confirm data Rows
confirm_df=_drop_non_county_rows(confirm_df)
# Render FIPS as zero-padded 5-character text with no decimals (e.g. 5015.0 -> '05015')
confirm_df['FIPS']=confirm_df.FIPS.map('{0:0>5.0f}'.format)

# Clean Death Data Rows
death_df=_drop_non_county_rows(death_df)

In [49]:
# Create the disease DataFrame: one row per county with its location plus the
# confirmed-case and death counts at fixed offsets after the first day with cases.
disease_df=confirm_df[['FIPS','Admin2','Province_State','Lat','Long_']]
disease_df= disease_df.rename(columns={'Admin2': 'County',
                                      'Province_State': 'State',
                                      'Lat': 'Latitude',
                                      'Long_':'Longitude'})

# Position of the first column scanned for a positive case count.
# Presumably the first date column of confirm_df once reset_index() has
# inserted its 'index' column - TODO confirm against the current CSV layout.
FIRST_DATE_COLUMN = 12
# Death values are read one column to the right of the matching confirm value.
# Presumably this compensates for an extra non-date column in the deaths file -
# TODO confirm.
DEATH_COLUMN_SHIFT = 1
# "Day N" snapshots; day 1 is the first column with a positive case count.
SNAPSHOT_DAYS = (5, 10, 20, 50)


def _cell_or_nan(frame, row, col):
    """Return frame.iloc[row, col], or NaN when that position is out of bounds.

    Only IndexError is handled (the series is too short, or the frame has
    fewer rows), so real problems and kernel interrupts are not hidden.
    """
    try:
        return frame.iloc[row, col]
    except IndexError:
        return np.nan


# Local names are used for the shape so the Census client stored in `c`
# by an earlier cell is not overwritten.
n_rows, n_cols = confirm_df.shape

# Insertion order fixes the resulting column order:
# confirm_5, confirm_10, confirm_20, confirm_50, death_5, ..., death_50
snapshots = {}
for kind in ('confirm', 'death'):
    for days in SNAPSHOT_DAYS:
        snapshots['{}_{}'.format(kind, days)] = []

for row in range(n_rows):
    # First column with a positive count; falls back to the last column when
    # the county never reports a case (every lookup below then yields NaN).
    onset = next(
        (col for col in range(FIRST_DATE_COLUMN, n_cols)
         if confirm_df.iloc[row, col] > 0),
        n_cols - 1,
    )
    for days in SNAPSHOT_DAYS:
        confirm_col = onset + days - 1
        snapshots['confirm_{}'.format(days)].append(
            _cell_or_nan(confirm_df, row, confirm_col))
        # NOTE: death_df is addressed by the same row position as confirm_df,
        # i.e. both frames are assumed to list the same counties in the same order.
        snapshots['death_{}'.format(days)].append(
            _cell_or_nan(death_df, row, confirm_col + DEATH_COLUMN_SHIFT))

for column_name, values in snapshots.items():
    disease_df[column_name] = values

In [53]:
# Join the county COVID table to the county census table on their shared FIPS key
merge_df = disease_df.merge(census_df, on="FIPS")

# Write both result tables to CSV: header row included, row index left out
for frame, path in ((merge_df, 'counties.csv'), (hospitals_df, 'hospitals.csv')):
    frame.to_csv(path, index=False, header=True)