In [1]:
# import dependencies 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sts 
from scipy.stats import linregress
import json
import requests

In [None]:
# Load data: remote endpoints first, then the local files read into DataFrames.

# Remote endpoints (API documentation is kept in the resources folder)
hospital_cap_url = 'https://opendata.arcgis.com/datasets/1044bb19da8d4dbfb6a96eb1b4ebf629_0.geojson'
nursing_homes_url = 'https://data.cms.gov/resource/s2uc-8wxp.json'  # requires login and maybe sodapy ?
deaths_county_race = 'https://data.cdc.gov/api/views/k8wy-p9cg'
excess_death_comparisons = 'https://data.cdc.gov/api/views/m74n-4hbs'
tx_65_url = 'https://data.texas.gov/resource/qjby-4sji.json?'

# Local files: each path is followed directly by the read that consumes it
confirmed_xl = 'Resources/tx_confirmed.xlsx'
confirmed_tx = pd.read_excel(io=confirmed_xl)

fatalities_xl = 'Resources/tx_fatalities.xlsx'
fatalities_tx = pd.read_excel(io=fatalities_xl)

county_popn_csv = 'Resources/Population Estimates by County.csv'
county_popn = pd.read_csv(filepath_or_buffer=county_popn_csv)

In [None]:
# Drop the null rows stored under index labels 254 and 255.
# errors='ignore' keeps the cell re-runnable: once the labels are gone a second
# execution is a no-op instead of raising KeyError.
confirmed_tx = confirmed_tx.drop(index=[254, 255], errors='ignore')

# View data
confirmed_tx

In [None]:
# Clean up: keep only the county name and the 02-02-2021 fatalities column, then
# delete the rows labelled 254 and 255 (total row / null values) so the total is
# not counted again later.
# errors='ignore' makes the cell safe to re-run after those labels are already gone.
fatalities_tx = (
    fatalities_tx[['County Name', 'Fatalities 02-02-2021']]
    .drop(index=[254, 255], errors='ignore')
)

# View data (* data is a running total not additional fatalities per day)
fatalities_tx

In [None]:
# Get total for each county.
# numeric_only=True sums only the numeric columns of each row; the text 'County'
# column is skipped explicitly instead of relying on pandas silently dropping it
# (recent pandas raises TypeError when a row-wise sum meets strings).
# NOTE(review): if the per-date columns in this sheet are cumulative counts, adding
# them together overcounts and the latest date column should be used instead —
# confirm against the workbook.
confirmed_tx['Total Confirmed'] = confirmed_tx.sum(axis=1, numeric_only=True)

# Select only county and total columns.
# .copy() gives an independent frame, so later column assignments on it do not
# trigger SettingWithCopyWarning.
confirmed_tx = confirmed_tx[['County', 'Total Confirmed']].copy()

# View
confirmed_tx

In [None]:
# Prepare both frames for merging on a shared 'County' key.

# Work on independent frames so the column assignments below do not warn about
# writing to a slice. rename() returns a new frame and is a no-op when the column
# has already been renamed, which keeps this cell re-runnable.
confirmed_tx = confirmed_tx.copy()
fatalities_tx = fatalities_tx.rename(columns={'County Name': 'County'})

# Reformat to match columns: title-case the county names in both frames
confirmed_tx['County'] = confirmed_tx['County'].str.title()
fatalities_tx['County'] = fatalities_tx['County'].str.title()

# The merge below is an inner join, so a county whose name differs between the two
# sources would vanish without notice; report any such names before merging.
unmatched = set(confirmed_tx['County'].dropna()) ^ set(fatalities_tx['County'].dropna())
if unmatched:
    print(f'Counties present in only one source (dropped by the merge): {sorted(unmatched)}')

# Merge dataframes
summary_cases = pd.merge(confirmed_tx, fatalities_tx, on='County')
summary_cases.head()


In [None]:
## Summary table: add death rate and proportion 65+

# Death rate: fatalities as a percentage of confirmed cases, per county
total_fatalities = summary_cases['Fatalities 02-02-2021']
total_cases = summary_cases['Total Confirmed']
summary_cases['% Fatal'] = total_fatalities.div(total_cases).mul(100)

# County names, kept for the 65+ population step
county_list = summary_cases['County']

# TODO: reformat county data (remove ', TX' and convert to title case)

# Empty list for county names
county_names = []

# TODO: loop through county_list and get 65+ population and county population
# for county in county_list:

summary_cases

In [None]:
# Ten counties with the highest fatality rate: sort descending on '% Fatal',
# renumber the rows from 0, and keep the first ten.
highest_fatality = (
    summary_cases
    .sort_values(by='% Fatal', ascending=False, ignore_index=True)
    .head(10)
)

# View dataframe
highest_fatality


In [None]:
# Add a '% 65+' column, initialised to the empty string '' in every row.
# The per-county API lookup later in the notebook writes into this column by row label.
# NOTE(review): with '' as the placeholder the column holds text rather than numbers,
# and rows that are never filled stay '' instead of NaN — confirm before plotting.
summary_cases['% 65+'] =''
summary_cases

In [None]:
# The API is case sensitive, so give the three "Mc" counties the capitalisation
# it expects. Every cell equal to a key below is swapped for the mapped value.
summary_cases = summary_cases.replace(
    to_replace={
        'Mcculloch': 'McCulloch',
        'Mclennan': 'McLennan',
        'Mcmullen': 'McMullen',
    }
)

In [None]:
# Look up the 65+ population share for every county in summary_cases.
# One GET per county is sent to the endpoint below; the first record of each
# response supplies '_65_and_older_of_population', which is written to the
# '% 65+' column of the matching row.
tx_65_url = 'https://data.texas.gov/resource/qjby-4sji.json?'
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs the cell

county_list = []         # county names as returned by the API
county_sixty_five = []   # 65+ values, parallel to county_list
missing_info = []        # counties whose lookup failed or returned no record

# A Session reuses the HTTP connection across the per-county requests.
with requests.Session() as session:
    # Read the county from the 'County' column by label; positional row[0] access
    # on a label-indexed row is deprecated in recent pandas.
    for i, county in summary_cases['County'].items():
        try:
            # params= lets requests URL-encode the county name instead of
            # concatenating it into the query string by hand.
            reply = session.get(tx_65_url, params={'county': county}, timeout=REQUEST_TIMEOUT)
            reply.raise_for_status()
            record = reply.json()[0]
            over_65 = record['_65_and_older_of_population']
            county_name = record['county']
        except requests.RequestException as err:
            # Network failure, timeout, HTTP error status or undecodable body:
            # note the county and carry on rather than aborting the whole loop.
            print(f'Request failed for {county}: {err}')
            missing_info.append(county)
            continue
        except (KeyError, IndexError, ValueError):
            # Empty result list or a record without the expected fields.
            print(f'Missing information for {county}')
            missing_info.append(county)
            continue

        summary_cases.loc[i, '% 65+'] = over_65
        county_list.append(county_name)
        county_sixty_five.append(over_65)

# NOTE(review): values are stored exactly as the API returns them; if they arrive
# as text they will need pd.to_numeric before plotting — confirm the field type.
summary_cases

In [None]:
# Build scatterplot

# Get x and y values

# Add to scatterplot
