In [None]:
from urllib.request import urlopen
import json
import pandas as pd
import plotly.express as px
import scipy.io as spio
import numpy as np
from plotly.offline import init_notebook_mode
init_notebook_mode()


from parseJohnsHopkins import getTzuHsiClusters
from parseData import correct_county_name_from_df
    

# Tzu-hsi Clustering

## Add FIPS codes to the clusters dataset from the COVID-19 cases dataset

In [None]:
# Load the COVID-19 cases file from the assets folder.
file_type = 'cases'
raw_df = pd.read_csv(f'../../assets/us_{file_type}_counties.csv', header=0, index_col=None)
raw_df = raw_df.fillna(0)

# Build a "<State>_<County Name>" key so every county can be looked up by a
# single label instead of matching on two columns.
raw_df['state_county'] = raw_df[['State', 'County Name']].agg('_'.join, axis=1)
raw_df = correct_county_name_from_df(raw_df)

# sort_values returns a new frame, so the result has to be assigned back for
# the ordering to take effect (the bare calls used previously were no-ops).
raw_df = raw_df.sort_values(by=['state_county'])

# Series mapping state_county label -> countyFIPS, used by later cells.
processed_df = raw_df[['state_county', 'countyFIPS']]
state_county_to_fips = processed_df.set_index('state_county')['countyFIPS']

state_county_to_fips

In [None]:
# Add the FIPS column to Tzu-hsi's clusters dataset
def add_fips_to_clusters_data(clusters_df, fips_lookup=None):
    """Attach county FIPS codes to a per-county collection of cluster ids.

    Args:
        clusters_df: label-indexed pandas object (the callers in this notebook
            pass a Series) whose index holds ``state_county`` labels and whose
            values are the cluster id / level of each county.
        fips_lookup: optional Series mapping ``state_county`` label to its
            FIPS code. When omitted, the notebook-level
            ``state_county_to_fips`` Series is used.

    Returns:
        A DataFrame with the columns ``fips`` (string, left-padded with zeros
        to at least five characters), ``severity_clusters`` and
        ``county_name``. Counties missing from the lookup are skipped.
    """
    if fips_lookup is None:
        # Fall back to the lookup built earlier in the notebook.
        fips_lookup = state_county_to_fips

    clusters_list = []
    fip_list = []
    county_name_list = []
    for a_county in clusters_df.index.tolist():
        if a_county not in fips_lookup.index:
            # No FIPS code known for this label; leave it off the map.
            continue

        county_name_list.append(a_county)
        clusters_list.append(clusters_df[a_county])
        # Codes are stored as numbers, which drops leading zeros; restore them
        # so the value matches the five-character ids used for map lookups.
        fip_list.append(str(fips_lookup[a_county]).zfill(5))

    assert len(fip_list) == len(clusters_list)
    print(len(fip_list), 'Counties in Total')

    fips_state_county_clusters_df = pd.DataFrame(
        data={
            'fips': fip_list,
            'severity_clusters': clusters_list,
            'county_name': county_name_list,
        }
    )
    return fips_state_county_clusters_df
    

## Draw Map

In [None]:
# Built-in plotly color scales: https://plotly.com/python/builtin-colorscales/
def show_covid19_cases_on_map(df, geojson_counties, color_map, date_info):
    """Draw and display a US county choropleth coloured by severity cluster.

    Args:
        df: DataFrame with a ``fips`` column (county ids matched against the
            GeoJSON) and a ``severity_clusters`` column (the colour value).
        geojson_counties: GeoJSON object with the county shapes.
        color_map: colour scale passed as ``color_continuous_scale``.
        date_info: text inserted into the figure title.

    Returns:
        None. The figure is shown via ``fig.show()``.
    """
    title_settings = {
        'text': f"Severity of COVID-19 Cases per County in {date_info}",
        'y': 1.0,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
    no_margins = {"r": 0, "t": 0, "l": 0, "b": 0}

    fig = px.choropleth(
        df,
        geojson=geojson_counties,
        locations='fips',
        color='severity_clusters',
        color_continuous_scale=color_map,
        scope="usa",
        labels={'severity_clusters': 'Severity'},
    )

    # Outline the subunits (state borders for the "usa" scope) in black.
    fig.update_geos(showsubunits=True, subunitcolor="black")
    fig.update_layout(title=title_settings, margin=no_margins)

    fig.show()

In [None]:
# Clustering variant and the date windows to plot; each window is later
# written as "<initial date>~<final date>".
cluster_type = "constants"
initial_date_list = ['3/15', '4/1', '4/15', '5/1', '5/15']
cluster_final_date_list = ['4/30', '5/15', '5/31', '6/15', '6/30']

# These assignments override the full lists above, so only the 4/1~5/15
# window is actually drawn.
initial_date_list = ['4/1']
cluster_final_date_list = ['5/15']

# Reversed copy of the Portland diverging scale (slicing leaves plotly's own
# list untouched).
color_map = px.colors.diverging.Portland[::-1]

# County shapes from plotly's public datasets repository.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    geojson_counties = json.load(response)

In [None]:
for initial_date, cluster_final_date in zip(initial_date_list, cluster_final_date_list):
    date_info = f"{initial_date}~{cluster_final_date}"

    # Load the clusters for this date window and discard cluster 0.
    initial_clusters = getTzuHsiClusters(column_date=date_info, cluster_type=cluster_type)
    initial_clusters = initial_clusters[initial_clusters > 0]

    # Collapse the cluster ids into three groups: 1-3 -> 1, 4-6 -> 2, 7-9 -> 3.
    # The pairs are applied in ascending order of the old id, so a value that
    # has already been rewritten is never matched again by a later pair.
    for old_id, new_id in ((2, 1), (3, 1), (4, 2), (5, 2), (6, 2), (7, 3), (8, 3), (9, 3)):
        initial_clusters[initial_clusters == old_id] = new_id

    fips_state_county_clusters_df = add_fips_to_clusters_data(initial_clusters)
    show_covid19_cases_on_map(
        df=fips_state_county_clusters_df,
        geojson_counties=geojson_counties,
        color_map=color_map,
        date_info=date_info,
    )
    

# COVID-19 Risk Levels

In [None]:
def find_nan_rows(df):
    """Print every row of ``df`` that has at least one missing value."""
    # True for each row in which any column is null.
    nan_row_mask = df.isnull().any(axis=1)
    print(df[nan_row_mask])
# find_nan_rows(risk_level_df)

def updateCounty(county_str):
    """Turn ``'<county>, <state>'`` into ``'<state>_<county>'``.

    The text is split on ``', '`` and only the first two pieces are used.
    Raises IndexError when the separator is absent.
    """
    pieces = county_str.split(', ')
    county_part, state_part = pieces[0], pieces[1]
    return f"{state_part}_{county_part}"
    
# Quick check of the conversion; evaluates to 'SC_Abbeville County'.
updateCounty('Abbeville County, SC')

In [None]:
risk_level_df = pd.read_csv('../../assets/us_risk_levels.csv', header=0, index_col=None)

# Data Processing
# keep only the rows whose metric is the COVID-19 risk level
risk_level_df = risk_level_df[risk_level_df.Metric == 'COVID-19 Risk Level']

# remove Statewide Unallocated, Grand Princess Cruise Ship, New York City Unallocated
risk_level_df = risk_level_df[~risk_level_df.County.str.contains('Statewide Unallocated')]
risk_level_df = risk_level_df[~risk_level_df.County.str.contains('Grand Princess Cruise Ship')]
risk_level_df = risk_level_df[~risk_level_df.County.str.contains('New York City Unallocated')]

# get columns
list_of_dates = ['04-30', '05-15', '05-31', '06-15', '06-30', '07-15', '07-30']
list_of_dates = ['2020-' + date for date in list_of_dates]
# Take an explicit copy: the assignments below modify the frame in place, and
# doing that on a frame derived by slicing can trigger pandas'
# SettingWithCopyWarning.
risk_level_df = risk_level_df[['County'] + list_of_dates].copy()

# Convert Green=4, Yellow=3, Orange=2, Red=1
risk_level_df[risk_level_df == 'Green'] = 4
risk_level_df[risk_level_df == 'Yellow'] = 3
risk_level_df[risk_level_df == 'Orange'] = 2
risk_level_df[risk_level_df == 'Red'] = 1
# Raises ValueError if any date column still holds a non-numeric value.
risk_level_df[list_of_dates] = risk_level_df[list_of_dates].astype(str).astype(int)

# Rewrite 'County, ST' as 'ST_County', rename the column to state_county, then
# pass the frame through correct_county_name_from_df (a project helper whose
# exact normalisation is not visible in this notebook).
risk_level_df.County = risk_level_df.County.apply(updateCounty)
risk_level_df = risk_level_df.rename(columns={'County': 'state_county'})
risk_level_df = correct_county_name_from_df(risk_level_df)
risk_level_df = risk_level_df.set_index('state_county')

print(risk_level_df)
print(risk_level_df.dtypes)


In [None]:
# Draw one risk-level map per selected date column
for a_date in list_of_dates:
    # Pair each county's risk level on this date with its FIPS code.
    fips_state_county_clusters_df = add_fips_to_clusters_data(risk_level_df[a_date])
    show_covid19_cases_on_map(
        df=fips_state_county_clusters_df,
        geojson_counties=geojson_counties,
        color_map=color_map,
        date_info=a_date,
    )
