# Notebook for preparing redesign of data management

### Python Libraries?

In [None]:
import pandas as pd
import numpy as np

from urllib.request import urlopen
import json

import time

import requests

In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate

In [None]:
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

### Custom Modules

In [None]:
import data_processing
import plotting

# Extension to auto reload custom modules
%load_ext autoreload

%autoreload 1

%aimport data_processing
%aimport plotting

In [None]:
# Get county geojson
COVID_GEOJSON = data_processing.load_county_geojson()

# Get state data
COVID_STATES_DF = data_processing.get_covid_state_data()

# Get county data
COVID_COUNTIES_DF = data_processing.get_covid_county_data()

In [None]:
COVID_STATES_DF = data_processing.get_covid_state_data()

In [None]:
plotting.plot_choropleth_state(COVID_STATES_DF, '2020-08-20', 'deathIncrease')

## Data Importing

### County GeoJson for polygons on choropleth

In [None]:
# Get the Map of US counties
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    county_geojson = json.load(response)

In [None]:
with open('data/county_geojson.json','w') as fout:
    json.dump(county_geojson,fout)

In [None]:
with open('data/county_geojson.json','r') as fout:
    county_geojson = json.load(fout)

### County Data
This is where we import data from the [New York Times GitHub repository](https://github.com/nytimes/covid-19-data) to get county-level coronavirus data.



### Using data_processing

In [None]:
COVID_COUNTIES_DF = data_processing.get_covid_county_data()

In [None]:
date_dict = data_processing.generate_slider_dates(COVID_COUNTIES_DF)

In [None]:
time.strftime('%Y-%m-%d',time.localtime(max(date_dict)))

In [None]:
COVID_COUNTIES_DF['date'].max()

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.info()

### Get State Data

In [None]:
COVID_STATES_DF = data_processing.get_covid_state_data()

In [None]:
STATE_MASK = (COVID_STATES_DF['state'] == 'CO')
DATE_MASK = ((COVID_STATES_DF['date'] >= '2020-05-10') & (COVID_STATES_DF['date'] <= '2020-05-20'))
NEGATIVE_DEATH = (COVID_STATES_DF['deathIncrease']<0)

In [None]:
COVID_STATES_DF[STATE_MASK & DATE_MASK][['date','death','state','deathIncrease']].head(15)

In [None]:
COVID_STATES_DF[NEGATIVE_DEATH][['date','death','state','deathIncrease']].head(200)

In [None]:
COVID_STATES_DF[STATE_MASK & NEGATIVE_DEATH][['date','death','deathIncrease']].sort_values(by='date',ascending = True).head(20)

In [None]:
COVID_STATES_DF.columns

In [None]:
plotting.choropleth_state_deaths_density(COVID_STATES_DF,'death', '2020-08-26')

In [None]:
COVID_STATES_DF.head()

In [None]:
COVID_STATES_DF.head()

In [None]:
state_stats.head()

In [None]:
# Import the unemployment data because it has the fips codes
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
                   dtype={"fips": str})
df['prefix'] = [x[:2] for x in df['fips']]

In [None]:
df.head()

In [None]:
USA-states

In [None]:

deaths = states_df.reset_index()
################################################################################
# Read in states data
states = pd.read_csv('./data/tbl_states.csv')

# Fix fips code to be a string, prefix 0 to single digit codes
states['fips'] = states['fips'].astype(str).apply(lambda x: '0' + x if len(x) == 1 else x)

In [None]:
df.head()

In [None]:
states.head()

In [None]:
merged = pd.merge(df, states, left_on='prefix', right_on='fips', how='outer')

In [None]:
merged.head()

In [None]:
states_df.columns

In [None]:
a = states_df[['state','death','deathIncrease','positive','fips']]

In [None]:

# Attach the state lookup row to every county row; 'prefix' holds the first
# two characters of the county FIPS code and is matched to the state 'fips'.
merged = pd.merge(df, states, how='outer', left_on='prefix', right_on='fips')
# Both inputs carry a 'fips' column, so the merge suffixes them; rename so the
# county (_co) and state (_st) codes are explicit.
merged = merged.rename(columns={'fips_x': 'fips_co', 'fips_y': 'fips_st'})

# Keep only the rows for the most recent date in the deaths table.
max_date = max(deaths['date'])
deaths = deaths.loc[deaths['date'] == max_date]

# Outer-join on whatever columns the two frames share, then discard the
# helper join key.
final = pd.merge(merged, deaths, how='outer').drop(columns=['prefix'])

# Natural log of the 'Lived' density column.
final['logd'] = np.log(final['Lived'])

# Deaths per million of 'Pop' (presumably population; confirm against
# tbl_states.csv).
x = final['death'] * 1000000 / final['Pop']
final['death_per_m'] = x.copy()

# Natural log of the 'Standard' density column.
final['log_std_density'] = np.log(final['Standard'])

In [None]:
states_df.columns

In [None]:
final.columns

In [None]:
final.columns

In [None]:
final.head()

### Using data_processing

In [None]:
date_dict = data_processing.generate_slider_dates(df)

In [None]:
date_dict

In [None]:
# Get today's date
time.strftime('%Y-%m-%d', time.localtime())

In [None]:
time.strftime('%Y-%m-%d')

# Dash Components

In [None]:
date = '2020-08-26'
date_mask = (df['date'] == date)

In [None]:
a = round(df[date_mask]['cases'].mean(),-1)

In [None]:
round(a.mean(),-1)

In [None]:
AB = pd.DataFrame(df.to_dict())

In [None]:
AB

In [None]:
# dcc.Store ships its `data` prop to the browser as JSON, so hand it a list of
# plain row records instead of the DataFrame object itself.
dcc.Store(id='store', data=df.to_dict('records'))

# Plotly

# Animation frames

In [None]:
COVID_STATES_DF

In [None]:
import datetime

In [None]:
range()

In [None]:
COVID_STATES_DF = data_processing.get_covid_state_data()

In [None]:
type(COVID_STATES_DF['date'][0])

In [None]:
type(COVID_COUNTIES_DF['date'][0])

In [None]:
a = COVID_STATES_DF['date'][0]

In [None]:
a.strftime(format="%Y-%m-%d")

In [None]:
animation_dates = COVID_STATES_DF[STATE_MASK]['date'].map(lambda x:x.strftime(format="%Y-%m-%d"))
animation_dates.sort_values(inplace=True)
animation_date_dict = {'date':list(animation_dates)}

In [None]:
category_orders = {'date':list(COVID_STATES_DF[STATE_MASK]['date'].sort_values())}

In [None]:
category_orders

In [None]:
animation_dates = generate_animation_dates(COVID_STATES_DF)

In [None]:
COVID_STATES_DF['date'].max()

In [None]:
start_date = '2020-03-01'
max_date = COVID_STATES_DF['date'].max()

In [None]:
range_index=pd.date_range(start=start_date, end=max_date, freq='W')
index = range_index.map(lambda x:x.strftime('%Y-%m-%d'))
animation_date_dict = {'date':list(index)}

In [None]:
animation_date_dict

In [None]:
pd.date_range(start=start_date,end=max_date)

In [None]:
a.map(lambda x:x.strftime('%Y-%m-%d'))

In [None]:
animation_date_dict

In [None]:
range(start_date, max_date,1)

In [None]:

def generate_animation_dates(df):
    """Build date marks from the latest date in ``df`` back to a fixed start.

    Starting at the maximum of ``df['date']``, step back two weeks at a time
    and stop before reaching the hard-coded start date (2020-03-01), which is
    itself excluded.

    Args:
        df: DataFrame with a ``date`` column. Values must be convertible by
            ``pd.Timestamp`` (timestamps or ``YYYY-MM-DD`` strings).
            NOTE(review): timezone-aware dates are not handled; confirm the
            column is naive.

    Returns:
        dict keyed by integer epoch seconds (the date interpreted in local
        time, so ``time.localtime(key)`` recovers it), ordered from newest
        to oldest. Each value is ``{'label': 'YYYY-MM-DD', 'style': {...}}``.
        Empty when the latest date is not after the start date or when the
        column has no valid dates.
    """
    # Hardcode a start date
    start_date = pd.Timestamp('2020-03-01')
    max_date = pd.Timestamp(df['date'].max())

    label_style = {
        'writing-mode': 'vertical-rl',
        'text-orientation': 'sideways',
        'height': '70px',
    }
    two_weeks = pd.Timedelta(days=14)

    # Step in calendar days rather than raw seconds so a daylight-saving
    # change between two marks cannot push a label onto the previous day.
    date_dict = {}
    day = max_date
    while day > start_date:  # False for NaT, so an empty column yields {}
        key = int(time.mktime(day.timetuple()))
        date_dict[key] = {
            'label': day.strftime('%Y-%m-%d'),
            'style': dict(label_style),  # independent copy per mark
        }
        day = day - two_weeks
    return date_dict

In [None]:
def plot_animation(df=COVID_STATES_DF, category='death'):
    """Return an animated US-state choropleth of one column over time.

    Args:
        df: DataFrame with ``state`` and ``date`` columns plus the column
            named by ``category``. The default is the ``COVID_STATES_DF``
            object that existed when this cell was executed; re-run the cell
            (or pass ``df`` explicitly) after reloading the data.
        category: Name of the column used to colour each state.

    Returns:
        The figure produced by ``px.choropleth`` with one animation frame per
        ``date`` value. Frame order is taken from the notebook-level
        ``animation_date_dict``.
    """
    fig = px.choropleth(data_frame=df,
                        locations='state',
                        # Honour the caller's column choice; this used to be
                        # hard-coded to 'death', so `category` had no effect.
                        color=category,
                        locationmode='USA-states',
                        animation_frame='date',
                        category_orders=animation_date_dict)

    fig.update_geos(center={"lat": 37.0902, "lon": -95.7129},
                    scope='usa')
    return fig

In [None]:
fig = plot_animation()

In [None]:
fig

In [None]:
fig = px.choropleth(data_frame = fires_by_year, 
                  locations = fires_by_year['STATE'],
                  color = 'FIRE_COUNT',
                  range_color = [0,9000],
                  locationmode = 'USA-states',
                  animation_frame = 'FIRE_YEAR')

# Generate Aggregate Statistics

Here we generate national statistics for selected dates.

In [None]:
COVID_STATES_DF

In [None]:
date = '2020-08-26'
date_mask = (COVID_STATES_DF['date'] == date)
death = COVID_STATES_DF[date_mask]['death'].sum()
positive = COVID_STATES_DF[date_mask]['positive'].sum()
hospitalized_currently = COVID_STATES_DF[date_mask]['hospitalizedCurrently'].sum()

In [None]:
(death, positive, hospitalized_currently)

In [None]:
COVID_STATES_DF.groupby(by='date')['positive'].sum().index

In [None]:
go.Figure(go.Bar(x = COVID_STATES_DF.groupby(by='date')['death'].sum().index,
    y = COVID_STATES_DF.groupby(by='date')['death'].sum()))


In [None]:
plotting.plot_national(COVID_STATES_DF, 'hospitalizedCurrently')

In [None]:
from plotly.subplots import make_subplots

In [None]:
# One subplot row per national metric, stacked vertically.
metrics = ['death', 'positive', 'hospitalizedCurrently']
fig = make_subplots(rows=len(metrics), cols=1)
for row, metric in enumerate(metrics, start=1):
    # add_trace(..., row=, col=) replaces the deprecated Figure.append_trace.
    # NOTE(review): assumes plotting.plot_national returns a single trace
    # rather than a whole figure; confirm.
    fig.add_trace(plotting.plot_national(COVID_STATES_DF, metric), row=row, col=1)
fig.update_layout(height=600, width=600, title_text="Stacked Subplots")
fig.show()


In [None]:
px.bar(x = COVID_STATES_DF.groupby(by='date')['positiveIncrease'].sum().index,
    y = COVID_STATES_DF.groupby(by='date')['positiveIncrease'].sum())

In [None]:
px.bar(x = COVID_STATES_DF.groupby(by='date')['deathIncrease'].sum().index,
    y = COVID_STATES_DF.groupby(by='date')['deathIncrease'].sum())

In [None]:
def generate_aggregate_stats(covid_states_df, date):
    """Sum the headline state columns over all rows matching ``date``.

    Args:
        covid_states_df: DataFrame with ``date``, ``death``, ``positive`` and
            ``hospitalizedCurrently`` columns.
        date: Value compared for equality against the ``date`` column.

    Returns:
        Tuple ``(death, positive, hospitalized_currently)`` of column sums
        over the matching rows.
    """
    rows_on_date = covid_states_df[covid_states_df['date'] == date]
    stat_columns = ('death', 'positive', 'hospitalizedCurrently')
    return tuple(rows_on_date[column].sum() for column in stat_columns)

In [None]:
a = 200000

In [None]:
b = f"{a:,d}"

In [None]:
b

In [None]:
data_processing.generate_state_aggregate_stat(COVID_STATES_DF,'2020-06-20','death')

In [None]:
html.H5()

In [None]:
filepath = 'data/co-est2019-alldata.csv'

In [None]:
census_county = pd.read_csv("data/co-est2019-alldata.csv",
                            encoding = "ISO-8859-1")

In [None]:
census_county2 = pd.read_csv("data/census_county_est2019.csv")

In [None]:
census_county[(census_county['STNAME'] == 'New York')]

In [None]:
census_county.head()

In [None]:
# Print one column name per line. A plain loop avoids building a throwaway
# list of None values (and the trailing ';' that was needed to hide it).
for column in census_county.columns:
    print(column)

In [None]:
census_county.to_csv('data/census_county_est2019.csv', index=False)

In [None]:
census_county.head()