# Notebook for preparing redesign of data management

### Python Libraries

In [None]:
import pandas as pd
import numpy as np

from urllib.request import urlopen
import json

import time

import requests

In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate

### Custom Modules

In [None]:
import data_processing
import plotting

%load_ext autoreload

%autoreload 1

%aimport data_processing
%aimport plotting

## Data Importing

### County GeoJson for polygons on choropleth

In [None]:
# Fetch the US counties GeoJSON (polygon outlines for the choropleth)
county_geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(county_geojson_url) as response:
    county_geojson = json.loads(response.read())

In [None]:
# Save the downloaded GeoJSON to disk so it can be reloaded without the network
with open('data/county_geojson.json', 'w') as cache_file:
    cache_file.write(json.dumps(county_geojson))

In [None]:
# Reload the county GeoJSON from the local copy
with open('data/county_geojson.json', 'r') as fin:
    county_geojson = json.loads(fin.read())

### County Data
This is where we import county-level coronavirus data from the [New York Times GitHub repository](https://github.com/nytimes/covid-19-data).



In [None]:
import os  # this cell checks for the cache file; `path` was never imported

# Pull the NYT county-level data once per calendar day: reuse today's cached CSV
# when it exists, otherwise download it from GitHub and cache it.
today = time.strftime('%Y%m%d')
filepath = f'data/covid_counties_{today}.csv'
if os.path.exists(filepath):
    print("Pulling from file.")
    df = pd.read_csv(filepath)
else:
    print("Pulling from github.")
    url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
    df = pd.read_csv(url)
    # index=False keeps the cached file column-identical to the download; writing
    # the default index would add an 'Unnamed: 0' column on the next cached read.
    df.to_csv(filepath, index=False)

# Keep the raw numeric code, then rebuild 'fips' as a zero-padded string of length 5.
# Formatting the integer value works whether pandas parsed the column as float
# (missing codes present) or int; missing codes stay the string 'nan' as before.
df['fipsnum'] = df['fips']
df['fips'] = df['fipsnum'].map(lambda code: 'nan' if pd.isna(code) else f'{int(code):05d}')
# Set date format
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
# Create log_deaths column; the +1 maps zero deaths to 0 instead of -inf
df['log_deaths'] = np.log(df['deaths'] + 1)

### Using data_processing

In [None]:
df = data_processing.get_covid_county_data()

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.info()

### Get State Data

In [None]:
def get_covid_state_data():
    """Return daily state-level COVID data, cached on disk once per calendar day.

    Looks for ``data/covid_states_<YYYYMMDD>.csv`` (today's local date). When the
    file exists it is loaded; otherwise the data is requested from the Covid
    Tracking API, indexed by date, and written to that path.

    Returns:
        pandas.DataFrame indexed by ``date`` (datetime), whichever branch ran.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    import os  # local import: no cell in this notebook brings `os`/`path` into scope

    today = time.strftime('%Y%m%d')
    filepath = f'data/covid_states_{today}.csv'
    if os.path.exists(filepath):
        print("Pulling from file.")
        # Restore the datetime index that the download branch writes, so both
        # branches hand back a frame of the same shape.
        covid_states_df = pd.read_csv(filepath, index_col='date', parse_dates=['date'])
    else:
        print("Pulling from Covid Tracking API")
        # Coronavirus data by state from covidtracking API
        states_url = "https://covidtracking.com/api/states/daily"
        # Time out instead of hanging the notebook, and fail loudly on HTTP errors
        r = requests.get(states_url, timeout=30)
        r.raise_for_status()
        covid_states_df = pd.DataFrame(r.json())

        # Set date as datetime format
        covid_states_df['date'] = pd.to_datetime(covid_states_df.date, format="%Y%m%d")
        # set date to index
        covid_states_df = covid_states_df.set_index(keys='date')
        covid_states_df.to_csv(filepath)

    return covid_states_df

In [None]:
COVID_STATES_DF = data_processing.get_covid_state_data()

In [None]:
COVID_STATES_DF.info()

In [None]:
plotting.choropleth_state_deaths_density(COVID_STATES_DF,'death')

In [None]:
COVID_STATES_DF

In [None]:
COVID_STATES_DF.head()

In [None]:
COVID_STATES_DF.loc['2020-08-26']

In [None]:
state_stats.head()

In [None]:
# Import the unemployment data because it has the fips codes.
# NOTE(review): this rebinds `df`, replacing the COVID county frame loaded earlier;
# later cells that read df['state'] / df['date'] need the county data loaded again first.
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
    dtype={"fips": str},
)
# The first two characters of the county code are matched against the state fips below
df['prefix'] = df['fips'].str[:2]

In [None]:
df.head()

In [None]:
# USA-states  (stray note: as bare code this evaluates `USA - states` and raises NameError)

In [None]:

# `states_df` is never assigned in this notebook, so bind it here to let this cell
# and the later `states_df` cells run top-to-bottom.
# NOTE(review): assumed to be the state-level frame loaded above — confirm.
states_df = COVID_STATES_DF
deaths = states_df.reset_index()
################################################################################
# Read in states data
states = pd.read_csv('./data/tbl_states.csv')

# Fix fips code to be a string, prefix 0 to single digit codes
states['fips'] = states['fips'].astype(str).str.zfill(2)

In [None]:
df.head()

In [None]:
states.head()

In [None]:
merged = pd.merge(df, states, left_on='prefix', right_on='fips', how='outer')

In [None]:
merged.head()

In [None]:
states_df.columns

In [None]:
a = states_df[['state','death','deathIncrease','positive','fips']]

In [None]:

# Attach the state table to every county row via the two-character fips prefix
merged = pd.merge(df, states, left_on='prefix', right_on='fips', how='outer')
# Rename convention: county-level vs state-level fips
merged = merged.rename(columns={'fips_x': 'fips_co', 'fips_y': 'fips_st'})

# Keep only the most recent day of state data
max_date = deaths['date'].max()
deaths = deaths[deaths['date'] == max_date]

# NOTE(review): no `on=` is given, so pandas joins on every column name the two
# frames have in common — confirm that is the intended key set.
final = pd.merge(merged, deaths, how='outer')
final = final.drop(columns=['prefix'])

# Create a log scale of lived in density
final['logd'] = np.log(final['Lived'])

# create a column for death_per_m (deaths scaled to one million of `Pop`)
final['death_per_m'] = final['death'] * 1000000 / final['Pop']

final['log_std_density'] = np.log(final['Standard'])

In [None]:
states_df.columns

In [None]:
final.columns

In [None]:
final.columns

In [None]:
final.head()

### Get Dates for Date Slider

In [None]:
import datetime  # needed by strptime below; no earlier cell imports it

# NOTE: `df` must hold the COVID county frame here (columns 'state' and 'date').

# Get today's date
date = time.strftime('%Y-%m-%d')
# Set New York for state mask
state = 'New York'

# Only look at New York Dates
state_mask = (df['state'] == state)

# create today's date mask
date_mask = (df['date'] == date)

# Get min date from df (seconds since the epoch, local time)
min_date = int(time.mktime(df['date'].min().timetuple()))

# Hardcode a start date
start_date = '2020-03-01'
start_date_int = int(time.mktime(datetime.datetime.strptime(start_date, '%Y-%m-%d').timetuple()))

# Get max date from df
max_date_int = int(time.mktime(df['date'].max().timetuple()))

# Create a list of dates from max to min, going back 2 weeks each time.
# range() excludes its stop value, so start_date itself is never included.
# NOTE(review): the step is a fixed number of seconds while labels are rendered in
# local time, so a label can land on the previous day across a DST change — confirm acceptable.
date_list = range(max_date_int, start_date_int, -(14*24*60*60))
date_dict = {day:time.strftime('%Y-%m-%d',time.localtime(day))  for day in date_list}

### Using data_processing

In [None]:
date_dict = data_processing.generate_slider_dates(df)

In [None]:
date_dict

In [None]:
# Get today's date
time.strftime('%Y-%m-%d', time.localtime())

In [None]:
time.strftime('%Y-%m-%d')

# Dash Components

In [None]:
date = '2020-08-26'
date_mask = (df['date'] == date)

In [None]:
a = round(df[date_mask]['cases'].mean(),-1)

In [None]:
round(a.mean(),-1)

In [None]:
AB = pd.DataFrame(df.to_dict())

In [None]:
AB

In [None]:
# dcc.Store data has to be JSON-serialisable, so pass a JSON string rather than the
# DataFrame itself; rebuild the frame with pd.read_json(..., orient='split').
dcc.Store(id='store', data=df.to_json(date_format='iso', orient='split'))