# (Attempt at) Visualizing the data

In [None]:
# Import the necessary packages
import numpy as np
import matplotlib as plt
import pandas as pd
import plotly.express as px
from urllib.request import urlopen
import json

# Set to False to load only the 2020 file instead of all three years (for speed)
USE_ALL_DATA = True

# Load the county case data; the fips column is forced to text.
if not USE_ALL_DATA:
    df = pd.read_csv("RawData/us-counties-2020.csv", dtype={"fips": str})
else:
    yearly_files = (
        "RawData/us-counties-2020.csv",
        "RawData/us-counties-2021.csv",
        "RawData/us-counties-2022.csv",
    )
    df1, df2, df3 = (pd.read_csv(path, dtype={"fips": str}) for path in yearly_files)
    # Stack the three years on top of each other and renumber the rows from 0.
    df = pd.concat([df1, df2, df3], ignore_index=True)


### Template from internet

In [None]:
# Disabled reference example: the code below is wrapped in a string literal, so
# none of it runs. It downloads a county geojson and an unemployment table and
# draws a county-level choropleth of the 'unemp' column.
"""
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

df_temp = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
                   dtype={"fips": str})

fig = px.choropleth(df_temp, geojson=counties, locations='fips', color='unemp',
                           color_continuous_scale="Viridis",
                           range_color=(0, 12),
                           scope="usa",
                           labels={'unemp':'unemployment rate'}
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
"""

### Aggregate all data per county and plot it

In [None]:
# Build the fips column by dropping the first four characters of 'geoid'
# (presumably a fixed prefix such as "USA-" -- verify against the raw data).
# The vectorized string slice avoids a Python-level call for every row.
df['fips'] = df['geoid'].str[4:]
# Keep only the needed columns. The explicit copy makes df an independent
# frame, so adding columns to it later does not trigger SettingWithCopyWarning.
df = df[['date', 'fips', 'cases']].copy()

# Sum the 'cases' column per county
aggregate_data = df.groupby("fips", as_index = False).agg({"cases" : "sum"})

# Read in the population data set; STATE and COUNTY are read as text and
# concatenated to form the fips key used for the merge.
pop_df2 = pd.read_csv("RawData/co-est2021-alldata.csv", encoding = 'ISO-8859-1', dtype={'COUNTY': str, 'STATE': str})
pop_df2['fips'] = pop_df2['STATE'] + pop_df2['COUNTY']
pop_df2 = pop_df2[['fips', 'CTYNAME', 'POPESTIMATE2021']]
pop_df2 = pop_df2.rename(columns = {'CTYNAME': 'county', 'POPESTIMATE2021': 'Pop2021'})

# Merge data sets; the inner join keeps only fips codes present in both frames
inner_merge2 = pd.merge(left = aggregate_data, right = pop_df2, on = 'fips', how = 'inner')

# Shapes before and after the merge show how many counties were dropped
print(aggregate_data.shape)
print(inner_merge2.shape)

# Compute cases/population. The callable given to assign receives the whole
# frame, so the division is done column-wise.
merged2 = inner_merge2.assign(cpp = lambda frame: frame['cases']/frame['Pop2021'])

print()
print(merged2[['cases', 'Pop2021', 'cpp']].describe())


In [None]:
# Draw the per-county 'cpp' values of merged2 on a map of the USA

COUNTY_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(COUNTY_GEOJSON_URL) as response:
    counties = json.load(response)

# Options for the choropleth, collected in one place for readability
choropleth_options = {
    "geojson": counties,
    "locations": 'fips',
    "color": 'cpp',
    "color_continuous_scale": "Viridis",
    "range_color": (0, 0.30),
    "scope": "usa",
    "labels": {'cpp': 'cases of covid per population'},
}
fig = px.choropleth(merged2, **choropleth_options)

# Remove the margins around the figure
no_margin = {"r": 0, "t": 0, "l": 0, "b": 0}
fig.update_layout(margin=no_margin)
fig.show()

## Plot data per week with a cool slider

Be aware: the last cell of this section runs quite slowly. Even though it provides a slider, I would not recommend dragging the dot around; instead, click on the spot where you want the slider to be. I would also not recommend moving this slider around a lot just for fun (not that I did that...).

In [None]:
# Import some more packages
import ipywidgets as widgets
import datetime as dt

In [None]:
## Aggregate data per week (again on county level)

# Running time: ~2-3 mins

# Convert 'date' column to a more workable format.
# Checking the column dtype works for any index, unlike inspecting a single
# cell by label, and skips the conversion when the cell is run a second time.
if not pd.api.types.is_datetime64_any_dtype(df['date']):
    df['date'] = pd.to_datetime(df['date'])

# Extract the weeks
# Use Monday of the week of the first recorded date as reference
first_date = df['date'].min()
# weekday() is 0 on Monday, so subtracting it steps back to that week's Monday
REFERENCE_DATE = first_date - dt.timedelta(days=first_date.weekday())

def get_week_number(row, REFERENCE_DATE):
    """Return the week index and the week's first and last day for one row.

    Parameters
    ----------
    row : mapping
        Must provide ``row['date']``, a date-like object that supports
        ``weekday()`` and subtraction.
    REFERENCE_DATE : date-like
        Date from which whole weeks are counted.

    Returns
    -------
    tuple
        ``(week, start_of_week, end_of_week)``: the number of whole 7-day
        periods between REFERENCE_DATE and the row's date, the Monday of
        the week containing the date, and the day six days after that Monday.
    """
    current = row['date']
    # weekday() is 0 on Monday, so this steps back to the Monday of the week
    monday = current - dt.timedelta(days=current.weekday())
    sunday = monday + dt.timedelta(days=6)
    week_index = (current - REFERENCE_DATE).days // 7
    return (week_index, monday, sunday)

# Compute the week columns with vectorized datetime arithmetic; a row-wise
# apply of get_week_number yields the same values but is slow on large frames.
# Requires 'date' to be a datetime column.

# week: number of whole 7-day periods since REFERENCE_DATE
df['week'] = (df['date'] - REFERENCE_DATE).dt.days // 7
# startOfWeek: Monday of the week containing 'date' (weekday is 0 on Monday)
df['startOfWeek'] = df['date'] - pd.to_timedelta(df['date'].dt.weekday, unit='D')
# endOfWeek: six days after startOfWeek
df['endOfWeek'] = df['startOfWeek'] + pd.Timedelta(days=6)

# Aggregate data by weeks and fips: cases are summed, the week boundaries are
# carried along via max
agg_week = df.groupby(['week', 'fips'], as_index = False).agg({"cases" : "sum", "startOfWeek" : "max", "endOfWeek" : "max"})

In [None]:
## Attach county populations to the weekly case data

# Load the population table; STATE and COUNTY are read as text and
# concatenated into the fips key, then only the needed columns are kept.
population = (
    pd.read_csv("RawData/co-est2021-alldata.csv", encoding='ISO-8859-1',
                dtype={'COUNTY': str, 'STATE': str})
    .assign(fips=lambda frame: frame['STATE'] + frame['COUNTY'])
    [['fips', 'CTYNAME', 'POPESTIMATE2021']]
    .rename(columns={'CTYNAME': 'county', 'POPESTIMATE2021': 'Pop2021'})
)

# Join on fips and discard rows that contain missing values
week_merge = agg_week.merge(population, on='fips').dropna()

# Shapes before and after the merge
print(agg_week.shape)
print(week_merge.shape)

# Weekly cases expressed as a percentage of the county population
week_merge['casespercapita'] = week_merge['cases'].mul(100).div(week_merge['Pop2021'])

print()
print(week_merge[['cases', 'Pop2021', 'casespercapita']].describe())

# Keep just the columns used afterwards and write them to disk
kept_columns = ['fips', 'week', 'casespercapita', 'startOfWeek', 'endOfWeek']
week_merge = week_merge[kept_columns]
week_merge.to_csv("PreprocessedData/week_merge.csv")

In [None]:
# Slider that selects the week to show, from 0 up to the last week in week_merge
latest_week = max(week_merge['week'])
a = widgets.IntSlider(description='a', value=7, min=0, max=latest_week, step=1)

# County outlines used by the choropleth
COUNTY_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(COUNTY_GEOJSON_URL) as response:
    counties = json.load(response)

def f(a):
    """Show a US county choropleth of 'casespercapita' for week *a*.

    Parameters
    ----------
    a : int
        Week number to display; matched against the 'week' column of the
        module-level ``week_merge`` frame.

    Notes
    -----
    The dates in the title are read from the rows of the selected week.
    A label lookup such as ``week_merge.loc[a, ...]`` would instead return
    the row whose index label equals ``a``, which in general belongs to a
    different week (or does not exist at all).
    """
    week_rows = week_merge[week_merge['week'] == a]

    # Plot the county data in a map
    fig = px.choropleth(week_rows, geojson=counties, locations='fips', color='casespercapita',
                        color_continuous_scale="Viridis",
                        range_color=(0, 0.5),
                        scope="usa",
                        labels={'casespercapita':'%new cases <br> (on county level)'}
                        )

    title = "Covid-19 cases for week " + str(a)
    # A week without rows has no dates to show; then only the week number is used.
    if not week_rows.empty:
        start_of_week = week_rows['startOfWeek'].iloc[0]
        end_of_week = week_rows['endOfWeek'].iloc[0]
        title += ('<br>' + "(" + start_of_week.strftime("%d/%b/%Y") + " - "
                  + end_of_week.strftime("%d/%b/%Y") + ")")

    fig.update_layout(title_text=title,
                      margin={"r":0,"t":50,"l":0,"b":0, "autoexpand" : True},
                      width=800)
    fig.show()

# Redraw the map through f whenever the slider value changes
slider_column = widgets.VBox([a])
out = widgets.interactive_output(f, {'a': a})

# Slider on the left, map output on the right
widgets.HBox([slider_column, out])

## Pre-compute all the images and make a video of them

The video is made using other software.

In [None]:
# Even more imports
import io
from PIL import Image

In [None]:
# kaleido needs to be pip-installed

# Running time: long

# Download the county outlines used by the choropleth
COUNTY_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(COUNTY_GEOJSON_URL) as response:
    counties = json.load(response)

def f(a, MAX_WEEK):
    """Render the county choropleth for week *a* and save it as ``<a>.png``.

    Parameters
    ----------
    a : int
        Week number to render; matched against the 'week' column of the
        module-level ``week_merge`` frame.
    MAX_WEEK : int
        Last week number, used only for the printed completion percentage.

    Notes
    -----
    The dates in the title are read from the rows of the selected week.
    A label lookup such as ``week_merge.loc[a, ...]`` would instead return
    the row whose index label equals ``a``, which in general belongs to a
    different week (or does not exist at all).
    """
    # With a single week (MAX_WEEK == 0) the ratio is undefined; report 100%.
    completion = round(100*(a / MAX_WEEK), 1) if MAX_WEEK else 100.0
    print("Busy making plot for week {}. ({}% completion)".format(a, completion))

    week_rows = week_merge[week_merge['week'] == a]

    # Plot the county data in a map
    fig = px.choropleth(week_rows, geojson=counties, locations='fips', color='casespercapita',
                        color_continuous_scale="Viridis",
                        range_color=(0, 0.5),
                        scope="usa",
                        labels={'casespercapita':'%new cases <br> (on county level)'}
                        )

    title = "Covid-19 cases for week " + str(a)
    # A week without rows has no dates to show; then only the week number is used.
    if not week_rows.empty:
        start_of_week = week_rows['startOfWeek'].iloc[0]
        end_of_week = week_rows['endOfWeek'].iloc[0]
        title += ('<br>' + "(" + start_of_week.strftime("%d/%b/%Y") + " - "
                  + end_of_week.strftime("%d/%b/%Y") + ")")

    fig.update_layout(title_text=title,
                      margin={"r":0,"t":50,"l":0,"b":0, "autoexpand" : True},
                      width=800)

    # One image per week, named after the week number
    name = str(a) + ".png"

    fig.write_image(name)

# Determine the week range once, instead of rescanning the column on every iteration
first_week = int(week_merge['week'].min())
last_week = int(week_merge['week'].max())

# Render one image per week, first to last (inclusive)
for a in range(first_week, last_week + 1):
    f(a, last_week)

# ToDo?: Implement a progress bar widget
