In [None]:
import pandas as pd
import holoviews as hv
import numpy as np
import matplotlib.pyplot as plt

import geopy
import folium

import datetime as dt

hv.extension('bokeh')

# Visualizing Covid Cases + Deaths from Kaggle Dataset

## File is 1.5 million rows long. Contains 3,274 counties in the US

### Models
- HoloViews slider-controlled cases/deaths scatterplot
- Folium radius-indicated cases/deaths map
- Folium color-indicated cases/deaths map

### Link
[Kaggle Dataset](https://www.kaggle.com/fireballbyedimyrnmom/us-counties-covid-19-dataset)

In [None]:
# Read the county-level CSV (path is relative to the notebook's working directory)
# and end the cell with the frame so it is displayed.
df = pd.read_csv("../datasets/us-counties.csv")
df

In [None]:
# separate dated records by county

def county_date_seperated(df):
    """Total the remaining columns for every (date, county, state) combination.

    The returned frame is indexed by a three-level MultiIndex in the order
    date, county, state.
    """
    group_keys = ['date', 'county', 'state']
    per_county_day = df.groupby(group_keys)
    return per_county_day.sum()

# Module-level aggregate, indexed by (date, county, state), that the plotting
# functions further down read as a global.
cdsd = county_date_seperated(df)

In [None]:
# dynamic graph for deaths by county for dates after the start of data

def dgraph(td):
    """Build a scatter plot of death counts per county for a single day.

    Parameters
    ----------
    td : int
        Offset in days from the fixed origin date 2020-01-21.

    Returns
    -------
    hv.Scatter
        One point per index entry of that day's slice of the module-level
        ``cdsd['deaths']`` series, labelled ``"<county>, <state>"``.
    """
    origin = dt.date(2020, 1, 21)
    req_date = origin + dt.timedelta(days=td)
    # The first index level of cdsd holds date strings, so select by ISO date.
    req_data = cdsd['deaths'][req_date.isoformat()]
    # Series.items() is the supported spelling; iteritems() was removed in pandas 2.0.
    data = [(f"{county[0]}, {county[1]}", entry) for county, entry in req_data.items()]
    return hv.Scatter(data, hv.Dimension('Counties'), 'Deaths')

# Wrap dgraph in a DynamicMap with a single key dimension, then bound that
# dimension to 0-552 so the rendered widget has a finite range.
dmap = hv.DynamicMap(dgraph, kdims=['Days_From_Origin'])
dmap.redim.range(Days_From_Origin=(0,552))

In [None]:
# geocode the counties

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import tqdm
from tqdm import tqdm

def geocode(df):
    """Look up coordinates for every distinct (county, state) pair in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``county`` and ``state`` columns.

    Returns
    -------
    tuple[dict, list]
        ``coords_df`` maps ``"<county>, <state>"`` to ``(latitude, longitude)``;
        ``error_counties`` lists the addresses for which no location came back.
    """
    geolocator = Nominatim(user_agent="kaggle_geoviz")
    # Every request must go through the throttled wrapper: calling
    # geolocator.geocode directly would bypass the one-second delay.
    rate_limited_geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
    county_list = df[['county', 'state']].groupby(['county', 'state']).count().index
    print(len(county_list))
    coords_df = {}
    error_counties = []
    for county in tqdm(county_list):
        address = f"{county[0]}, {county[1]}"
        location = rate_limited_geocode(f"{address}, USA")
        # A missing result is reported as None; record it instead of relying
        # on a bare except around the attribute access.
        if location is None:
            error_counties.append(address)
            continue
        coords_df[address] = (location.latitude, location.longitude)
    return coords_df, error_counties
# Run the lookup for every county in df; geocode issues one request per county.
coords_df, error_counties = geocode(df)

In [None]:
# Convert the {address: (lat, lon)} mapping into a frame with one column per
# address, then persist it. to_csv is called without index=False, so the row
# index is written out as the first CSV column.
coords_df = pd.DataFrame.from_dict(coords_df)
coords_df.to_csv('../datasets/coords-us-counties.csv')

In [None]:
# load coords_df from csv
# was run once and downloaded subsequently
# index_col=0: the file is saved with its row index as the first column;
# without this it comes back as an extra "Unnamed: 0" column that the plotting
# loop would treat as a county.

coords_df = pd.read_csv('../datasets/coords-us-counties.csv', index_col=0)


In [None]:
# plot markers on folium map

# The basemap keyword is `tiles`; the previous `tile=` spelling was not the
# tileset argument, so the requested style was never applied.
# NOTE(review): newer folium releases may no longer bundle the Stamen
# tilesets — confirm against the installed version.
map = folium.Map(location=[39.5, -98.35], tiles="Stamen Toner")

# Each column of coords_df is one address; rows 0 and 1 hold its latitude and
# longitude.
for address, latlon in coords_df.items():
    folium.Marker(
        location=[latlon[0], latlon[1]],
        popup=address,
        icon=folium.Icon(icon="cloud")
    ).add_to(map)

map

In [None]:
# plot with varying size based on date

# Fresh base map; cd_dgraph below adds its circles to this module-level object.
# NOTE(review): the name `map` shadows the Python builtin for the rest of the notebook.
map = folium.Map(location=[39.5, -98.35], tiles="Stamen Toner")

def cd_dgraph(td):
    """Add one circle per county to the module-level ``map`` for a single day.

    Parameters
    ----------
    td : int
        Offset in days from the fixed origin date 2020-01-21.

    Returns
    -------
    folium.Map
        The same module-level ``map`` object, after the circles were added.
        Counties without an entry in ``coords_df`` are skipped.
    """
    # Collected for debugging only; the list is not returned.
    error_counties = []
    origin = dt.date(2020, 1, 21)
    req_date = origin + dt.timedelta(days=td)
    req_data = cdsd['deaths'][req_date.isoformat()]
    # Series.items() is the supported spelling; iteritems() was removed in pandas 2.0.
    for (county, state), deaths in req_data.items():
        address = f"{county}, {state}"
        try:
            x, y = coords_df[address][0], coords_df[address][1]
        except KeyError:
            # Only a missing coordinate is expected here; other errors should surface.
            error_counties.append(address)
            continue
        # NOTE(review): folium.Circle takes its radius in metres, so
        # deaths/10000 is well under a metre for realistic counts — confirm
        # the intended scale. The former `icon=` argument was dropped because
        # icons belong to markers, not circles.
        folium.Circle(
            location=[x, y],
            radius=deaths/10000,
            popup=address,
        ).add_to(map)
    return map

# Draw day 80 after the origin date; the returned map is the cell's output.
cd_dgraph(80)

In [None]:
# plot with varying heat based on date

from folium.plugins import HeatMapWithTime

import tqdm
from tqdm import tqdm

# Base map that heatmap_time_series_generator attaches its layer to.
map = folium.Map(location=[39.5, -98.35])

# NOTE(review): this module-level list is not read by the functions below,
# which build their own local heat_data.
heat_data = []

def daily_heatmap_data_generator(td):
    """Return ``[lat, lon, deaths]`` triples for every county on one day.

    Parameters
    ----------
    td : int
        Offset in days from the fixed origin date 2020-01-21.

    Returns
    -------
    list[list]
        One ``[x, y, deaths]`` entry per county that has coordinates in the
        module-level ``coords_df``; counties without coordinates are skipped.
    """
    # Collected for debugging only; the list is not returned.
    error_counties = []
    origin = dt.date(2020, 1, 21)
    req_date = origin + dt.timedelta(days=td)
    req_data = cdsd['deaths'][req_date.isoformat()]
    heat_data = []
    # Series.items() is the supported spelling; iteritems() was removed in pandas 2.0.
    for (county, state), deaths in req_data.items():
        address = f"{county}, {state}"
        try:
            x, y = coords_df[address][0], coords_df[address][1]
        except KeyError:
            # Only a missing coordinate is expected here; other errors should surface.
            error_counties.append(address)
            continue
        heat_data.append([x, y, deaths])

    return heat_data

def heatmap_time_series_generator(tdrange):
    """Attach a time-animated heat-map layer to the module-level ``map``.

    Parameters
    ----------
    tdrange : int
        How many days from the origin date to include (days ``0`` to
        ``tdrange - 1``).

    Returns
    -------
    tuple
        ``(map, ts_heat)`` where ``ts_heat`` holds one list of
        ``[lat, lon, deaths]`` entries per day.
    """
    # One frame per day; the unused local `origin` from the earlier version is gone.
    ts_heat = [daily_heatmap_data_generator(td) for td in tqdm(range(tdrange))]
    HeatMapWithTime(ts_heat).add_to(map)
    return map, ts_heat

# Build the animated layer for the first 550 days from the origin date.
# NOTE(review): the bare `ts_heat2` below displays the entire nested list;
# consider showing a summary or a slice instead.
map, ts_heat2 = heatmap_time_series_generator(550)
ts_heat2

In [None]:
# heatmap adjusted to use deaths per cases

from folium.plugins import HeatMapWithTime

import tqdm
from tqdm import tqdm

hmap_ts = folium.Map(location=[39.5, -98.35])

heat_data = []

# Deaths per 10,000 cases. daily_heatmap_data_generator_dpc reads cdsd['DPC'],
# so the column has to exist for this cell to run on a fresh kernel.
# Vectorised form of the former row-wise apply; rows with zero cases would give
# inf/NaN, which are mapped to 0 so every heat-map weight stays finite.
cdsd['DPC'] = (
    (cdsd['deaths'] / cdsd['cases'] * 10000)
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

def daily_heatmap_data_generator_dpc(td):
    """Return ``[lat, lon, DPC]`` triples for every county on one day.

    Parameters
    ----------
    td : int
        Offset in days from the fixed origin date 2020-01-21.

    Returns
    -------
    list[list]
        One ``[x, y, dpc]`` entry per county that has coordinates in the
        module-level ``coords_df``; ``dpc`` is read from ``cdsd['DPC']``.
        Counties without coordinates are skipped.
    """
    # Collected for debugging only; the list is not returned.
    error_counties = []
    origin = dt.date(2020, 1, 21)
    req_date = origin + dt.timedelta(days=td)
    req_data = cdsd['DPC'][req_date.isoformat()]
    heat_data = []
    # Series.items() is the supported spelling; iteritems() was removed in pandas 2.0.
    for (county, state), dpc in req_data.items():
        address = f"{county}, {state}"
        try:
            x, y = coords_df[address][0], coords_df[address][1]
        except KeyError:
            # Only a missing coordinate is expected here; other errors should surface.
            error_counties.append(address)
            continue
        heat_data.append([x, y, dpc])
    return heat_data

def heatmap_time_series_generator_dpc(tdrange):
    """Attach an auto-playing DPC heat-map layer to the module-level ``hmap_ts``.

    Parameters
    ----------
    tdrange : int
        How many days from the origin date to include (days ``0`` to
        ``tdrange - 1``).

    Returns
    -------
    list
        One list of ``[lat, lon, dpc]`` entries per day.
    """
    # One frame per day; the unused local `origin` from the earlier version is gone.
    ts_heat = [daily_heatmap_data_generator_dpc(td) for td in tqdm(range(tdrange))]
    HeatMapWithTime(ts_heat, auto_play=True).add_to(hmap_ts)
    return ts_heat

# Build the DPC layer for the first 550 days, then display the map it was added to.
ts_heat = heatmap_time_series_generator_dpc(550)
hmap_ts

In [None]:
# Inspect the frames from day index 450 onwards.
# NOTE(review): this displays every entry of every remaining day.
ts_heat[450:]

In [None]:
from folium.plugins import HeatMap

# Static (non-animated) heat map of the DPC values for day index 500 only.
newmap = folium.Map(location=[39.5, -98.35])
HeatMap(daily_heatmap_data_generator_dpc(500)).add_to(newmap)

newmap

In [None]:
# Re-display the animated DPC map built earlier.
hmap_ts

In [None]:
# First [lat, lon, dpc] entry of day index 549 (the last of the 550 generated days).
ts_heat[549][0]

In [None]:
# Same position in the raw-deaths series, for comparison with the DPC value.
ts_heat2[549][0]