### ✍️ Required libraries & settings

In [None]:
import sys
# Make the project-local `intake_utils` module importable.
# NOTE(review): this is a hard-coded absolute path inside one user's home
# directory; the notebook will not import `intake_utils` on another machine
# unless this path exists there -- consider making it configurable.
sys.path.insert(1, '/home/sbanik@quansight.com/demo-dashboards')

# Project helpers for working with the intake catalog.
from intake_utils import (
    catalog_init,
    list_catalog,
    view_catalog)

# Data handling, dashboarding and plotting libraries.
import pandas as pd
import numpy as np
import panel as pn
import matplotlib.pyplot as plt
import hvplot.pandas
import holoviews as hv
from holoviews import opts,dim
# Select bokeh as the HoloViews plotting backend for every plot below.
hv.extension('bokeh')
from bokeh.models import HoverTool

### 👓 About dataset

- Title: Provisional Death Counts for Influenza, Pneumonia, and COVID-19
- Brief description: Death counts for influenza, pneumonia, and coronavirus disease 2019 (COVID-19) reported to NCHS by week ending date, by state and HHS region, and by age group.

#### 🗒 Columns in this Dataset:

    - data_as_of: Date when the analysis is carried out
    - start_week: First date of data period
    - end_week: Last date of data period
    - mmwrweek: MMWR week number within the MMWR year (1-53)
        more details - https://ndc.services.cdc.gov/wp-content/uploads/MMWR_Week_overview.pdf
    - mmwryear: MMWR year
    - week_ending_date: Week-ending date for the week of reporting
    - group: Group info
    - indicator: Indicator info
    - jurisdiction: Jurisdiction of Residence
    - age_group: Age bucket
    - covid_19_deaths: Deaths involving COVID-19 (ICD-code U07.1)
    - total_deaths: Deaths from all causes
    - pneumonia_deaths: Pneumonia Deaths (ICD-10 codes J12.0-J18.9)
    - influenza_deaths: Influenza Deaths (ICD-10 codes J09-J11)
    - pneumonia_or_influenza: Deaths with Pneumonia or Influenza 
    - pneumonia_influenza_or_covid: Pneumonia, Influenza, or COVID-19 Deaths (ICD-10 codes J12.0-J18.9, J09-J11, U07.1)
    - footnote: Info/disclaimer (if any)


### 📖 Reading data via `intake` catalog

In [None]:
# Build the intake catalog object through the project helper from `intake_utils`.
catalog = catalog_init()

In [None]:
# Display the names obtained by iterating over the catalog.
list(catalog)

In [None]:
# Read the `flu_pneumonia_covid` catalog entry; the result is normalised into
# a DataFrame with `pd.json_normalize` further down.
data = catalog.flu_pneumonia_covid.read()

### 👩🏽‍💻 Converting `json data` to pandas dataframe

In [None]:
# Flatten the JSON records returned by the catalog into a tabular DataFrame.
df = pd.json_normalize(data)

In [None]:
# Preview the first two rows to check the column layout.
df.head(2)

### 🔍 Exploring datatypes & shape

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Print column names, non-null counts and dtypes as loaded (before any casting).
df.info()

### 👩🏽‍💻 Updating datatypes for required columns

In [None]:
# Cast the raw columns to proper dtypes: the date columns become
# datetime64[ns], while the MMWR year/week and every death-count column
# become pandas' nullable integer dtype 'Int64'.
df = df.astype({
    **dict.fromkeys(
        ['data_as_of', 'start_week', 'end_week', 'week_ending_date'],
        'datetime64[ns]',
    ),
    **dict.fromkeys(
        [
            'mmwryear',
            'mmwrweek',
            'covid_19_deaths',
            'total_deaths',
            'pneumonia_deaths',
            'influenza_deaths',
            'pneumonia_or_influenza',
            'pneumonia_influenza_or_covid',
        ],
        'Int64',
    ),
})

In [None]:
# Confirm the dtypes after the cast above.
df.dtypes

### 🛠 Exploring basic statistical measures (percentiles, mean and standard deviation) of the numerical values

In [None]:
# Summary statistics for the frame's columns.
df.describe()

### 👩🏽‍🔧 Replacing `NaN` with numerical value

In [None]:
# Replace every NaN in the frame with 0.
# NOTE(review): this is applied to all columns, not only the numeric count
# columns, so missing values in text columns also become 0 -- confirm this
# is intended.
df = df.replace(np.nan, 0)

### Data processing for visualisation

In [None]:
# Sum of `total_deaths` per age group. Rows whose age_group is "All Ages"
# are excluded first so they do not appear as a group of their own.
group_df = (
    df[df['age_group'] != "All Ages"]
    .groupby(['age_group'])['total_deaths']
    .sum()
    .reset_index()
)

In [None]:
# Relative change in total deaths from one age-group row to the next.
# The first row has no predecessor, so its NaN is replaced with 0.
# NOTE(review): `pct_change` yields a fraction (1.0 == +100 %), not a
# percentage, despite the column name.
group_df['percentage_increase'] = (
    group_df['total_deaths']
    .pct_change()
    .replace(np.nan, 0)
)

In [None]:
# Display the per-age-group totals together with the relative change column.
group_df

### ✨ Insights using Holoviews

#### Deceased based on age-group

In [None]:
# Hover tool for the bar chart: shows the bar height and the age group.
hover = HoverTool(tooltips=[("Deceased", "$y"),
                            ("Age Group", "@age_group")])

# Left panel: total deaths per age group.
bar_chart_age_group = hv.Bars(
    group_df[['age_group', 'total_deaths']],
    label="Total deceased based on age-group",
)
bar_chart_age_group.opts(xrotation=45, width=600, height=400, tools=[hover])

# Right panel: relative change between consecutive age groups.
# This has to be a chart element (hv.Curve): the previous hv.Table is not an
# axis-based plot, so it cannot take plot options such as `xrotation`.
curve_percentage_increase = hv.Curve(
    (group_df.age_group, group_df.percentage_increase),
    "Age-group",
    "percentage increase",
    label='Percentage increase in deceased count',
)
curve_percentage_increase.opts(xrotation=45, width=600, height=400)

# Side-by-side layout; each panel keeps its own axes since the units differ.
layout = (bar_chart_age_group + curve_percentage_increase).cols(2)
layout.opts(opts.Layout(shared_axes=False, merge_tools=True))

#### Insight
- As the age group increases, the overall death count increases substantially
- The largest increase in the deceased count is observed between the age groups *0-17* and *18-64*

#### Based on jurisdiction & age group & date range

In [None]:
# Dimension declarations as (column name, display label) pairs.
key_dimensions = [
    ('week_ending_date', 'Date'),
    ('jurisdiction', 'Jurisdiction'),
    ('age_group', 'Age Group'),
]
value_dimensions = [
    ('covid_19_deaths', 'Deseased due to COVID-19'),
    ('total_deaths', 'Total deseased'),
]
macro = hv.Table(df, key_dimensions, value_dimensions)

# Tables keyed by 'Age Group' showing both death counts; the key dimensions
# not named here (Date, Jurisdiction) are left for HoloViews to group over.
plot = macro.to.table(
    'Age Group',
    ['Total deseased', 'Deseased due to COVID-19'],
).opts(height=150)
# NOTE(review): the result of this overlay is neither assigned nor displayed,
# so the statement has no visible effect on `plot` -- confirm whether
# `plot = plot.overlay('Jurisdiction')` was intended.
plot.overlay('Jurisdiction')
plot

#### Overview plot based on deceased (Average) and jurisdiction in 2020

In [None]:
# Mean of `total_deaths` over the "United States" rows, shown in the title.
# `.mean()` is used directly: Series.aggregate has no `function` keyword, so
# the earlier `aggregate(function=np.mean)` only worked via pandas' named
# aggregation fallback and returned a one-element Series instead of a scalar.
avg_us = df.loc[df["jurisdiction"] == "United States", "total_deaths"].mean()

# All rows except the "United States" ones, largest totals first.
# NOTE(review): the bars below are averaged per jurisdiction afterwards, so
# this row-level sort does not by itself order the bars by their average;
# the heading also mentions 2020 but no year filter is applied here.
df_sorted_jurisdiction = (
    df.loc[df["jurisdiction"] != "United States"]
    .sort_values(by=['total_deaths'], ascending=False)
)

# One bar per jurisdiction: HoloViews averages total_deaths within each one.
jurisdiction_plot = hv.Bars(
    (df_sorted_jurisdiction.jurisdiction, df_sorted_jurisdiction.total_deaths),
    "Jurisdiction",
    "Deseased Count (Average)",
    label="Deseased stats based on jurisdiction \nUnited states avg= %s" % np.round(avg_us),
).aggregate(function=np.mean)

hover = HoverTool(tooltips=[("Deceased", "$y"),
                            ("Jurisdiction", "@Jurisdiction")])

jurisdiction_plot.opts(xrotation=45, width=1200, height=400, tools=[hover])

#### Insight
 - HHS Region 4 has the highest (avg) deceased count in the United States
 - District of Columbia has the lowest (avg) deceased count in the United States

#### Disease stats (covid_19, influenza, pneumonia)

In [None]:
# Weekly sums of the numeric columns. `numeric_only=True` keeps the
# datetime and text columns out of the reduction; summing those is
# meaningless and raises on recent pandas versions.
# NOTE(review): the sum runs over every jurisdiction and age-group row in
# `df`; if the frame holds both aggregate rows (e.g. "United States",
# "All Ages") and their breakdowns, deaths are counted more than once --
# confirm whether a filter is needed.
week_group_df = (
    df.groupby(['week_ending_date'])
    .sum(numeric_only=True)
    .reset_index()
)
group = "weekly deceased stats"


def _weekly_bars(column, label):
    """Return a Bars element of the weekly sum of `column` by week-ending date."""
    return hv.Bars(
        (week_group_df.week_ending_date, week_group_df[column]),
        "Date",
        "Deceased count",
        label=label,
        group=group,
    )


total = _weekly_bars('total_deaths', "Total")
covid_19 = _weekly_bars('covid_19_deaths', "COVID-19")
pneumonia = _weekly_bars('pneumonia_deaths', "Pneumonia")
influenza = _weekly_bars('influenza_deaths', "Influenza")

hover = HoverTool(tooltips=[("y", "$y"),
                            ("Date", "@Date")])

# Same size, label rotation and hover tool for all four panels.
for bars in (covid_19, pneumonia, influenza, total):
    bars.opts(width=400, height=400, xrotation=45, tools=[hover])

# 2x2 grid; axes are not shared because the magnitudes differ per cause.
layout = (total + covid_19 + influenza + pneumonia).cols(2)
layout.opts(opts.Layout(shared_axes=False, merge_tools=True))

#### Insight
- A steady increase in COVID-19 deaths can be observed

### ⬇️ Combined overview

In [None]:
import panel.widgets as pnw

# Keep only the columns used by the interactive scatter plot;
# errors='ignore' lets the cell be re-run after the columns are gone.
df = df.drop(['data_as_of','start_week','end_week',
              'mmwryear', 'week_ending_date', 'group',
              'indicator', 'footnote', 'mmwrweek', 'total_deaths'], axis=1, errors='ignore')

# Classify the remaining columns: object-dtype columns are discrete, the
# rest continuous; continuous columns with more than 20 distinct values
# are offered in the selectors.
columns = sorted(df.columns)
discrete = [x for x in columns if df[x].dtype == object]
continuous = [x for x in columns if x not in discrete]
quantileable = [x for x in continuous if len(df[x].unique()) > 20]

x = pnw.Select(name='X-Axis', value='covid_19_deaths', options=quantileable)
y = pnw.Select(name='Y-Axis', value='covid_19_deaths', options=quantileable)
# 'None' is offered (and selected initially) so that the size/color mapping
# can be switched off; create_figure below checks for exactly this sentinel.
size = pnw.Select(name='Size', value='None', options=['None'] + quantileable)
color = pnw.Select(name='Color', value='None', options=['None'] + quantileable)


@pn.depends(x.param.value, y.param.value, color.param.value, size.param.value)
def create_figure(x, y, color, size):
    """Build the scatter plot for the current widget selection.

    Parameters
    ----------
    x, y : str
        Column names plotted on the two axes.
    color, size : str
        Column names mapped to point color / point size, or the string
        'None' to leave that encoding off.

    Returns
    -------
    hv.Points
        Points element over `df`, titled "<X> vs <Y>".
    """
    # Named plot_opts so the imported `holoviews.opts` is not shadowed.
    plot_opts = dict(cmap='rainbow', width=800, height=600, line_color='black')
    if color != 'None':
        plot_opts['color'] = color
    if size != 'None':
        # Normalise the column to [0, 1] and scale it to a visible point size.
        plot_opts['size'] = hv.dim(size).norm()*50
    return hv.Points(df, [x, y], label="%s vs %s" % (x.title(), y.title())).opts(**plot_opts)


widgets = pn.WidgetBox(x, y, color, size, width=200)
pn.Row(widgets, create_figure)