In [1]:
%matplotlib inline

import requests
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from time import sleep
import seaborn as sns
sns.set()

In [2]:
# Directory that holds the weekly CSV snapshots (and the combined export).
data_path = r'./data/cdcpniweekly/'
# exist_ok=True makes the cell safe to re-run and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(data_path, exist_ok=True)

In [3]:
# Year and week-of-year ranges iterated by the (commented-out) download cell.
# Upper bounds are written as "last value + 1" because range() excludes its stop.
years = range(2009, 2020 + 1)
weeks = range(1, 53 + 1)
# Narrow to a single year/week when experimenting:
# years = [2019]
# weeks = [12]

# Uncomment the code in the next cell to pull the data down to your local machine. Please don't pull the data down repeatedly, as it's hard on the CDC's servers. I have included the result of running this code in the repo, so unless you want to grab more recent snapshots down the road there's no point in running it.

In [4]:
## CDC DATA Format
## https://www.cdc.gov/flu/weekly/weeklyarchives2017-2018/data/nchsdata42.csv
# for year in years:
#     for week in weeks:
#         if (year == 2009 and week < 40) or (year == 2020 and week > 13):
#             next
#         else: 
#             request_url = ('https://www.cdc.gov/flu/weekly/weeklyarchives' +
#                            str(year - 1) + '-' + str(year) +
#                            '/data/nchsdata' + str(week).zfill(2) + '.csv')
#             print(request_url)
#             request = requests.get(request_url)
#             isStatusOK = request.status_code == requests.codes.ok
#             if isStatusOK:
#                 decoded_content = request.content.decode('utf-8')
#                 with open('./data/cdcpniweekly/' + str(year) + '-' + str(week).zfill(2) + '.csv', 'w') as my_data_file:
#                     print('isOK')
#                     my_data_file.write(decoded_content)

In [5]:
# Read every weekly snapshot CSV in data_path and stack them into one long
# DataFrame, tagging each row with the snapshot it came from.
weekly_reports = []
path = data_path # use your path
# glob returns files in arbitrary order; sort so the row order is reproducible.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
for filename in all_files:
    # Work from the bare file name ("<year>-<week>.csv") instead of fixed
    # character offsets into the full path, so changing data_path cannot
    # silently shift the slices.
    stem = os.path.splitext(os.path.basename(filename))[0]
    if 'combined' in stem:
        # Skip the combined export written back into this directory.
        continue
    year, _, week = stem.partition('-')
    df = pd.read_csv(filename, index_col=None, header=0)
    df['snapshotYear'] = year
    df['snapshotWeek'] = week
    # Snapshot id is the year and zero-padded week concatenated, e.g. '201604'.
    df['snapshot'] = year + week
    weekly_reports.append(df)
combined_data = pd.concat(weekly_reports, ignore_index=True)

In [6]:
# Persist the stacked snapshots next to the weekly files. os.path.join keeps
# the target path correct whether or not data_path ends with a separator.
combined_data.to_csv(os.path.join(data_path, 'combined.csv'))

In [7]:
# Distinct snapshot ids present in the combined data, in order of first appearance.
combined_data.loc[:, 'snapshot'].unique()

array(['201601', '201604', '201605', '201606', '201607', '201608',
       '201609', '201610', '201611', '201612', '201613', '201614',
       '201615', '201616', '201617', '201618', '201619', '201620',
       '201621', '201622', '201623', '201624', '201625', '201626',
       '201627', '201628', '201629', '201630', '201631', '201632',
       '201633', '201634', '201635', '201636', '201637', '201638',
       '201639', '201640', '201641', '201642', '201643', '201644',
       '201645', '201646', '201647', '201648', '201649', '201650',
       '201651', '201652', '201701', '201702', '201703', '201704',
       '201705', '201706', '201707', '201708', '201709', '201710',
       '201711', '201712', '201713', '201714', '201715', '201716',
       '201717', '201718', '201719', '201720', '201721', '201722',
       '201723', '201724', '201725', '201726', '201727', '201728',
       '201729', '201730', '201731', '201732', '201733', '201734',
       '201735', '201736', '201737', '201738', '201739', '2017

In [8]:
# Peek at the first rows of one snapshot to check the column layout.
snapshot_id = '201604'
data = combined_data[combined_data['snapshot'] == snapshot_id]
data.head(3)

Unnamed: 0,Year,Week,Percent of Deaths Due to Pneumonia and Influenza,Expected,Threshold,All Deaths,Pneumonia Deaths,Influenza Deaths,snapshotYear,snapshotWeek,snapshot
272,2010,41,7.450072,7.25991,7.60749,46617,3466,7,2016,4,201604
273,2010,42,7.286496,7.35655,7.70391,47142,3432,3,2016,4,201604
274,2010,43,7.524668,7.45967,7.80682,46620,3503,5,2016,4,201604


In [9]:
# Same listing of distinct snapshot ids as shown earlier in the notebook.
combined_data.snapshot.unique()

array(['201601', '201604', '201605', '201606', '201607', '201608',
       '201609', '201610', '201611', '201612', '201613', '201614',
       '201615', '201616', '201617', '201618', '201619', '201620',
       '201621', '201622', '201623', '201624', '201625', '201626',
       '201627', '201628', '201629', '201630', '201631', '201632',
       '201633', '201634', '201635', '201636', '201637', '201638',
       '201639', '201640', '201641', '201642', '201643', '201644',
       '201645', '201646', '201647', '201648', '201649', '201650',
       '201651', '201652', '201701', '201702', '201703', '201704',
       '201705', '201706', '201707', '201708', '201709', '201710',
       '201711', '201712', '201713', '201714', '201715', '201716',
       '201717', '201718', '201719', '201720', '201721', '201722',
       '201723', '201724', '201725', '201726', '201727', '201728',
       '201729', '201730', '201731', '201732', '201733', '201734',
       '201735', '201736', '201737', '201738', '201739', '2017

In [10]:
# for snapshot in combined_data['snapshot'].unique()[:10]:
#     data = combined_data.query('snapshot == @snapshot')
#     data.pivot_table(values='Pneumonia Deaths', columns='Year', index='Week').plot(xlim=(1, 53),
#                                                                                    ylim=(0,7000))
#     clear_output(wait=False)

In [11]:
# Sorted list of the distinct snapshot ids, used as dropdown options and as
# keys for per-snapshot lookups.
# sorted() returns the new list directly; the previous code assigned the
# result of list.sort() (always None) to a misspelled, unused name.
snapshot_ids = sorted(combined_data['snapshot'].unique())

In [12]:
# Build one Week x Year table of 'All Deaths' per snapshot id.
snapshots = {}
for snapshot in snapshot_ids:
    rows_for_snapshot = combined_data[combined_data['snapshot'] == snapshot]
    snapshots[snapshot] = rows_for_snapshot.pivot_table(
        index='Week',
        columns='Year',
        values='All Deaths',
    )

In [13]:
# Scratch check: repeating a one-element list yields twelve empty strings.
12 * [""]

['', '', '', '', '', '', '', '', '', '', '', '']

In [14]:
# Plot styling shared by plot_data.
# One empty dash spec per line (15 provided); presumably "" means a solid
# line in seaborn -- confirm against the installed version.
dash_styles = [""] * 15

# Twelve evenly spaced hues, one for each year from 2009 through 2020.
colors = sns.color_palette("husl", 12)

# Map each year to its colour so a given year keeps the same colour
# regardless of which snapshot is plotted.
pallete = dict(zip(range(2009, 2021), colors))

sns.set_style("dark")
sns.set_context("notebook", font_scale=2.5, rc={"lines.linewidth": 5})
def plot_data(x):
    """Plot weekly 'All Deaths' counts, one line per year, for one snapshot.

    Parameters
    ----------
    x : str
        Snapshot id used as a key into ``snapshots``. Its first four
        characters and last two characters appear in the plot title as the
        year and week of the snapshot.

    Returns
    -------
    None
        The figure is produced as a side effect of ``sns.relplot``.
    """
    # ``snapshots[x]`` is wide-form (index = Week, columns = Year), so seaborn
    # assigns the columns to hue by itself. ``hue`` is deliberately not
    # passed: newer seaborn versions raise ValueError when a semantic
    # variable is given together with wide-form data.
    # The result is named ``grid`` (not ``plt``) so it does not shadow the
    # matplotlib.pyplot alias imported at the top of the notebook.
    grid = sns.relplot(kind="line",
                       data=snapshots[x],
                       dashes=dash_styles,
                       height=9,
                       aspect=1.5,
                       palette=pallete)
    grid.set(ylim=(0,70000))
    grid.set(xlim=(0,53))
    grid.set(title="All Deaths as of " + x[:4] + ' week ' + x[-2:])
    grid.set(xlabel='Week of Year', ylabel='Death Count')

# Dropdown offering every snapshot id, initially set to the first one.
w = widgets.Dropdown(
    value=snapshot_ids[0],
    options=snapshot_ids,
)
# Bind the dropdown to plot_data so the selected id is passed as ``x``.
interact(plot_data, x=w)

interactive(children=(Dropdown(description='x', options=('201601', '201604', '201605', '201606', '201607', '20…

<function __main__.plot_data(x)>

In [15]:
# Step the dropdown through every snapshot id in order. Each assignment
# changes the widget's selection; presumably this re-invokes plot_data via
# the interact() binding -- confirm in a live kernel.
# NOTE(review): there is no delay between steps, so frames change as fast
# as they can be rendered.
for value in snapshot_ids:
    w.value = value