In [1]:
%matplotlib inline

import requests
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from time import sleep
import seaborn as sns
sns.set()

def display_all(df):
    """Render ``df`` with the row and column display limits raised to 1000.

    The limits are applied through a pandas option context, so they only
    hold for this one ``display`` call.
    """
    relaxed_limits = ("display.max_rows", 1000, "display.max_columns", 1000)
    with pd.option_context(*relaxed_limits):
        display(df)

In [2]:
# Directory holding the weekly CDC snapshot CSVs (and the combined output).
data_path = r'./data/cdcpniweekly/'
# exist_ok=True makes the cell safe to re-run and, unlike a separate
# os.path.exists() check, fails loudly if the path exists but is a regular file.
os.makedirs(data_path, exist_ok=True)

In [3]:
# Inclusive bounds for the values iterated by the (commented-out) download cell.
FIRST_YEAR, LAST_YEAR = 2016, 2020
FIRST_WEEK, LAST_WEEK = 1, 53

years = range(FIRST_YEAR, LAST_YEAR + 1)
weeks = range(FIRST_WEEK, LAST_WEEK + 1)

- Any given week's report will have all weeks up until that week.
- It appears that at least some of the early reports had a two-week lag, not a one-week lag.
- A flu season starts on Week 40 of one year and goes to week 39 of the next year.
Example:
- The first week of the 2017-2018 flu year is https://www.cdc.gov/flu/weekly/weeklyarchives2017-2018/data/nchsdata40.csv
- The last week of the 2017-2018 flu year is https://www.cdc.gov/flu/weekly/weeklyarchives2017-2018/data/nchsdata39.csv


Uncomment the next cell to pull the data down to your local machine.

# Please don't repeatedly pull down the data as it creates unnecessary work for the CDC's servers. I have included the result of running this code in the repo. So unless you want to grab more recent snapshots down the road there's no point in running it. Just run the notebook as is and it will use the data in this repo.

In [4]:
# # CDC DATA Format
# # https://www.cdc.gov/flu/weekly/weeklyarchives2017-2018/data/nchsdata42.csv
# for year in years:
#     for week in weeks:
#         if year == 2020 and week > 13:
#             next
#         else: 
#             flu_year = str(year - 1) + '-' + str(year)
#             request_url = ('https://www.cdc.gov/flu/weekly/weeklyarchives' +
#                            flu_year + '/data/nchsdata' + str(week).zfill(2) + '.csv')
#             print(request_url)
#             request = requests.get(request_url)
#             isStatusOK = request.status_code == requests.codes.ok
#             if isStatusOK:
#                 decoded_content = request.content.decode('utf-8')
#                 with open('./data/cdcpniweekly/' + flu_year + '-' + str(week).zfill(2) + '.csv', 'w') as my_data_file:
#                     print('isOK')
#                     my_data_file.write(decoded_content)

In [5]:
def calendarWeektoFluSeasonWeek(week):
    """Convert a calendar week number into the matching flu-season week.

    A flu season starts at calendar week 40, so weeks 40 and later become
    1, 2, ... while weeks 1-39 become 14-52. ``week`` is passed through
    ``int()``, so numeric strings such as ``"01"`` are accepted.

    NOTE(review): calendar week 53 maps to 14, the same value as calendar
    week 1 -- confirm whether 53-week years need separate handling.
    """
    calendar_week = int(week)
    shift = -39 if calendar_week > 39 else 13
    return calendar_week + shift
    
def FluSeasonWeektoCalendarWeek(week):
    """Convert a flu-season week number back into a calendar week.

    Flu-season weeks 1-13 become calendar weeks 40-52; weeks 14 and later
    become calendar weeks 1, 2, ... ``week`` is passed through ``int()``,
    so numeric strings are accepted.
    """
    season_week = int(week)
    shift = 39 if season_week < 14 else -13
    return season_week + shift
    
# Build one long table from every weekly snapshot CSV found in data_path.
# Snapshot files are named "<startYear>-<endYear>-<calendarWeek>.csv"
# (e.g. "2019-2020-01.csv"); every row of a snapshot is tagged with the
# season and week of the report it came from.
weekly_reports = []
path = data_path # use your path
# Sorted so the row order of combined_data does not depend on the order in
# which the filesystem happens to list the directory.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
for filename in all_files:
    # Parse the base name instead of fixed offsets into the full path, so the
    # parsing keeps working when data_path has a different length or separator.
    stem = os.path.splitext(os.path.basename(filename))[0]
    if 'combined' in stem:
        # Output of a previous run of this cell, not a snapshot.
        continue
    # Raises ValueError on a file that does not follow the naming scheme,
    # rather than silently producing wrong season/week labels.
    flu_season_start_year, flu_season_end_year, calendar_week = stem.split('-')
    flu_season = flu_season_start_year + '-' + flu_season_end_year
    flu_season_week = calendarWeektoFluSeasonWeek(calendar_week)
    df = pd.read_csv(filename, index_col=None, header=0)
    df['calendarWeek'] = int(calendar_week)
    df['filename'] = filename
    df['fluSeason'] = flu_season
    df['fluSeasonStartYear'] = flu_season_start_year
    df['fluSeasonEndYear'] = flu_season_end_year
    df['fluSeasonWeek'] = flu_season_week
    # Zero-padded so report ids sort chronologically as plain strings.
    df['reportId'] = flu_season + '-' + str(flu_season_week).zfill(2)
    weekly_reports.append(df)

if not weekly_reports:
    # pd.concat would fail with a much less helpful "No objects to concatenate".
    raise FileNotFoundError('No weekly snapshot CSV files found in ' + repr(path))

combined_data = pd.concat(weekly_reports, ignore_index=True)
combined_data.to_csv(os.path.join(data_path, 'combined.csv'))

In [15]:
# Show every row that came from a 2019-2020 season report.
season_2019_2020 = combined_data[combined_data['fluSeason'] == "2019-2020"]
display_all(season_2019_2020)

Unnamed: 0,Year,Week,Percent of Deaths Due to Pneumonia and Influenza,Expected,Threshold,All Deaths,Pneumonia Deaths,Influenza Deaths,calendarWeek,filename,fluSeason,fluSeasonStartYear,fluSeasonEndYear,fluSeasonWeek,reportId
81889,2012,1,8.479120,8.15718,8.49104,51102,4323,10,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81890,2012,2,8.343472,8.22181,8.55556,50962,4245,7,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81891,2012,3,8.370908,8.27534,8.60898,51010,4261,9,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81892,2012,4,8.448458,8.31696,8.65049,50163,4227,11,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81893,2012,5,8.140332,8.34602,8.67945,49568,4026,9,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81894,2012,6,8.109680,8.36208,8.69540,49854,4024,19,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81895,2012,7,8.179701,8.36486,8.69807,50662,4129,15,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81896,2012,8,8.292069,8.35430,8.68740,50892,4191,29,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81897,2012,9,8.406219,8.33051,8.66350,50879,4234,43,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14
81898,2012,10,8.681514,8.29380,8.62668,50740,4346,59,1,./data/cdcpniweekly\2019-2020-01.csv,2019-2020,2019,2020,14,2019-2020-14


In [7]:
# Every distinct report id, in ascending (string) order.
report_ids = sorted(combined_data['reportId'].unique())
report_ids[:3]

['2015-2016-01', '2015-2016-02', '2015-2016-03']

In [8]:
# Map each report id to a Week x Year table of pneumonia death counts
# built from the rows belonging to that report.
reports = {}
for report_id in report_ids:
    report_rows = combined_data[combined_data['reportId'] == report_id]
    pivoted_data = report_rows.pivot_table(
        index='Week',
        columns='Year',
        values='Pneumonia Deaths')
    reports[report_id] = pivoted_data
#     snapshots[snapshot] = pd.concat([pivoted_data[40:], pivoted_data[:39]])
#     snapshots[snapshot]['fluWeek'] = list(range(1, len(snapshots[snapshot]) + 1))
#     snapshots[snapshot] = snapshots[snapshot].set_index('fluWeek')

In [9]:
# Inspect the pivoted table for one report: rows are weeks, columns are years.
reports["2015-2016-01"]

Year,2010,2011,2012,2013,2014,2015
Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,,4817.0,4360.0,5620.0,4555.0,5467.0
2,,4725.0,4262.0,6080.0,5047.0,5477.0
3,,4715.0,4274.0,5941.0,5021.0,5106.0
4,,4820.0,4232.0,5649.0,4789.0,4772.0
5,,4705.0,4029.0,5291.0,4629.0,4641.0
6,,4755.0,4027.0,5028.0,4480.0,4456.0
7,,4970.0,4133.0,4889.0,4390.0,4340.0
8,,5013.0,4195.0,4581.0,3995.0,4377.0
9,,4822.0,4238.0,4489.0,3873.0,4332.0
10,,4727.0,4351.0,4420.0,4052.0,4247.0


In [10]:
# Scratch check: repeating a one-element list yields that many empty strings.
[""]*12

['', '', '', '', '', '', '', '', '', '', '', '']

In [11]:
# Plot styling shared by the chart drawn in plot_data.

# An empty dash pattern per line (room for up to 15 lines), so every line is solid.
dash_styles = ["" for _ in range(15)]

# One distinct hue per calendar year, 2009 through 2020.
colors = sns.color_palette("husl", 12)
pallete = {
    year: colors[position]
    for position, year in enumerate(range(2009, 2021))
}

sns.set_style("whitegrid")
sns.set_context("notebook", font_scale=2.5, rc={"lines.linewidth": 5})
def plot_data(x):
    """Draw pneumonia deaths by week of year, one line per calendar year, for one report.

    Parameters
    ----------
    x : str
        Report id of the form "<startYear>-<endYear>-<fluSeasonWeek>"
        (e.g. "2015-2016-01"); must be a key of the module-level ``reports``.
    """
    season_week = int(x[-2:])
    start_year = x[0:4]
    end_year = x[5:9]
    # Flu-season weeks 1-13 fall in the season's start year; later weeks
    # belong to its end year.
    year = start_year if season_week < 14 else end_year
    # Reuse the shared converter rather than repeating its arithmetic here.
    week = FluSeasonWeektoCalendarWeek(season_week)
    # Named `grid` (not `plt`) so the matplotlib.pyplot alias is not shadowed.
    # NOTE(review): newer seaborn releases appear to reject `hue=` together
    # with wide-form data -- confirm against the installed seaborn version.
    grid = sns.relplot(kind="line",
                       data=reports[x],
                       dashes=dash_styles,
                       height=9,
                       aspect=1.5,
                       hue="Year",
                       palette=pallete)
    grid.set(ylim=(0,7000))
    grid.set(xlim=(0,53))
    grid.set(title="U.S. Provisional Pneumonia Deaths as of " + year + ' week ' + str(week))
    grid.set(xlabel='Week of Year', ylabel='Death Count')

# Dropdown listing every report id, initially set to the first entry of report_ids.
w = widgets.Dropdown(options=report_ids, value=report_ids[0])
# Bind the dropdown to plot_data's `x` argument so the selected report is plotted.
interact(plot_data, x=w)

interactive(children=(Dropdown(description='x', options=('2015-2016-01', '2015-2016-02', '2015-2016-03', '2015…

<function __main__.plot_data(x)>

This loop is what creates the animation.

In [12]:
# for value in report_ids:
#     w.value = value