In [None]:
"""
This notebook plots the raw JHU COVID-19 data, as well as the same JHU data after it has been run through STAN's preprocessing.
"""

In [None]:
# Mount Google Drive inside the Colab VM at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')
# Change into the project folder on Drive; the relative ./data and ./plots paths
# used by the later cells are resolved against this working directory.
% cd /content/gdrive/My Drive/Github/"CS 499 - SPRING 2022"/"0. Data Analysis"
# Print the working directory to confirm the cd succeeded.
! pwd

Mounted at /content/gdrive
/content/gdrive/My Drive/Github/CS 499 - SPRING 2022/0. Data Analysis
/content/gdrive/My Drive/Github/CS 499 - SPRING 2022/0. Data Analysis


In [None]:
"""
Plot raw JHU data
"""

###################################################################################################################
###################################################################################################################
# Download raw data with minimal preprocessing - cell takes 40 seconds to run
###################################################################################################################
###################################################################################################################

# Import needed libraries
import pandas as pd 
from datetime import datetime

# Inclusive date range to download, formatted the way the JHU daily report files are named (MM-DD-YYYY)
start_date = '2020-04-12'
end_date = '2022-01-24'
date_list = pd.date_range(start_date, end_date).strftime("%m-%d-%Y")

# Base url to which we append date + ".csv" to download one daily report
url_base = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/"

# JHU column name -> snake_case name used in this notebook.
# Columns that are not listed here (e.g. People_Tested, Mortality_Rate) keep their original names.
RAW_COLUMN_RENAMES = {
  "Province_State": "province_state", "Country_Region": "country_region", "Last_Update": "last_update",
  "Lat": "latitude", "Long_": "longitude", 'Confirmed': "confirmed", 'Deaths': "deaths", 'Recovered': "recovered",
  'Active': "active", 'FIPS': "fips", 'Incident_Rate': "incident_rate",
  "Total_Test_Results": "total_test_results", "People_Hospitalized": "people_hospitalized",
  'Case_Fatality_Ratio': "case_fatality_ratio", 'UID': "uid", 'ISO3': "iso3",
  'Testing_Rate': "testing_rate", 'Hospitalization_Rate': "hospitalization_rate",
}


def load_daily_report(date):
  """Download one daily report and tag every row with its report date.

  :param date: report date formatted as MM-DD-YYYY (the file name without ".csv")
  :return: DataFrame with renamed columns plus a `date_today` datetime column
  """
  report = pd.read_csv(url_base + str(date) + ".csv")
  report = report.assign(date_today=datetime.strptime(date, "%m-%d-%Y"))
  return report.rename(columns=RAW_COLUMN_RENAMES)


# List of dataframes, one per day
data_list = [load_daily_report(date) for date in date_list] # != date_list

# Code showing that first 18 days of data in this date range have an extra and unnecessary row called "Recovered"
# for i in range(len(data_list)):
#   thing = data_list[i]
#   if len(thing) != 58:
#     print(i, len(thing))
# set1 = set(data_list[17].province_state.unique())
# set2 = set(data_list[18].province_state.unique())
# set1-set2

# Take 653 dataframes in data_list each with 58 rows of data, and concatenate them into 1 giant dataframe
data = pd.concat(data_list, axis=0)

# Remove the extra "Recovered" row
data = data[data.province_state != "Recovered"] # 58 states x 653 days between = 37874 rows of data 

# Save data as CSV file
data.to_csv('./data/jhu_raw_data.csv')

In [None]:
###################################################################################################################
###################################################################################################################
# Actually do the plotting for JHU raw data - cell takes 2 minutes 32 seconds to run
###################################################################################################################
###################################################################################################################

import matplotlib.pyplot as plt
import numpy as np

import os

# Missing values are plotted as 0
data = data.fillna(0)

state_list = data.province_state.unique()
columns_list = list(data.columns)
  # ['province_state', 'country_region', 'last_update', 'latitude', 'longitude', 
  #   'confirmed', 'deaths', 'recovered', 'active', 
  #   'fips', 'incident_rate', 'People_Tested', 
  #   'people_hospitalized', 'Mortality_Rate', 'uid', 'iso3', 
  #   'testing_rate', 'hospitalization_rate', 'date_today', 'total_test_results', 'case_fatality_ratio']

# Column to plot -> prefix used in each subplot title.
# The output file name is the lower-cased column name + ".png".
RAW_PLOT_TITLES = {
  'confirmed': "Confirmed",
  'deaths': "Deaths",
  'recovered': "Recovered",
  'active': "Active",
  'incident_rate': "Incident Rate",
  'People_Tested': "People Tested",
  'people_hospitalized': "People Hospitalized",
  'Mortality_Rate': "Mortality Rate",
  'testing_rate': "Testing Rate",
  'hospitalization_rate': "Hospitalization Rate",
  'total_test_results': "Total Test Results",
  'case_fatality_ratio': "Case fatality ratio",
}
columns_of_interest = list(RAW_PLOT_TITLES)

RAW_PLOT_DIR = "./plots/JHU_raw_data"


def plot_raw_metric_grid(frame, column, title_prefix, out_path, n_cols=2, panel_height_in=8, fig_width_in=8):
  """Plot one column of the raw JHU frame as a grid with one subplot per state.

  :param frame: raw JHU DataFrame with a `province_state` column
  :param column: name of the column to plot
  :param title_prefix: text placed before ": <state name>" in every subplot title
  :param out_path: path of the PNG file to write
  :param n_cols: number of subplot columns in the grid
  :param panel_height_in: figure height, in inches, allotted to each grid row
  :param fig_width_in: total figure width in inches
  """
  states = frame.province_state.unique()
  # Size the grid from the data instead of assuming exactly 58 states
  n_rows = max(1, (len(states) + n_cols - 1) // n_cols)
  fig, axs = plt.subplots(n_rows, n_cols, squeeze=False)
  fig.set_size_inches(fig_width_in, panel_height_in * n_rows)
  for i, state_name in enumerate(states):
    ax = axs[i // n_cols, i % n_cols]
    values = frame.loc[frame.province_state == state_name, column].to_list()
    # x is the record index; its length follows the data rather than a hard-coded 653
    ax.plot(np.arange(len(values)), values)
    ax.set_title(title_prefix + ": " + state_name)
  # Hide the unused trailing panel(s) when the number of states does not fill the grid
  for ax in axs.flat[len(states):]:
    ax.set_visible(False)
  fig.savefig(out_path)
  plt.show()      # render inline before the figure is released
  plt.close(fig)  # free the (very tall) figure so 12 of them are not kept in memory


os.makedirs(RAW_PLOT_DIR, exist_ok=True)
for column, title_prefix in RAW_PLOT_TITLES.items():
  plot_raw_metric_grid(data, column, title_prefix, os.path.join(RAW_PLOT_DIR, column.lower() + ".png"))

Output hidden; open in https://colab.research.google.com to view.

In [None]:
"""
Plot STAN's data/variables
"""

###################################################################################################################
###################################################################################################################
# Download raw data with minimal preprocessing - cell takes 40 seconds to run
# Copied code for downloading+some preprocessing of data from STAN's data_downloader.py and utils.py to this cell
###################################################################################################################
###################################################################################################################

import pandas as pd
import io
import logging
import requests
import pickle

from datetime import datetime
from multiprocessing import Pool

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def check_url(url):
    """
    Check whether a URL can be fetched.

    Issues a GET request (TLS verification disabled) and treats any status
    code below 400 as "exists". A failure is logged at INFO level using the
    last path segment of the URL.

    :param url: URL to probe
    :type url: str
    :return: True when the status code is below 400, otherwise False
    :rtype: bool
    """
    response = requests.get(url, verify=False)
    exists = response.status_code < 400
    if not exists:
        logging.info(f"URL for {url.split('/')[-1]} does not exist!")
    return exists

def download_data(url):
    """
    Download a csv file from Github and parse it.

    The file is fetched with a single GET request (TLS verification disabled)
    and the status of that same response decides whether the body is parsed,
    so each file is downloaded once rather than once to probe and once to read.

    :param url: url of the csv file
    :type url: str
    :return: content of csv file, or None when the server answers with a
             status code of 400 or above
    :rtype: pandas.DataFrame or None
    """
    response = requests.get(url=url, verify=False)
    if response.status_code >= 400:
        logging.info(f"URL for {url.split('/')[-1]} does not exist!")
        return None
    return pd.read_csv(io.StringIO(response.content.decode('utf8')))


class GenerateTrainingData:
    """Download the JHU daily US reports and convert them to per-day counts.

    The processing mirrors the code copied from STAN (see the cell header):
    every daily CSV is reduced to `common_columns`, all days are concatenated,
    and cumulative confirmed/deaths/hospitalization counts are differenced per
    FIPS code.
    """

    def __init__(self):
        self.df = None
        self.url_base = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/" \
                        "csse_covid_19_daily_reports_us/"
        # Columns kept from every daily report (names after renaming)
        self.common_columns = ["state", "latitude", "longitude", "fips", "date_today", "confirmed", "deaths",
                               "recovered",
                               "active", "hospitalization"]

    def download_single_file(self, date):
        """Download and normalise the report of a single day.

        :param date: report date formatted as MM-DD-YYYY
        :type date: str
        :return: DataFrame restricted to `common_columns` with NaN replaced by 0,
                 or None when the file could not be downloaded
        :rtype: pandas.DataFrame or None
        """
        # url_base already ends with "/", so this yields a double slash; kept as in the original code
        url = self.url_base + "/" + f"{date}.csv"
        data = download_data(url=url)
        if data is None:
            logging.info(f"{date}.csv doesn't not exists or failed to be downloaded!")
            return None
        data.loc[:, 'date_today'] = datetime.strptime(date, "%m-%d-%Y")
        data = (
            data.rename(columns={"Province_State": "state", "Lat": "latitude", "Long_": "longitude",
                                 'Confirmed': "confirmed", 'Deaths': "deaths", 'Recovered': "recovered",
                                 'Active': "active", 'FIPS': "fips", "People_Hospitalized": "hospitalization"})
            # rows without a FIPS code are dropped
            .dropna(subset=['fips'])
            # assign() replaces the column, so the int dtype sticks on every pandas version
            .assign(fips=lambda frame: frame['fips'].astype(int))
        )
        data = data[self.common_columns].fillna(0)
        return data

    def download_jhu_data(self, start_time, end_time):
        """Download every daily report in a date range and build the processed frame.

        Side effects: writes the list of per-day frames to
        ./data/jhu_debugging_data.pickle and the final frame to
        ./data/jhu_processed_data.pickle.

        :param start_time: first date (inclusive), anything `pandas.date_range` accepts
        :param end_time: last date (inclusive)
        :return: concatenated per-FIPS frames with an added `new_cases` column
        :rtype: pandas.DataFrame
        :raises ValueError: when no daily report could be downloaded
        """
        date_list = pd.date_range(start_time, end_time).strftime("%m-%d-%Y")

        # The context manager releases the worker processes once map() has returned
        with Pool() as pool:
            data = pool.map(self.download_single_file, date_list)
        print('Finish download')

        # Save object to pickle for later use in debugging
        with open("./data/jhu_debugging_data.pickle", 'wb') as filehandler:
            pickle.dump(data, filehandler)

        data = [x for x in data if x is not None]
        if not data:
            raise ValueError(f"No daily report could be downloaded between {start_time} and {end_time}")
        data = pd.concat(data, axis=0)

        data.loc[:, 'date_today'] = pd.to_datetime(data['date_today'])
        df = []
        for fips in data['fips'].unique():
            temp = data[data['fips'] == fips].sort_values('date_today')
            temp.loc[:, "new_cases"] = temp['confirmed'].copy()
            # transform to daily cases
            # NOTE(review): each pass below drops the first remaining row, so the three
            # passes remove three rows per FIPS code in total, and the values are sorted
            # by magnitude (not by date) before differencing. This is kept exactly as in
            # the copied STAN code because this notebook studies that behaviour.
            for col in ["new_cases", "deaths", "hospitalization"]:
                t = temp[col].copy().sort_values().to_numpy()
                t[1:] = t[1:] - t[:-1]
                # explicit copy so the assignment below does not target a slice of `temp`
                temp = temp.iloc[1:].copy()
                temp.loc[:, col] = t[1:]
            df.append(temp)
        df = pd.concat(df, axis=0)

        df.to_pickle('./data/jhu_processed_data.pickle')
        return df

# Run the STAN-style download + preprocessing for the same date range as the raw data cell
jhu_processed_data = GenerateTrainingData().download_jhu_data(
    start_time='2020-04-12',
    end_time='2022-01-24',
)

Finish download


In [None]:
# Compare the calendar days requested with the days that survive in jhu_processed_data, to see why
#   the processed data does not have 653 days for every state
#   (653 = number of days between 2020-04-12 and 2022-01-24, inclusive of both start and end date)

# Every calendar day in the requested range, as YYYY-MM-DD strings
start_date = '2020-04-12'
end_date = '2022-01-24'
date_list = pd.date_range(start=start_date, end=end_date).strftime("%Y-%m-%d")
jhu_raw_set_dates = set(date_list)
print("Length of raw dataset dates:", len(jhu_raw_set_dates))

# Every distinct day present in the processed data, in the same string format
unique_processed_days = jhu_processed_data.date_today.unique()
jhu_processed_np_datetime_strings = np.datetime_as_string(unique_processed_days, unit='D')
jhu_processed_set_dates = set(jhu_processed_np_datetime_strings)
print("Length of processed dataset dates:", len(jhu_processed_set_dates))

# Days that were requested but are absent from the processed data
print("Difference in dates:", jhu_raw_set_dates.difference(jhu_processed_set_dates))


Length of raw dataset dates: 653
Length of processed dataset dates: 650
Difference in dates: {'2020-04-14', '2020-04-13', '2020-04-12'}


In [None]:
# Debug why the processed data is missing data from 3 dates

import pickle 

# NOTE: pickle.load can execute arbitrary code; only load the file written by download_jhu_data above.
with open("./data/jhu_debugging_data.pickle", 'rb') as filehandler:
  data = pickle.load(filehandler) # so far so good, seems like there are 653 dataframes in "data"

for i in range(len(data)):
  curr_df = data[i]
  if curr_df is None:
    # a failed download is stored as None; report it instead of crashing on .shape
    print("Missing dataframe (download failed) at index:", i)
  elif (curr_df.shape != (58, 10)):
    print("Shape not consistent in data:", i, curr_df.shape) # seems like 2020-04-12 is missing data from 1 of 58 states/provinces

data = [x for x in data if x is not None] # still so far so good, 653 items in data
data = pd.concat(data, axis=0) # concatenates everything in the list into a giant dataframe
print(data.shape) # 37873 = 653 days * 58 rows of data per day - 1 location/day missing (April 12th, 2020)

data.loc[:, 'date_today'] = pd.to_datetime(data['date_today']) # converts dates to datetime objects?
print(data.shape) # 37873 = 653 days * 58 rows of data per day - 1 location/day missing (April 12th, 2020)

print("length of data['fips'].unique():", len(data['fips'].unique())) # 60 fips codes
print("unique fips codes:", data['fips'].unique())

df = []
for fips in data['fips'].unique():
    temp = data[data['fips'] == fips].sort_values('date_today')

    # print("temp.shape before ", temp.shape) # (653, 10)

    temp.loc[:, "new_cases"] = temp['confirmed'].copy()

    print("temp.shape during, part1", temp.shape)

    # transform to daily cases
    # Each pass drops the first remaining row, so the 3 passes remove 3 rows per fips code
    for col in ["new_cases", "deaths", "hospitalization"]:
        t = temp[col].copy().sort_values().to_numpy()
        t[1:] = t[1:] - t[:-1]
        # explicit copy so the assignment below does not raise SettingWithCopyWarning
        temp = temp.iloc[1:].copy()
        temp.loc[:, col] = t[1:]
    
    # print("temp.shape after", temp.shape) # (650, 11)
    
    df.append(temp)
df = pd.concat(df, axis=0)

Shape not consistent in data: 0 (57, 10)
(37873, 10)
(37873, 10)
length of data['fips'].unique(): 60
unique fips codes: [    1     2     4     5     6     8     9    10   888    11    12    13
   999    66    15    16    17    18    19    20    21    22    23    24
    25    26    27    28    29    30    31    32    33    34    35    36
    37    38    39    40    41    42    72    44    45    46    47    48
    49    50    51    53    54    55    56    60    69 88888 99999    78]
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (1, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (1, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (65

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (653, 11)
temp.shape during, part1 (652, 11)
temp.shape during, part1 (652, 11)
temp.shape during, part1 (652, 11)


In [None]:
# Print the number of processed records per state: most have 650, while Diamond Princess,
# Grand Princess, and Virgin Islands each have 649
state_column = jhu_processed_data.state
for state in state_column.unique():
  # summing the boolean mask counts the rows belonging to this state
  num_records_for_state = int((state_column == state).sum())
  print(state, num_records_for_state)

Alabama 650
Alaska 650
Arizona 650
Arkansas 650
California 650
Colorado 650
Connecticut 650
Delaware 650
District of Columbia 650
Florida 650
Georgia 650
Guam 650
Hawaii 650
Idaho 650
Illinois 650
Indiana 650
Iowa 650
Kansas 650
Kentucky 650
Louisiana 650
Maine 650
Maryland 650
Massachusetts 650
Michigan 650
Minnesota 650
Mississippi 650
Missouri 650
Montana 650
Nebraska 650
Nevada 650
New Hampshire 650
New Jersey 650
New Mexico 650
New York 650
North Carolina 650
North Dakota 650
Ohio 650
Oklahoma 650
Oregon 650
Pennsylvania 650
Puerto Rico 650
Rhode Island 650
South Carolina 650
South Dakota 650
Tennessee 650
Texas 650
Utah 650
Vermont 650
Virginia 650
Washington 650
West Virginia 650
Wisconsin 650
Wyoming 650
American Samoa 650
Northern Mariana Islands 650
Diamond Princess 649
Grand Princess 649
Virgin Islands 649


In [None]:
###################################################################################################################
###################################################################################################################
# Actually do the plotting for JHU processed data
###################################################################################################################
###################################################################################################################

import matplotlib.pyplot as plt
import numpy as np

import os

state_list = jhu_processed_data.state.unique()

# jhu_processed_data.columns
# Index(['state', 'latitude', 'longitude', 'fips', 'date_today', 'confirmed',
#        'deaths', 'recovered', 'active', 'hospitalization', 'new_cases'],
#       dtype='object')

# Column to plot -> prefix used in each subplot title.
# The output file name is the column name + ".png".
PROCESSED_PLOT_TITLES = {
  'confirmed': "Confirmed",
  'deaths': "Deaths",
  'recovered': "Recovered",
  'active': "Active",
  'hospitalization': "Hospitalization",
  'new_cases': "New cases",
}
columns_of_interest = list(PROCESSED_PLOT_TITLES)

data = jhu_processed_data

PROCESSED_PLOT_DIR = "./plots/JHU_processed_data"


def plot_processed_metric_grid(frame, column, title_prefix, out_path, n_cols=2, panel_height_in=8, fig_width_in=8):
  """Plot one column of the processed JHU frame as a grid with one subplot per state.

  :param frame: processed JHU DataFrame with a `state` column
  :param column: name of the column to plot
  :param title_prefix: text placed before ": <state name>" in every subplot title
  :param out_path: path of the PNG file to write
  :param n_cols: number of subplot columns in the grid
  :param panel_height_in: figure height, in inches, allotted to each grid row
  :param fig_width_in: total figure width in inches
  """
  states = frame.state.unique()
  # Size the grid from the data instead of assuming exactly 58 states
  n_rows = max(1, (len(states) + n_cols - 1) // n_cols)
  fig, axs = plt.subplots(n_rows, n_cols, squeeze=False)
  fig.set_size_inches(fig_width_in, panel_height_in * n_rows)
  for i, state_name in enumerate(states):
    ax = axs[i // n_cols, i % n_cols]
    values = frame.loc[frame.state == state_name, column].to_list()
    # x follows the number of records of this state (650 for most, 649 for Diamond Princess,
    # Grand Princess and Virgin Islands), so no state needs to be special-cased by name
    ax.plot(np.arange(len(values)), values)
    ax.set_title(title_prefix + ": " + state_name)
  # Hide the unused trailing panel(s) when the number of states does not fill the grid
  for ax in axs.flat[len(states):]:
    ax.set_visible(False)
  fig.savefig(out_path)
  plt.show()      # render inline before the figure is released
  plt.close(fig)  # free the (very tall) figure so 6 of them are not kept in memory


os.makedirs(PROCESSED_PLOT_DIR, exist_ok=True)
for column, title_prefix in PROCESSED_PLOT_TITLES.items():
  plot_processed_metric_grid(data, column, title_prefix, os.path.join(PROCESSED_PLOT_DIR, column + ".png"))

Output hidden; open in https://colab.research.google.com to view.