In [2]:
from google.colab import drive
drive.mount('/content/gdrive')
% cd /content/gdrive/My Drive/Github/"CS 499 - SPRING 2022"/"0. Data Analysis"
! pwd

Mounted at /content/gdrive
/content/gdrive/My Drive/Github/CS 499 - SPRING 2022/0. Data Analysis
/content/gdrive/My Drive/Github/CS 499 - SPRING 2022/0. Data Analysis


In [3]:
"""
Plot raw JHU data
"""

###################################################################################################################
###################################################################################################################
# Download raw data with minimal preprocessing - cell takes 40 seconds to run
###################################################################################################################
###################################################################################################################

# Import needed libraries
import os
import pandas as pd
from datetime import datetime

# One CSV is downloaded per day in [start_date, end_date]; files are named MM-DD-YYYY.csv
start_date = '2020-04-12'
end_date = '2022-01-24'
date_list = pd.date_range(start_date, end_date).strftime("%m-%d-%Y")

# Base url to which we will append onto date + ".csv" to download data from
url_base = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/"

# Mapping from the raw column names to snake_case names. It is built once, outside the loop.
# Columns that are not listed here keep their original name.
column_renames = {
  "Province_State": "province_state", "Country_Region": "country_region", "Last_Update": "last_update",
  "Lat": "latitude", "Long_": "longitude", 'Confirmed': "confirmed", 'Deaths': "deaths", 'Recovered': "recovered",
  'Active': "active", 'FIPS': "fips", 'Incident_Rate': "incident_rate",
  "Total_Test_Results": "total_test_results", "People_Hospitalized": "people_hospitalized",
  'Case_Fatality_Ratio': "case_fatality_ratio", 'UID': "uid", 'ISO3': "iso3",
  'Testing_Rate': "testing_rate", 'Hospitalization_Rate': "hospitalization_rate",
}

# List of per-day dataframes (not to be confused with date_list)
data_list = []
for date in date_list:
  df = pd.read_csv(url_base + str(date) + ".csv")
  # Tag every row with the date taken from the file name
  df.loc[:, 'date_today'] = datetime.strptime(date, "%m-%d-%Y")
  data_list.append(df.rename(columns=column_renames))

# Original author's note: the first 18 days in this date range contain an extra and
# unnecessary row whose province_state is "Recovered". To re-check, compare
# len(frame) against 58 for each frame in data_list, or diff
# set(frame.province_state.unique()) between consecutive days.

# Take 653 dataframes in data_list each with 58 rows of data, and concatenate them into 1 giant dataframe
data = pd.concat(data_list, axis=0)

# Remove the extra "Recovered" row
data = data[data.province_state != "Recovered"] # 58 states x 653 days between = 37874 rows of data

# Save data as CSV file (create the target folder first so to_csv cannot fail on a missing directory)
os.makedirs('./data', exist_ok=True)
data.to_csv('./data/jhu_raw_data.csv')

In [53]:
###################################################################################################################
###################################################################################################################
# Actually do the plotting for JHU raw data - cell takes 2 minutes 32 seconds to run
###################################################################################################################
###################################################################################################################

import os

import matplotlib.pyplot as plt
import numpy as np

# Missing values are replaced by 0 so that every series can be drawn as a continuous line
data = data.fillna(0)

state_list = data.province_state.unique()
columns_list = list(data.columns)
  # ['province_state', 'country_region', 'last_update', 'latitude', 'longitude',
  #   'confirmed', 'deaths', 'recovered', 'active',
  #   'fips', 'incident_rate', 'People_Tested',
  #   'people_hospitalized', 'Mortality_Rate', 'uid', 'iso3',
  #   'testing_rate', 'hospitalization_rate', 'date_today', 'total_test_results', 'case_fatality_ratio']
columns_of_interest = ['confirmed', 'deaths', 'recovered', 'active', 'incident_rate', 'People_Tested', 'people_hospitalized', 'Mortality_Rate',
                       'testing_rate', 'hospitalization_rate', 'total_test_results', 'case_fatality_ratio']

# Title prefix shown above each subplot, keyed by column name
title_prefixes = {
  'confirmed': "Confirmed: ",
  'deaths': "Deaths: ",
  'recovered': "Recovered: ",
  'active': "Active: ",
  'incident_rate': "Incident Rate: ",
  'People_Tested': "People Tested: ",
  'people_hospitalized': "People Hospitalized: ",
  'Mortality_Rate': "Mortality Rate: ",
  'testing_rate': "Testing Rate: ",
  'hospitalization_rate': "Hospitalization Rate: ",
  'total_test_results': "Total Test Results: ",
  'case_fatality_ratio': "Case fatality ratio: ",
}

# Folder that receives one PNG per column
plot_dir = "./plots/JHU_raw_data"


def plot_column_by_state(frame, column, title_prefix, out_path, states=None, n_cols=2):
  """Draw one subplot per state for a single column and save the figure.

  Parameters:
    frame: DataFrame with a `province_state` column and the column to plot.
    column: name of the column whose values are plotted.
    title_prefix: text placed before the state name in each subplot title.
    out_path: file path the figure is written to.
    states: iterable of state names to plot; defaults to the unique values
      of `frame.province_state`.
    n_cols: number of subplot columns in the grid.

  Returns:
    The matplotlib Figure, so the caller can show and/or close it.
  """
  if states is None:
    states = frame.province_state.unique()
  # Ceiling division: enough rows to hold every state (29 rows for 58 states)
  n_rows = max(1, -(-len(states) // n_cols))
  # squeeze=False keeps axs two-dimensional even when the grid has a single row
  fig, axs = plt.subplots(n_rows, n_cols, squeeze=False)
  # 4 x 8 inches per subplot, i.e. 8 x 232 inches for the 29 x 2 grid
  fig.set_size_inches(4 * n_cols, 8 * n_rows)
  for i, state in enumerate(states):
    ax = axs[i // n_cols, i % n_cols]
    values = frame.loc[frame.province_state == state, column].to_list()
    # x is the row position within the state's series; its length follows the data
    # instead of being hard-coded, so a state with fewer rows no longer raises
    ax.plot(np.arange(len(values)), values)
    ax.set_title(title_prefix + state)
  # Hide any unused cells of the grid (only happens when len(states) is not a multiple of n_cols)
  for ax in axs.flat[len(states):]:
    ax.set_visible(False)
  fig.savefig(out_path)
  return fig


os.makedirs(plot_dir, exist_ok=True)
for column in columns_of_interest:
  # File names are the lower-cased column names, e.g. People_Tested -> people_tested.png
  fig = plot_column_by_state(data, column, title_prefixes[column],
                             plot_dir + "/" + column.lower() + ".png", states=state_list)
  # Display the figure, then release it: each one is 8 x 232 inches and keeping
  # all twelve open at once is a large, unnecessary memory cost
  plt.show()
  plt.close(fig)

Output hidden; open in https://colab.research.google.com to view.

In [54]:
"""
Plot STAN's data/variables
"""
###################################################################################################################
###################################################################################################################
# TODO: the plotting code for this section has not been written yet; this cell
# currently contains only the title string above.
###################################################################################################################
###################################################################################################################

"\nPlot STAN's data/variables\n"

In [55]:
"""
Plot STAN's output
"""

"\nPlot STAN's output\n"