In [4]:
%%bash
cd Data_Sets/COVID_19_Tracking/

curl -O https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv
curl -O https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv
curl -O https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv

curl -O https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv
curl -O https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv





curl: (23) Failed writing body (0 != 1869)
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  778k  100  778k    0     0  3950k      0 --:--:-- --:--:-- --:--:-- 4551k


In [5]:
#the data is from https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases
import datetime as dt
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

#Read in the tables with pandas
confirmed_global = pd.read_csv('Data_Sets/COVID_19_Tracking/time_series_covid19_confirmed_global.csv', keep_default_na=False)
deaths_global = pd.read_csv('Data_Sets/COVID_19_Tracking/time_series_covid19_deaths_global.csv', keep_default_na=False)
recovered_global = pd.read_csv('Data_Sets/COVID_19_Tracking/time_series_covid19_recovered_global.csv', keep_default_na=False)

#print out the top 5 entries of the dataset
print(confirmed_global.head(3))

  Province/State Country/Region      Lat     Long  1/22/20  1/23/20  1/24/20  \
0                   Afghanistan  33.0000  65.0000        0        0        0   
1                       Albania  41.1533  20.1683        0        0        0   
2                       Algeria  28.0339   1.6596        0        0        0   

   1/25/20  1/26/20  1/27/20  ...  3/23/20  3/24/20  3/25/20  3/26/20  \
0        0        0        0  ...       40       74       84       94   
1        0        0        0  ...      104      123      146      174   
2        0        0        0  ...      230      264      302      367   

   3/27/20  3/28/20  3/29/20  3/30/20  3/31/20  4/1/20  
0      110      110      120      170      174     237  
1      186      197      212      223      243     259  
2      409      454      511      584      716     847  

[3 rows x 75 columns]


In [6]:
#an empty array to hold a list of dates
date_list = []
#create an empty dictionary for countries/regions with their provinces/states
state_country_dict = {}


#get a list of dates from the column names
date_list = list(confirmed_global)
date_list = date_list[4:]

#loop through the confirmed_global table
for index, row in confirmed_global.iterrows():
    #if the country/region isn't in the dictionary then add it
    if row['Country/Region'] not in state_country_dict:
        #set the values as a list so i can append to them when needed
        state_country_dict[row['Country/Region']] = [row['Province/State']]
    #if it is in then add onto it
    else:
        state_country_dict[row['Country/Region']].append(row['Province/State'])

In [50]:
#Print out a list of countries
country_list = list(state_country_dict.keys())
print(*country_list, sep=", ")

#Grab input on which country to select
region_input = input("Please enter a country (Case-Sensitive): ")

print()
#if the region has a states
if state_country_dict[region_input] != ['']:
    #Print out a list of states for that country
    state_list = list(state_country_dict[region_input])
    print(*state_list, sep=", ")
    
    #Grab input on which state to select
    state_input = input("Please enter a state (Case-Sensitive): ")
#otherwise enter nothing
else:
    state_input = ''


Afghanistan, Albania, Algeria, Andorra, Angola, Antigua and Barbuda, Argentina, Armenia, Australia, Austria, Azerbaijan, Bahamas, Bahrain, Bangladesh, Barbados, Belarus, Belgium, Benin, Bhutan, Bolivia, Bosnia and Herzegovina, Brazil, Brunei, Bulgaria, Burkina Faso, Cabo Verde, Cambodia, Cameroon, Canada, Central African Republic, Chad, Chile, China, Colombia, Congo (Brazzaville), Congo (Kinshasa), Costa Rica, Cote d'Ivoire, Croatia, Diamond Princess, Cuba, Cyprus, Czechia, Denmark, Djibouti, Dominican Republic, Ecuador, Egypt, El Salvador, Equatorial Guinea, Eritrea, Estonia, Eswatini, Ethiopia, Fiji, Finland, France, Gabon, Gambia, Georgia, Germany, Ghana, Greece, Guatemala, Guinea, Guyana, Haiti, Holy See, Honduras, Hungary, Iceland, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Jamaica, Japan, Jordan, Kazakhstan, Kenya, Korea, South, Kuwait, Kyrgyzstan, Latvia, Lebanon, Liberia, Liechtenstein, Lithuania, Luxembourg, Madagascar, Malaysia, Maldives, Malta, Mauritania, Mauriti

In [68]:
%matplotlib notebook
plt.style.use('ggplot')

#filter the confirmed global down to the region then to the state
confirmed = confirmed_global[confirmed_global['Country/Region'] == region_input]
confirmed = confirmed[confirmed['Province/State'] == state_input]

#filter the deaths global down to the region then to the state
deaths = deaths_global[deaths_global['Country/Region'] == region_input]
deaths = deaths[deaths['Province/State'] == state_input]

#filter the recovered global down to the region then to the state
recovered = recovered_global[recovered_global['Country/Region'] == region_input]
recovered = recovered[recovered['Province/State'] == state_input]

#creates the lists to plot
total = []
total_dates = []
label_dates = []

#loop through all the dates
for i in date_list:
    #ignore dates that have 0 confirmed cases
    if confirmed[i].values > 0:
        #get the total active cases
        total.append(int(confirmed[i].values - deaths[i].values - recovered[i].values))
        total_dates.append(i)
        
        #grab only first day of the month in the date_list
        if '/1/' in i:
            label_dates.append(i)
        #otherwise leave that spot blank
        else:
            label_dates.append('')
#grab the first and last date for the labels
label_dates[0] = total_dates[0]
label_dates[-1] = total_dates[-1]

#Plot the graph with the month list and the 
plt.plot(total_dates, total)

plt.title("The COVID_19 Curve for " + state_input + " " + region_input)
plt.xticks(total_dates, label_dates, rotation=90)

plt.show()

<IPython.core.display.Javascript object>

In [73]:
#Plot the graph with the month list and the 
plt.plot(total_dates[-14:], total[-14:])

plt.title("The COVID_19 Curve for " + state_input + " " + region_input)
plt.xticks(rotation=45)
plt.ylim(bottom=0)

plt.show()

<IPython.core.display.Javascript object>