In [1]:
import pandas as pd
from datetime import datetime, timedelta
# Turn off pandas' chained-assignment warning (the option's default is 'warn').
pd.set_option('mode.chained_assignment', None)
# Show floats with one decimal place whenever a frame or series is rendered.
pd.set_option('display.float_format', '{:.1f}'.format)

In [2]:
# Despite the name, this holds *yesterday's* local date, formatted MM-DD-YYYY
# (the format used in the daily-report file name built in the next cell).
date_today = f"{datetime.today() - timedelta(days=1):%m-%d-%Y}"

In [3]:
# Raw-GitHub location of the CSSE daily report CSV for the selected date.
url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_daily_reports/'
    f'{date_today}.csv'
)

In [4]:
# Download the daily report into a DataFrame.
# `on_bad_lines='warn'` skips malformed rows with a warning. It replaces
# `error_bad_lines=False`, which was deprecated in pandas 1.3 and removed in
# pandas 2.0 (where passing it raises a TypeError).
df = pd.read_csv(url, on_bad_lines='warn')
# Drop the coordinate and FIPS columns; none of the cells below read them.
df = df.drop(columns=['Lat', 'Long_', 'FIPS'])

In [5]:
# Assumed share of actual cases that end up being reported as confirmed.
# NOTE: despite the name, this is a fraction (0.12 == 12%), not a percentage.
# FiveThirtyEight has experts estimating around 9-12% of cases are being reported;
# 0.12 is the top of that range.
percent_estimate = 0.12

In [6]:
# Scale confirmed counts up by the reciprocal of the assumed reporting fraction
# to get an estimated true case count per row.
df['Estimated Cases'] = df['Confirmed'].mul(1 / percent_estimate)

In [7]:
# Per-country fatality rate (deaths as a percentage of confirmed cases).
#
# Only the two numeric columns that are needed are summed. Calling `.sum()` on
# the whole grouped frame relied on pandas silently discarding text columns;
# since pandas 2.0 `numeric_only` defaults to False, so those columns would be
# aggregated (or raise) instead of being dropped.
df_countries = (
    df.groupby('Country_Region')[['Confirmed', 'Deaths']]
    .sum()
    # Keep only countries with at least 5000 confirmed cases.
    .loc[lambda totals: totals['Confirmed'] >= 5000]
    .assign(**{'Fatality Rate': lambda totals: totals['Deaths'] / totals['Confirmed'] * 100})
    .sort_values(by='Fatality Rate', ascending=False)
)

In [8]:
# Print the per-country fatality table (countries with >= 5000 confirmed cases,
# highest fatality rate first).
print(df_countries)

                Confirmed  Deaths  Fatality Rate
Country_Region                                  
Italy              159516   20465           12.8
Belgium             30589    3903           12.8
United Kingdom      89570   11347           12.7
France             137875   14986           10.9
Netherlands         26710    2833           10.6
Spain              170099   17756           10.4
Sweden              10948     919            8.4
Iran                73303    4585            6.3
Brazil              23430    1328            5.7
Romania              6633     331            5.0
Ecuador              7529     355            4.7
Switzerland         25688    1138            4.4
Denmark              6513     285            4.4
US                 580619   23529            4.1
China               83213    3345            4.0
Poland               6934     245            3.5
Ireland             10647     365            3.4
India               10453     358            3.4
Portugal            

In [9]:
# Per-country ratio of recoveries to deaths.
# NOTE: this rebinds `df_countries`, replacing the fatality-rate table built earlier.
#
# Only the two numeric columns that are needed are summed. Calling `.sum()` on
# the whole grouped frame relied on pandas silently discarding text columns;
# since pandas 2.0 `numeric_only` defaults to False, so those columns would be
# aggregated (or raise) instead of being dropped.
df_countries = (
    df.groupby('Country_Region')[['Deaths', 'Recovered']]
    .sum()
    # Keep only countries with at least 1000 recorded recoveries.
    .loc[lambda totals: totals['Recovered'] >= 1000]
    .assign(**{'Recoveries per Death': lambda totals: totals['Recovered'] / totals['Deaths']})
    .sort_values(by='Recoveries per Death', ascending=False)
)

In [10]:
# Print the per-country recoveries table (countries with >= 1000 recoveries,
# highest recoveries-per-death ratio first).
print(df_countries)

                Deaths  Recovered  Recoveries per Death
Country_Region                                         
Korea, South       217       7447                  34.3
Thailand            40       1288                  32.2
Australia           61       1806                  29.6
Malaysia            77       2276                  29.6
Chile               82       2367                  28.9
China             3345      78039                  23.3
Germany           3194      64300                  20.1
Austria            368       7343                  20.0
Israel             116       1855                  16.0
Peru               216       2642                  12.2
Switzerland       1138      13700                  12.0
Pakistan            93       1095                  11.8
Iran              4585      45983                  10.0
Canada             780       7758                   9.9
Russia             148       1470                   9.9
Denmark            285       2403               

In [11]:
# Rows of the report whose country is the US.
df_us = df.loc[df['Country_Region'].eq('US')]

In [12]:
# Both values are pre-formatted strings with two decimals and a trailing '%'.
# 331002651 is presumably the US population figure used as the denominator.
confirmed_pct = f"{df_us['Confirmed'].sum() / 331002651 * 100:.2f}%"
# Deaths as a share of confirmed cases.
deaths_pct = f"{df_us['Deaths'].sum() / df_us['Confirmed'].sum() * 100:.2f}%"

In [13]:
# Same two ratios as for the confirmed figures, but based on the estimated case
# counts. Both are strings with two decimals and a trailing '%'.
# 331002651 is presumably the US population figure used as the denominator.
est_confirmed_pct = f"{df_us['Estimated Cases'].sum() / 331002651 * 100:.2f}%"
est_deaths_pct = f"{df_us['Deaths'].sum() / df_us['Estimated Cases'].sum() * 100:.2f}%"

In [14]:
# Report the confirmed US figures.
print('There have been %d confirmed cases of COVID-19 in the US.' % df_us['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in the US.' % df_us['Deaths'].sum())
# confirmed_pct / deaths_pct already end in '%', so the sentences must not add
# the word "percent" (which previously produced "0.18% percent").
print('An American has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# deaths_pct is deaths divided by confirmed cases, i.e. a share of confirmed
# cases, not the chance that any given American has died.
print('%s of confirmed COVID-19 cases in the US have resulted in death.' % deaths_pct)

There have been 580619 confirmed cases of COVID-19 in the US.
There have been 23529 deaths from COVID-19 in the US.
An American has a 0.18% percent chance of having had a confirmed case of COVID-19.
An American has a 4.05% percent chance of having died from a confirmed case of COVID-19.


In [15]:
# Report the US figures based on the estimated (scaled-up) case counts.
print('Experts estimate there have been %d cases of COVID-19 in the US.' % df_us['Estimated Cases'].sum())
# est_confirmed_pct already ends in '%', so the sentence must not add the word
# "percent" (which previously produced "1.46% percent").
print('With estimated numbers an American has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in the US' % est_deaths_pct)

Experts estimate there have been 4838491 cases of COVID-19 in the US.
With estimated numbers an American has a 1.46% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.49% in the US


In [16]:
# Rows of the report whose state/province is Minnesota.
df_mn = df.loc[df['Province_State'].eq('Minnesota')]

In [17]:
# Rebinds confirmed_pct / deaths_pct with the Minnesota figures.
# Both are strings with two decimals and a trailing '%'.
# 5700000 is presumably the Minnesota population figure used as the denominator.
confirmed_pct = f"{df_mn['Confirmed'].sum() / 5700000 * 100:.2f}%"
# Deaths as a share of confirmed cases.
deaths_pct = f"{df_mn['Deaths'].sum() / df_mn['Confirmed'].sum() * 100:.2f}%"

In [18]:
# Rebinds the est_* values with the Minnesota figures, based on the estimated
# case counts. Both are strings with two decimals and a trailing '%'.
# 5700000 is presumably the Minnesota population figure used as the denominator.
est_confirmed_pct = f"{df_mn['Estimated Cases'].sum() / 5700000 * 100:.2f}%"
est_deaths_pct = f"{df_mn['Deaths'].sum() / df_mn['Estimated Cases'].sum() * 100:.2f}%"

In [19]:
# Report the confirmed Minnesota figures.
print('There have been %d confirmed cases of COVID-19 in Minnesota.' % df_mn['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in Minnesota.' % df_mn['Deaths'].sum())
# confirmed_pct / deaths_pct already end in '%', so the sentences must not add
# the word "percent" (which previously produced "0.03% percent").
print('A Minnesotan has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# deaths_pct is deaths divided by confirmed cases, i.e. a share of confirmed
# cases, not the chance that any given Minnesotan has died.
print('%s of confirmed COVID-19 cases in Minnesota have resulted in death.' % deaths_pct)

There have been 1621 confirmed cases of COVID-19 in Minnesota.
There have been 70 deaths from COVID-19 in Minnesota.
A Minnesotan has a 0.03% percent chance of having had a confirmed case of COVID-19.
A Minnesotan has a 4.32% percent chance of having died from a confirmed case of COVID-19.


In [20]:
# Report the Minnesota figures based on the estimated (scaled-up) case counts.
print('Experts estimate there have been %d cases of COVID-19 in MN.' % df_mn['Estimated Cases'].sum())
# est_confirmed_pct already ends in '%', so the sentence must not add the word
# "percent" (which previously produced "0.24% percent").
print('With estimated numbers a Minnesotan has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in MN' % est_deaths_pct)

Experts estimate there have been 13508 cases of COVID-19 in MN.
With estimated numbers a Minnesotan has a 0.24% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.52% in MN
