In [1]:
import pandas as pd
from datetime import datetime, timedelta
# Notebook-wide pandas settings: turn off the chained-assignment warning
# (pandas' default is 'warn') and render floats with one decimal place.
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.float_format', '{:.1f}'.format)

In [2]:
# MM-DD-YYYY stamp for the day before the current local date (despite the
# name, this is yesterday); it is interpolated into the daily-report URL.
date_today = f"{datetime.today() - timedelta(days=1):%m-%d-%Y}"

In [3]:
# Raw GitHub location of the CSSE daily-report CSV whose file name is date_today.
url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_daily_reports/'
    f'{date_today}.csv'
)

In [4]:
# Download the daily report. `error_bad_lines` was deprecated in pandas 1.3 and
# removed in 2.0; `on_bad_lines='warn'` (pandas >= 1.3) keeps the previous
# behaviour of skipping malformed rows while still warning about each one.
df = pd.read_csv(url, on_bad_lines='warn')
# Drop the coordinate/FIPS columns; the analysis visible in this notebook
# does not read them.
df = df.drop(columns=['Lat', 'Long_', 'FIPS'])

In [5]:
# Assumed fraction of actual cases that get reported (0.12 means 12%).
# FiveThirtyEight has experts estimating that around 9-12% of cases are being
# reported; this uses the top of that range. Confirmed counts are divided by
# this value to produce the 'Estimated Cases' column.
percent_estimate = 0.12

In [6]:
# Scale confirmed counts up by the reciprocal of the assumed reporting fraction.
df['Estimated Cases'] = df['Confirmed'].mul(1 / percent_estimate)

In [7]:
# Country-level fatality table.
# The two numeric columns are selected *before* aggregating: summing the whole
# frame only worked because older pandas silently dropped text columns (e.g.
# Province_State) from groupby().sum(); pandas 2.x no longer does that.
# Building the result as one chain also avoids adding a column to a filtered
# slice, so it does not depend on the chained-assignment warning being off.
df_countries = (
    df.groupby('Country_Region')[['Confirmed', 'Deaths']]
    .sum()
    # Keep only countries with at least 5000 confirmed cases.
    .loc[lambda totals: totals['Confirmed'] >= 5000]
    # Deaths as a percentage of confirmed cases.
    .assign(**{'Fatality Rate': lambda totals: totals['Deaths'] / totals['Confirmed'] * 100})
    .sort_values(by='Fatality Rate', ascending=False)
)

In [8]:
# Plain-text dump of the per-country fatality table, highest rate first.
print(df_countries)

                Confirmed  Deaths  Fatality Rate
Country_Region                                  
Italy              128948   15887           12.3
United Kingdom      48436    4943           10.2
Netherlands         17953    1771            9.9
Spain              131646   12641            9.6
France              93773    8093            8.6
Belgium             19691    1447            7.3
Iran                58226    3603            6.2
Sweden               6830     401            5.9
Brazil              11130     486            4.4
China               82602    3333            4.0
Switzerland         21100     715            3.4
US                 337072    9619            2.9
Portugal            11278     295            2.6
Turkey              27069     574            2.1
Korea, South        10237     183            1.8
Austria             12051     204            1.7
Canada              15756     259            1.6
Germany            100123    1584            1.6
Norway              

In [9]:
# Country-level recoveries-per-death table (rebinds df_countries).
# The numeric columns are selected *before* aggregating: summing the whole
# frame only worked because older pandas silently dropped text columns (e.g.
# Province_State) from groupby().sum(); pandas 2.x no longer does that.
df_countries = (
    df.groupby('Country_Region')[['Deaths', 'Recovered']]
    .sum()
    # Keep only countries with at least 1000 recoveries.
    .loc[lambda totals: totals['Recovered'] >= 1000]
    # NOTE(review): a country with 0 deaths yields inf here and sorts to the
    # top; that matches the original behaviour and is left unchanged.
    .assign(**{'Recoveries per Death': lambda totals: totals['Recovered'] / totals['Deaths']})
    .sort_values(by='Recoveries per Death', ascending=False)
)

In [10]:
# Plain-text dump of the recoveries-per-death table, highest ratio first.
print(df_countries)

                Deaths  Recovered  Recoveries per Death
Country_Region                                         
Korea, South       183       6463                  35.3
China             3333      77207                  23.2
Germany           1584      28700                  18.1
Malaysia            61       1005                  16.5
Austria            204       2998                  14.7
Canada             259       3012                  11.6
Switzerland        715       6415                   9.0
Denmark            179       1429                   8.0
Iran              3603      19736                   5.5
Spain            12641      38080                   3.0
Belgium           1447       3751                   2.6
France            8093      16349                   2.0
Turkey             574       1042                   1.8
US                9619      17448                   1.8
Italy            15887      21815                   1.4


In [11]:
# Rows of the daily report whose Country_Region is exactly 'US'.
df_us = df.loc[df['Country_Region'].eq('US')]

In [12]:
# Pre-formatted percentage strings (two decimals, trailing '%') for the US.
# 331002651 is presumably the US population figure - TODO confirm the source.
# confirmed_pct: confirmed cases relative to that figure.
confirmed_pct = f"{df_us['Confirmed'].sum() / 331002651 * 100:.2f}%"
# deaths_pct: deaths relative to confirmed cases (not to the population).
deaths_pct = f"{df_us['Deaths'].sum() / df_us['Confirmed'].sum() * 100:.2f}%"

In [13]:
# Same two percentages as above, but based on the 'Estimated Cases' column
# instead of confirmed counts; strings carry two decimals and a trailing '%'.
est_confirmed_pct = f"{df_us['Estimated Cases'].sum() / 331002651 * 100:.2f}%"
est_deaths_pct = f"{df_us['Deaths'].sum() / df_us['Estimated Cases'].sum() * 100:.2f}%"

In [14]:
# Report the US totals. confirmed_pct and deaths_pct already end in '%', so the
# sentences must not add the word "percent" again (it printed "0.10% percent").
print('There have been %d confirmed cases of COVID-19 in the US.' % df_us['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in the US.' % df_us['Deaths'].sum())
print('An American has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# deaths_pct is deaths / confirmed cases: a rate among confirmed cases, not a
# probability over the whole population, so the sentence says exactly that.
print('%s of confirmed COVID-19 cases in the US have resulted in death.' % deaths_pct)

There have been 337072 confirmed cases of COVID-19 in the US.
There have been 9619 deaths from COVID-19 in the US.
An American has a 0.10% percent chance of having had a confirmed case of COVID-19.
An American has a 2.85% percent chance of having died from a confirmed case of COVID-19.


In [15]:
# Report the US figures derived from the 'Estimated Cases' column.
# est_confirmed_pct already ends in '%', so the sentence no longer appends the
# word "percent" (it previously printed "0.85% percent").
print('Experts estimate there have been %d cases of COVID-19 in the US.' % df_us['Estimated Cases'].sum())
print('With estimated numbers an American has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in the US' % est_deaths_pct)

Experts estimate there have been 2808933 cases of COVID-19 in the US.
With estimated numbers an American has a 0.85% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.34% in the US


In [16]:
# Rows of the daily report whose Province_State is exactly 'Minnesota'.
df_mn = df.loc[df['Province_State'].eq('Minnesota')]

In [17]:
# Pre-formatted percentage strings (two decimals, trailing '%') for Minnesota.
# These rebind confirmed_pct / deaths_pct, replacing the US values computed
# earlier. 5700000 is presumably Minnesota's population - TODO confirm.
confirmed_pct = f"{df_mn['Confirmed'].sum() / 5700000 * 100:.2f}%"
# deaths_pct: deaths relative to confirmed cases (not to the population).
deaths_pct = f"{df_mn['Deaths'].sum() / df_mn['Confirmed'].sum() * 100:.2f}%"

In [18]:
# Minnesota percentages based on the 'Estimated Cases' column; these rebind
# est_confirmed_pct / est_deaths_pct, replacing the US values.
est_confirmed_pct = f"{df_mn['Estimated Cases'].sum() / 5700000 * 100:.2f}%"
est_deaths_pct = f"{df_mn['Deaths'].sum() / df_mn['Estimated Cases'].sum() * 100:.2f}%"

In [19]:
# Report the Minnesota totals. confirmed_pct and deaths_pct already end in '%',
# so the sentences must not add the word "percent" again.
print('There have been %d confirmed cases of COVID-19 in Minnesota.' % df_mn['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in Minnesota.' % df_mn['Deaths'].sum())
print('A Minnesotan has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# deaths_pct is deaths / confirmed cases: a rate among confirmed cases, not a
# probability over the whole population, so the sentence says exactly that.
print('%s of confirmed COVID-19 cases in Minnesota have resulted in death.' % deaths_pct)

There have been 935 confirmed cases of COVID-19 in Minnesota.
There have been 29 deaths from COVID-19 in Minnesota.
A Minnesotan has a 0.02% percent chance of having had a confirmed case of COVID-19.
A Minnesotan has a 3.10% percent chance of having died from a confirmed case of COVID-19.


In [20]:
# Report the Minnesota figures derived from the 'Estimated Cases' column.
# est_confirmed_pct already ends in '%', so the sentence no longer appends the
# word "percent" (it previously printed "0.14% percent").
print('Experts estimate there have been %d cases of COVID-19 in MN.' % df_mn['Estimated Cases'].sum())
print('With estimated numbers a Minnesotan has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in MN' % est_deaths_pct)

Experts estimate there have been 7791 cases of COVID-19 in MN.
With estimated numbers a Minnesotan has a 0.14% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.37% in MN
