In [1]:
import pandas as pd
from datetime import datetime, timedelta
# Notebook-wide pandas settings: silence the chained-assignment warning
# (default='warn') and render floats with one decimal place.
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.float_format', '{:.1f}'.format)

In [2]:
# Despite the name, this is *yesterday's* local date, rendered as MM-DD-YYYY.
# NOTE(review): the value depends on the day the notebook is run, so stored
# outputs are not reproducible from code alone - consider pinning a date.
date_today = f"{datetime.today() - timedelta(days=1):%m-%d-%Y}"

In [3]:
# Raw-CSV URL of the CSSE COVID-19 daily report whose file name is the
# MM-DD-YYYY string held in `date_today`.
url = f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{date_today}.csv'

In [4]:
# Download the daily report and discard the geographic columns the analysis
# never uses.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines='warn'` (pandas >= 1.3) keeps the same behaviour: malformed
# rows are skipped and a warning is emitted for each one.
df = (
    pd.read_csv(url, on_bad_lines='warn')
    .drop(columns=['Lat', 'Long_', 'FIPS'])
)

In [5]:
# Assumed fraction of actual cases that end up being reported as confirmed.
# FiveThirtyEight has experts estimating around 9-12% of cases are being reported;
# 0.12 is the top of that range, so estimates derived from it are on the low side.
percent_estimate = 0.12

In [6]:
# Scale every row's confirmed count by the inverse of the assumed reporting
# rate to get an estimated case count.
df['Estimated Cases'] = df['Confirmed'].mul(1 / percent_estimate)

In [7]:
# Fatality rate (deaths per 100 confirmed cases) for every country with at
# least 10,000 confirmed cases, highest rate first.
# The two needed columns are selected *before* aggregating: the frame also
# holds text columns, and pandas >= 2.0 no longer silently drops non-numeric
# columns in `groupby().sum()`.
df_countries = (
    df.groupby('Country_Region')[['Confirmed', 'Deaths']]
    .sum()
    .loc[lambda totals: totals['Confirmed'] >= 10000]
    .assign(**{'Fatality Rate': lambda totals: totals['Deaths'] / totals['Confirmed'] * 100})
    .sort_values(by='Fatality Rate', ascending=False)
)

In [8]:
# Plain-text dump of the per-country fatality-rate table built in the previous cell.
print(df_countries)

                Confirmed  Deaths  Fatality Rate
Country_Region                                  
Belgium             40956    5998           14.6
Italy              183957   24648           13.4
United Kingdom     130172   17378           13.4
France             159297   20829           13.1
Sweden              15322    1765           11.5
Netherlands         34317    3929           11.4
Spain              204178   21282           10.4
Brazil              43079    2741            6.4
Iran                84802    5297            6.2
China               83853    4636            5.5
US                 823786   44845            5.4
Switzerland         28063    1478            5.3
Ecuador             10398     520            5.0
Canada              39402    1909            4.8
Ireland             16040     730            4.6
Portugal            21379     762            3.6
Germany            148291    5033            3.4
Austria             14873     491            3.3
India               

In [9]:
# Recoveries per death for every country with at least 5,000 recoveries,
# highest ratio first. Note that this rebinds `df_countries`, replacing the
# fatality-rate table built earlier.
# The two needed columns are selected *before* aggregating: the frame also
# holds text columns, and pandas >= 2.0 no longer silently drops non-numeric
# columns in `groupby().sum()`.
df_countries = (
    df.groupby('Country_Region')[['Deaths', 'Recovered']]
    .sum()
    .loc[lambda totals: totals['Recovered'] >= 5000]
    .assign(**{'Recoveries per Death': lambda totals: totals['Recovered'] / totals['Deaths']})
    .sort_values(by='Recoveries per Death', ascending=False)
)

In [10]:
# Plain-text dump of the per-country recoveries-per-death table built in the previous cell.
print(df_countries)

                Deaths  Recovered  Recoveries per Death
Country_Region                                         
Korea, South       237       8213                  34.7
Austria            491      10971                  22.3
Germany           5033      95200                  18.9
China             4636      77799                  16.8
Peru               484       6982                  14.4
Switzerland       1478      19400                  13.1
Ireland            730       9233                  12.6
Iran              5297      60965                  11.5
Brazil            2741      22991                   8.4
Canada            1909      13188                   6.9
Turkey            2259      14918                   6.6
Spain            21282      82514                   3.9
Italy            24648      51600                   2.1
France           20829      39819                   1.9
US               44845      75204                   1.7
Belgium           5998       9002               

In [11]:
# Keep only the rows whose country is the US.
df_us = df.loc[df['Country_Region'].eq('US')]

In [12]:
# Confirmed cases as a share of the US population, and deaths as a share of
# confirmed cases; both rendered as strings that already end in "%".
# 331,002,651 is the population denominator - presumably a 2020 estimate; TODO confirm source.
us_confirmed = df_us['Confirmed'].sum()
us_deaths = df_us['Deaths'].sum()
confirmed_pct = "{0:.2f}%".format(us_confirmed / 331_002_651 * 100)
deaths_pct = "{0:.2f}%".format(us_deaths / us_confirmed * 100)

In [13]:
# Same two ratios as for the confirmed numbers, but using the estimated case
# count: estimated cases per capita, and deaths per estimated case.
# 331,002,651 is the population denominator - presumably a 2020 estimate; TODO confirm source.
us_estimated = df_us['Estimated Cases'].sum()
est_confirmed_pct = "{0:.2f}%".format(us_estimated / 331_002_651 * 100)
est_deaths_pct = "{0:.2f}%".format(df_us['Deaths'].sum() / us_estimated * 100)

In [14]:
# Summarise the confirmed US figures.
# `confirmed_pct` and `deaths_pct` already end in "%", so the sentences must
# not append the word "percent" (the old output read "0.25% percent").
print('There have been %d confirmed cases of COVID-19 in the US.' % df_us['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in the US.' % df_us['Deaths'].sum())
print('An American has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# deaths_pct is deaths / confirmed cases - a fatality rate among confirmed
# cases, not the chance that any given American has died.
print('The confirmed numbers give us a fatality rate of %s in the US.' % deaths_pct)

There have been 823786 confirmed cases of COVID-19 in the US.
There have been 44845 deaths from COVID-19 in the US.
An American has a 0.25% percent chance of having had a confirmed case of COVID-19.
An American has a 5.44% percent chance of having died from a confirmed case of COVID-19.


In [15]:
# Summarise the estimated US figures.
# `est_confirmed_pct` already ends in "%", so the sentence must not append the
# word "percent" (the old output read "2.07% percent").
print('Experts estimate there have been %d cases of COVID-19 in the US.' % df_us['Estimated Cases'].sum())
print('With estimated numbers an American has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in the US' % est_deaths_pct)

Experts estimate there have been 6864883 cases of COVID-19 in the US.
With estimated numbers an American has a 2.07% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.65% in the US


In [16]:
# Keep only the rows whose state/province is Minnesota.
df_mn = df.loc[df['Province_State'].eq('Minnesota')]

In [17]:
# Confirmed cases as a share of Minnesota's population, and deaths as a share
# of confirmed cases; both rendered as strings that already end in "%".
# This rebinds `confirmed_pct` / `deaths_pct`, replacing the US values.
# 5,700,000 is the population denominator - presumably a rounded estimate; TODO confirm source.
mn_confirmed = df_mn['Confirmed'].sum()
mn_deaths = df_mn['Deaths'].sum()
confirmed_pct = "{0:.2f}%".format(mn_confirmed / 5_700_000 * 100)
deaths_pct = "{0:.2f}%".format(mn_deaths / mn_confirmed * 100)

In [18]:
# Same two ratios using the estimated case count for Minnesota: estimated
# cases per capita, and deaths per estimated case.
# This rebinds `est_confirmed_pct` / `est_deaths_pct`, replacing the US values.
# 5,700,000 is the population denominator - presumably a rounded estimate; TODO confirm source.
mn_estimated = df_mn['Estimated Cases'].sum()
est_confirmed_pct = "{0:.2f}%".format(mn_estimated / 5_700_000 * 100)
est_deaths_pct = "{0:.2f}%".format(df_mn['Deaths'].sum() / mn_estimated * 100)

In [19]:
# Summarise the confirmed Minnesota figures.
# `confirmed_pct` and `deaths_pct` already end in "%", so the sentences must
# not append the word "percent" (the old output read "0.05% percent").
print('There have been %d confirmed cases of COVID-19 in Minnesota.' % df_mn['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in Minnesota.' % df_mn['Deaths'].sum())
print('A Minnesotan has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# deaths_pct is deaths / confirmed cases - a fatality rate among confirmed
# cases, not the chance that any given Minnesotan has died.
print('The confirmed numbers give us a fatality rate of %s in Minnesota.' % deaths_pct)

There have been 2567 confirmed cases of COVID-19 in Minnesota.
There have been 160 deaths from COVID-19 in Minnesota.
A Minnesotan has a 0.05% percent chance of having had a confirmed case of COVID-19.
A Minnesotan has a 6.23% percent chance of having died from a confirmed case of COVID-19.


In [20]:
# Summarise the estimated Minnesota figures.
# `est_confirmed_pct` already ends in "%", so the sentence must not append the
# word "percent" (the old output read "0.38% percent").
print('Experts estimate there have been %d cases of COVID-19 in MN.' % df_mn['Estimated Cases'].sum())
print('With estimated numbers a Minnesotan has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in MN' % est_deaths_pct)

Experts estimate there have been 21391 cases of COVID-19 in MN.
With estimated numbers a Minnesotan has a 0.38% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.75% in MN
