In [1]:
import pandas as pd
from datetime import datetime, timedelta
# Notebook-wide pandas settings.
# Chained-assignment checking is switched off (pandas' default for it is 'warn').
pd.set_option('mode.chained_assignment', None)
# Floats are rendered with one digit after the decimal point.
pd.set_option('display.float_format', '{:.1f}'.format)

In [2]:
# Despite its name, date_today holds the PREVIOUS day's date, formatted MM-DD-YYYY
# (presumably because the current day's report may not have been published yet).
# The CSSE repository stopped adding daily reports after 03-09-2023, so any later
# date is clamped to that final report; otherwise the download would hit a 404.
last_report_day = datetime(2023, 3, 9)
report_day = min(datetime.today() - timedelta(days=1), last_report_day)
date_today = report_day.strftime("%m-%d-%Y")

In [3]:
# Address of the daily report CSV: <reports folder>/<date_today>.csv
reports_root = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports'
url = '{}/{}.csv'.format(reports_root, date_today)

In [4]:
# Download the daily report into a DataFrame.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0. Its replacement
# is `on_bad_lines`; 'warn' reproduces the old behaviour of error_bad_lines=False
# (malformed rows are skipped and a warning is emitted). Requires pandas >= 1.3.
df = pd.read_csv(url, on_bad_lines='warn')
# Discard the coordinate and FIPS columns.
df = df.drop(columns=['Lat', 'Long_', 'FIPS'])

In [5]:
# Assumed share of actual cases that get reported as confirmed cases.
# FiveThirtyEight has experts estimating around 9-12% of cases are being reported.
# Note: despite the name, the value is a fraction (0.12 means 12%), not a percentage.
percent_estimate = 0.12

In [6]:
# Scale every confirmed count by the inverse of the assumed reporting fraction.
scale_up = 1/percent_estimate
df['Estimated Cases'] = df['Confirmed'].mul(scale_up)

In [7]:
# Fatality rate (deaths per 100 confirmed cases) per country, limited to countries
# with at least 10,000 confirmed cases and sorted from highest to lowest rate.
#
# Only the two count columns that are needed get aggregated. Summing the whole frame
# depended on pandas silently leaving text columns out of the result; since pandas 2.0
# `numeric_only` defaults to False for groupby reductions, so that no longer happens.
country_totals = df.groupby('Country_Region')[['Confirmed', 'Deaths']].sum()
# .copy() so the new column is added to an independent frame, not a slice.
df_countries = country_totals[country_totals['Confirmed'] >= 10000].copy()
df_countries['Fatality Rate'] = df_countries['Deaths'] / df_countries['Confirmed'] * 100
df_countries = df_countries.sort_values(by='Fatality Rate', ascending=False)

In [8]:
# Print the per-country table as plain text.
print(df_countries)

                Confirmed  Deaths  Fatality Rate
Country_Region                                  
Belgium             42797    6490           15.2
France             159460   21889           13.7
United Kingdom     139246   18791           13.5
Italy              189973   25549           13.4
Sweden              16755    2021           12.1
Netherlands         35921    4192           11.7
Spain              213024   22157           10.4
Mexico              11633    1069            9.2
Brazil              50036    3331            6.7
Iran                87026    5481            6.3
US                 869170   49954            5.7
China               83884    4636            5.5
Switzerland         28496    1549            5.4
Romania             10096     545            5.4
Canada              43286    2241            5.2
Ecuador             11183     560            5.0
Ireland             17607     794            4.5
Poland              10511     454            4.3
Portugal            

In [9]:
# Recoveries per death per country, limited to countries with at least 5,000
# recoveries and sorted from highest to lowest ratio.
# Note: this rebinds df_countries, replacing whatever table it held before.
#
# Only the two count columns that are needed get aggregated. Summing the whole frame
# depended on pandas silently leaving text columns out of the result; since pandas 2.0
# `numeric_only` defaults to False for groupby reductions, so that no longer happens.
recovery_totals = df.groupby('Country_Region')[['Deaths', 'Recovered']].sum()
# .copy() so the new column is added to an independent frame, not a slice.
df_countries = recovery_totals[recovery_totals['Recovered'] >= 5000].copy()
df_countries['Recoveries per Death'] = df_countries['Recovered'] / df_countries['Deaths']
df_countries = df_countries.sort_values(by='Recoveries per Death', ascending=False)

In [10]:
# Print the per-country table as plain text.
print(df_countries)

                Deaths  Recovered  Recoveries per Death
Country_Region                                         
Korea, South       240       8501                  35.4
Chile              168       5804                  34.5
Israel             192       5611                  29.2
Austria            522      11694                  22.4
Germany           5575     103300                  18.5
China             4636      77983                  16.8
Denmark            394       5573                  14.1
Switzerland       1549      20600                  13.3
Peru               572       7422                  13.0
Iran              5481      64843                  11.8
Ireland            794       9233                  11.6
Brazil            3331      26573                   8.0
Turkey            2491      18491                   7.4
India              721       5012                   7.0
Canada            2241      14761                   6.6
Spain            22157      89250               

In [11]:
# Keep only the rows whose Country_Region is 'US'.
is_us_row = df['Country_Region'].eq('US')
df_us = df.loc[is_us_row]

In [12]:
# Two "x.xx%" strings for the US rows:
#   confirmed_pct - confirmed cases relative to 331002651 (presumably the US population)
#   deaths_pct    - deaths relative to confirmed cases
us_population = 331002651
us_confirmed_total = df_us['Confirmed'].sum()
confirmed_pct = f"{us_confirmed_total / us_population * 100:.2f}%"
deaths_pct = f"{df_us['Deaths'].sum() / us_confirmed_total * 100:.2f}%"

In [13]:
# Two "x.xx%" strings for the US rows, based on the estimated case counts:
#   est_confirmed_pct - estimated cases relative to 331002651 (presumably the US population)
#   est_deaths_pct    - deaths relative to estimated cases
us_estimated_total = df_us['Estimated Cases'].sum()
est_confirmed_pct = "{:.2f}%".format(us_estimated_total / 331002651 * 100)
est_deaths_pct = "{:.2f}%".format(df_us['Deaths'].sum() / us_estimated_total * 100)

In [14]:
# Report the confirmed US figures.
# confirmed_pct and deaths_pct already end in "%", so the messages do not add the
# word "percent" (that produced "0.26% percent").
# deaths_pct is deaths divided by confirmed cases, i.e. a fatality rate among
# confirmed cases, not the chance that any American has died, so it is worded as such.
print('There have been %d confirmed cases of COVID-19 in the US.' % df_us['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in the US.' % df_us['Deaths'].sum())
print('An American has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
print('The fatality rate among confirmed cases of COVID-19 in the US is %s.' % deaths_pct)

There have been 869170 confirmed cases of COVID-19 in the US.
There have been 49954 deaths from COVID-19 in the US.
An American has a 0.26% percent chance of having had a confirmed case of COVID-19.
An American has a 5.75% percent chance of having died from a confirmed case of COVID-19.


In [15]:
# Report the estimated US figures.
# est_confirmed_pct already ends in "%", so the message does not add the word
# "percent" (that produced "2.19% percent").
# %d truncates the fractional part of the estimated case total.
print('Experts estimate there have been %d cases of COVID-19 in the US.' % df_us['Estimated Cases'].sum())
print('With estimated numbers an American has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in the US' % est_deaths_pct)

Experts estimate there have been 7243083 cases of COVID-19 in the US.
With estimated numbers an American has a 2.19% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.69% in the US


In [16]:
# Keep only the rows whose Province_State is 'Minnesota'.
is_mn_row = df['Province_State'].eq('Minnesota')
df_mn = df.loc[is_mn_row]

In [17]:
# Two "x.xx%" strings for the Minnesota rows (these rebind confirmed_pct / deaths_pct):
#   confirmed_pct - confirmed cases relative to 5700000 (presumably Minnesota's population)
#   deaths_pct    - deaths relative to confirmed cases
mn_population = 5700000
mn_confirmed_total = df_mn['Confirmed'].sum()
confirmed_pct = f"{mn_confirmed_total / mn_population * 100:.2f}%"
deaths_pct = f"{df_mn['Deaths'].sum() / mn_confirmed_total * 100:.2f}%"

In [18]:
# Two "x.xx%" strings for the Minnesota rows, based on the estimated case counts
# (these rebind est_confirmed_pct / est_deaths_pct):
#   est_confirmed_pct - estimated cases relative to 5700000 (presumably Minnesota's population)
#   est_deaths_pct    - deaths relative to estimated cases
mn_estimated_total = df_mn['Estimated Cases'].sum()
est_confirmed_pct = "{:.2f}%".format(mn_estimated_total / 5700000 * 100)
est_deaths_pct = "{:.2f}%".format(df_mn['Deaths'].sum() / mn_estimated_total * 100)

In [19]:
# Report the confirmed Minnesota figures.
# confirmed_pct and deaths_pct already end in "%", so the messages do not add the
# word "percent" (that produced "0.05% percent").
# deaths_pct is deaths divided by confirmed cases, i.e. a fatality rate among
# confirmed cases, not the chance that any Minnesotan has died, so it is worded as such.
print('There have been %d confirmed cases of COVID-19 in Minnesota.' % df_mn['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in Minnesota.' % df_mn['Deaths'].sum())
print('A Minnesotan has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
print('The fatality rate among confirmed cases of COVID-19 in Minnesota is %s.' % deaths_pct)

There have been 2942 confirmed cases of COVID-19 in Minnesota.
There have been 200 deaths from COVID-19 in Minnesota.
A Minnesotan has a 0.05% percent chance of having had a confirmed case of COVID-19.
A Minnesotan has a 6.80% percent chance of having died from a confirmed case of COVID-19.


In [20]:
# Report the estimated Minnesota figures.
# est_confirmed_pct already ends in "%", so the message does not add the word
# "percent" (that produced "0.43% percent").
# %d truncates the fractional part of the estimated case total.
print('Experts estimate there have been %d cases of COVID-19 in MN.' % df_mn['Estimated Cases'].sum())
print('With estimated numbers a Minnesotan has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in MN' % est_deaths_pct)

Experts estimate there have been 24516 cases of COVID-19 in MN.
With estimated numbers a Minnesotan has a 0.43% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.82% in MN
