In [1]:
import pandas as pd
from datetime import datetime, timedelta
# Notebook-wide pandas settings.
# Turn off chained-assignment warnings (the option's default is 'warn').
pd.set_option('mode.chained_assignment', None)
# Show floats with one decimal place when frames are rendered.
pd.set_option('display.float_format', '{:.1f}'.format)

In [2]:
# NOTE: despite its name, `date_today` holds *yesterday's* date (local clock),
# formatted as MM-DD-YYYY for use in the report file name.
yesterday = datetime.now() - timedelta(days=1)
date_today = f"{yesterday:%m-%d-%Y}"

In [3]:
# Daily-report CSV in the CSSEGISandData/COVID-19 GitHub repository; the file
# is named after the date string computed above.
daily_reports_dir = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports'
url = '{}/{}.csv'.format(daily_reports_dir, date_today)

In [4]:
# Download the daily report into a DataFrame.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so the
# original call fails on current pandas. `on_bad_lines='warn'` is the
# replacement that keeps the old behaviour of `error_bad_lines=False`:
# malformed rows are skipped and a warning is emitted for each.
df = pd.read_csv(url, on_bad_lines='warn')
# These columns are not used by any of the cells below.
df = df.drop(columns=['Lat', 'Long_', 'FIPS'])

In [5]:
# Share of actual cases assumed to show up in the confirmed counts.
# FiveThirtyEight has experts estimating that roughly 9-12% of cases are being
# reported; the upper end of that range is used here.
# Note: this is a fraction (0.12), not a percentage (12).
percent_estimate = 12 / 100

In [6]:
# Scale confirmed counts up by the reciprocal of the assumed reporting share.
df['Estimated Cases'] = df['Confirmed'].mul(1 / percent_estimate)

In [7]:
# Per-country fatality-rate table.
# Only the two columns that are needed get summed. Summing every column and
# dropping the unwanted ones afterwards relied on pandas silently discarding
# text columns (no longer done in pandas 2.0) and would also carry along any
# additional numeric columns present in the report.
df_countries = df.groupby('Country_Region')[['Confirmed', 'Deaths']].sum()
# Keep only countries with at least 5000 confirmed cases.
df_countries = df_countries[df_countries['Confirmed'] >= 5000]
# Deaths as a percentage of confirmed cases.
df_countries['Fatality Rate'] = df_countries['Deaths'] / df_countries['Confirmed'] * 100
# Highest fatality rate first.
df_countries = df_countries.sort_values(by='Fatality Rate', ascending=False)

In [8]:
# Leave the frame as the cell's last expression so Jupyter renders it with its
# rich table display instead of printing plain text.
df_countries

                Confirmed  Deaths  Fatality Rate
Country_Region                                  
Italy              124632   15362           12.3
United Kingdom      42477    4320           10.2
Netherlands         16727    1656            9.9
Spain              126168   11947            9.5
France              90848    7574            8.3
Belgium             18431    1283            7.0
Iran                55743    3452            6.2
Sweden               6443     373            5.8
Brazil              10360     445            4.3
China               82543    3330            4.0
Switzerland         20505     666            3.2
US                 308850    8407            2.7
Portugal            10524     266            2.5
Turkey              23934     501            2.1
Korea, South        10156     177            1.7
Canada              12978     218            1.7
Austria             11781     186            1.6
Germany             96092    1444            1.5
Norway              

In [9]:
# Per-country recoveries-per-death table (replaces the earlier `df_countries`).
# Only the two columns that are needed get summed. Summing every column and
# dropping the unwanted ones afterwards relied on pandas silently discarding
# text columns (no longer done in pandas 2.0) and would also carry along any
# additional numeric columns present in the report.
df_countries = df.groupby('Country_Region')[['Deaths', 'Recovered']].sum()
# Keep only countries with at least 1000 recorded recoveries.
df_countries = df_countries[df_countries['Recovered'] >= 1000]
# Number of recoveries recorded for every recorded death.
df_countries['Recoveries per Death'] = df_countries['Recovered'] / df_countries['Deaths']
# Most recoveries per death first.
df_countries = df_countries.sort_values(by='Recoveries per Death', ascending=False)

In [10]:
# Leave the frame as the cell's last expression so Jupyter renders it with its
# rich table display instead of printing plain text.
df_countries

                Deaths  Recovered  Recoveries per Death
Country_Region                                         
Korea, South       177       6325                  35.7
China             3330      76946                  23.1
Germany           1444      26400                  18.3
Austria            186       2507                  13.5
Canada             218       2577                  11.8
Switzerland        666       6415                   9.6
Denmark            161       1379                   8.6
Iran              3452      19736                   5.7
Spain            11947      34219                   2.9
Belgium           1283       3247                   2.5
France            7574      15572                   2.1
US                8407      14652                   1.7
Italy            15362      20996                   1.4


In [11]:
# Rows of the report whose country is the US.
df_us = df.loc[df['Country_Region'].eq('US')]

In [12]:
# Both values are pre-formatted strings with two decimals and a trailing '%'.
us_confirmed_total = df_us['Confirmed'].sum()
# Confirmed cases relative to 331002651 (presumably the US population - TODO confirm source).
confirmed_pct = f"{us_confirmed_total / 331002651 * 100:.2f}%"
# Deaths relative to confirmed cases.
deaths_pct = f"{df_us['Deaths'].sum() / us_confirmed_total * 100:.2f}%"

In [13]:
# Same ratios as for the confirmed counts, but based on the estimated case
# totals. Both values are pre-formatted strings with a trailing '%'.
us_estimated_total = df_us['Estimated Cases'].sum()
# Estimated cases relative to 331002651 (presumably the US population - TODO confirm source).
est_confirmed_pct = f"{us_estimated_total / 331002651 * 100:.2f}%"
# Deaths relative to estimated cases.
est_deaths_pct = f"{df_us['Deaths'].sum() / us_estimated_total * 100:.2f}%"

In [14]:
# Summary of the confirmed US numbers.
print('There have been %d confirmed cases of COVID-19 in the US.' % df_us['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in the US.' % df_us['Deaths'].sum())
# `confirmed_pct` / `deaths_pct` already end in '%', so the messages must not
# add the word "percent" (that produced "0.09% percent").
print('An American has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# `deaths_pct` is deaths divided by confirmed cases, i.e. it describes people
# with a confirmed case, not the population as a whole.
print('An American with a confirmed case of COVID-19 has a %s chance of having died from it.' % deaths_pct)

There have been 308850 confirmed cases of COVID-19 in the US.
There have been 8407 deaths from COVID-19 in the US.
An American has a 0.09% percent chance of having had a confirmed case of COVID-19.
An American has a 2.72% percent chance of having died from a confirmed case of COVID-19.


In [15]:
# Summary of the estimated US numbers.
# The estimate is a float; '%.0f' rounds it, whereas '%d' would truncate and
# could print a value one too low when the sum lands just under an integer.
print('Experts estimate there have been %.0f cases of COVID-19 in the US.' % df_us['Estimated Cases'].sum())
# `est_confirmed_pct` already ends in '%', so the message must not add the
# word "percent" (that produced "0.78% percent").
print('With estimated numbers an American has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in the US' % est_deaths_pct)

Experts estimate there have been 2573750 cases of COVID-19 in the US.
With estimated numbers an American has a 0.78% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.33% in the US


In [16]:
# Rows of the report whose state/province is Minnesota.
df_mn = df.loc[df['Province_State'].eq('Minnesota')]

In [17]:
# Overwrites the US values of `confirmed_pct` / `deaths_pct` with Minnesota's.
# Both values are pre-formatted strings with two decimals and a trailing '%'.
mn_confirmed_total = df_mn['Confirmed'].sum()
# Confirmed cases relative to 5700000 (presumably Minnesota's population - TODO confirm source).
confirmed_pct = f"{mn_confirmed_total / 5700000 * 100:.2f}%"
# Deaths relative to confirmed cases.
deaths_pct = f"{df_mn['Deaths'].sum() / mn_confirmed_total * 100:.2f}%"

In [18]:
# Overwrites the US values of `est_confirmed_pct` / `est_deaths_pct` with
# Minnesota's. Both values are pre-formatted strings with a trailing '%'.
mn_estimated_total = df_mn['Estimated Cases'].sum()
# Estimated cases relative to 5700000 (presumably Minnesota's population - TODO confirm source).
est_confirmed_pct = f"{mn_estimated_total / 5700000 * 100:.2f}%"
# Deaths relative to estimated cases.
est_deaths_pct = f"{df_mn['Deaths'].sum() / mn_estimated_total * 100:.2f}%"

In [19]:
# Summary of the confirmed Minnesota numbers.
print('There have been %d confirmed cases of COVID-19 in Minnesota.' % df_mn['Confirmed'].sum())
print('There have been %d deaths from COVID-19 in Minnesota.' % df_mn['Deaths'].sum())
# `confirmed_pct` / `deaths_pct` already end in '%', so the messages must not
# add the word "percent" (that produced "0.02% percent").
print('A Minnesotan has a %s chance of having had a confirmed case of COVID-19.' % confirmed_pct)
# `deaths_pct` is deaths divided by confirmed cases, i.e. it describes people
# with a confirmed case, not the population as a whole.
print('A Minnesotan with a confirmed case of COVID-19 has a %s chance of having died from it.' % deaths_pct)

There have been 865 confirmed cases of COVID-19 in Minnesota.
There have been 24 deaths from COVID-19 in Minnesota.
A Minnesotan has a 0.02% percent chance of having had a confirmed case of COVID-19.
A Minnesotan has a 2.77% percent chance of having died from a confirmed case of COVID-19.


In [20]:
# Summary of the estimated Minnesota numbers.
# The estimate is a float; '%.0f' rounds it, whereas '%d' would truncate and
# could print a value one too low when the sum lands just under an integer.
print('Experts estimate there have been %.0f cases of COVID-19 in MN.' % df_mn['Estimated Cases'].sum())
# `est_confirmed_pct` already ends in '%', so the message must not add the
# word "percent" (that produced "0.13% percent").
print('With estimated numbers a Minnesotan has a %s chance of having had COVID-19' % est_confirmed_pct)
print('The estimated numbers give us a fatality rate of %s in MN' % est_deaths_pct)

Experts estimate there have been 7208 cases of COVID-19 in MN.
With estimated numbers a Minnesotan has a 0.13% percent chance of having had COVID-19
The estimated numbers give us a fatality rate of 0.33% in MN
