In [4]:
# Dependencies (data storage and analysis)
import numpy as np
import pandas as pd

# Dependencies (visualization)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

In [7]:
# COVID-19 Data

# Read the covid_19 dataset from disk; the 'Date' column is parsed as
# datetimes rather than left as plain strings.
covid = pd.read_csv(
    'Resources/covid_2020.csv',
    parse_dates=['Date'],
)
print(covid)

         Province/State  Country/Region      Lat      Long       Date  \
0                 Anhui  Mainland China  31.8257  117.2264 2020-01-22   
1               Beijing  Mainland China  40.1824  116.4142 2020-01-22   
2             Chongqing  Mainland China  30.0572  107.8740 2020-01-22   
3                Fujian  Mainland China  26.0789  117.9874 2020-01-22   
4                 Gansu  Mainland China  36.0611  103.8343 2020-01-22   
...                 ...             ...      ...       ...        ...   
9085  Ramsey County, MN              US  44.9964  -93.0616 2020-03-06   
9086  Washoe County, NV              US  40.5608 -119.6035 2020-03-06   
9087   Wayne County, PA              US  41.6739  -75.2479 2020-03-06   
9088    Yolo County, CA              US  38.7646 -121.9018 2020-03-06   
9089                NaN    Vatican City  41.9029   12.4534 2020-03-06   

      Confirmed  Deaths  Recovered  
0           1.0     0.0        0.0  
1          14.0     0.0        0.0  
2           

In [8]:
# selecting only the columns we need
# Keep the date, the country and the three count columns; every other
# column of the loaded frame is dropped from here on.
covid = covid[[
    'Date',
    'Country/Region',
    'Confirmed',
    'Deaths',
    'Recovered',
]]
print(covid)

           Date  Country/Region  Confirmed  Deaths  Recovered
0    2020-01-22  Mainland China        1.0     0.0        0.0
1    2020-01-22  Mainland China       14.0     0.0        0.0
2    2020-01-22  Mainland China        6.0     0.0        0.0
3    2020-01-22  Mainland China        1.0     0.0        0.0
4    2020-01-22  Mainland China        0.0     0.0        0.0
...         ...             ...        ...     ...        ...
9085 2020-03-06              US        1.0     0.0        0.0
9086 2020-03-06              US        1.0     0.0        0.0
9087 2020-03-06              US        1.0     0.0        0.0
9088 2020-03-06              US        1.0     0.0        0.0
9089 2020-03-06    Vatican City        1.0     0.0        0.0

[9090 rows x 5 columns]


In [10]:
# Rename columns to shorter labels: 'Country/Region' -> 'Country' (drops the
# slash) and 'Confirmed' -> 'Cases'. 'Date', 'Deaths' and 'Recovered' keep
# their names.
# An explicit old -> new mapping is used instead of assigning a positional
# list to `covid.columns`, so a change in the upstream column order cannot
# silently attach the wrong label to a column.
covid = covid.rename(columns={'Country/Region': 'Country', 'Confirmed': 'Cases'})
print(covid)

           Date         Country  Cases  Deaths  Recovered
0    2020-01-22  Mainland China    1.0     0.0        0.0
1    2020-01-22  Mainland China   14.0     0.0        0.0
2    2020-01-22  Mainland China    6.0     0.0        0.0
3    2020-01-22  Mainland China    1.0     0.0        0.0
4    2020-01-22  Mainland China    0.0     0.0        0.0
...         ...             ...    ...     ...        ...
9085 2020-03-06              US    1.0     0.0        0.0
9086 2020-03-06              US    1.0     0.0        0.0
9087 2020-03-06              US    1.0     0.0        0.0
9088 2020-03-06              US    1.0     0.0        0.0
9089 2020-03-06    Vatican City    1.0     0.0        0.0

[9090 rows x 5 columns]


In [11]:
# Group by date and country, then total the count columns so that there is
# one row per (Date, Country) pair.
# The count columns are selected with a list (double brackets): selecting
# GroupBy columns with a bare tuple -- ['Cases', 'Deaths', 'Recovered'] inside
# single brackets -- was deprecated in pandas 1.0 and raises in pandas 2.0+.
# Doing everything in one chain also means `covid` is never temporarily bound
# to a GroupBy object.
covid = (
    covid
    .groupby(['Date', 'Country'])[['Cases', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()  # turn the Date/Country group keys back into columns
)
print(covid)

           Date               Country  Cases  Deaths  Recovered
0    2020-01-22           Afghanistan    0.0     0.0        0.0
1    2020-01-22               Algeria    0.0     0.0        0.0
2    2020-01-22               Andorra    0.0     0.0        0.0
3    2020-01-22             Argentina    0.0     0.0        0.0
4    2020-01-22               Armenia    0.0     0.0        0.0
...         ...                   ...    ...     ...        ...
4450 2020-03-06                    US  278.0    14.0        8.0
4451 2020-03-06               Ukraine    1.0     0.0        0.0
4452 2020-03-06  United Arab Emirates   29.0     0.0        5.0
4453 2020-03-06          Vatican City    1.0     0.0        0.0
4454 2020-03-06               Vietnam   16.0     0.0       16.0

[4455 rows x 5 columns]


In [12]:
# Keep only the (Date, Country) rows that report at least one case
covid_cases = covid[covid['Cases'].gt(0)]
print(covid_cases)

           Date               Country  Cases  Deaths  Recovered
44   2020-01-22                 Japan    2.0     0.0        0.0
52   2020-01-22                 Macau    1.0     0.0        0.0
53   2020-01-22        Mainland China  547.0    17.0       28.0
84   2020-01-22           South Korea    1.0     0.0        0.0
89   2020-01-22                Taiwan    1.0     0.0        0.0
...         ...                   ...    ...     ...        ...
4450 2020-03-06                    US  278.0    14.0        8.0
4451 2020-03-06               Ukraine    1.0     0.0        0.0
4452 2020-03-06  United Arab Emirates   29.0     0.0        5.0
4453 2020-03-06          Vatican City    1.0     0.0        0.0
4454 2020-03-06               Vietnam   16.0     0.0       16.0

[1640 rows x 5 columns]
