This notebook explores US covid-19 cases using [covid19py](https://pypi.org/project/COVID19Py/) package

In [1]:
#!pip install COVID19Py

In [2]:
import COVID19Py
import plotly.express as px

import pandas as pd
import numpy as np

### Lets explore the api

In [3]:
covid19 = COVID19Py.COVID19()

There are 3 ways to choose the source of the 
- csbs (data Conference of State Bank Supervisors)
- jhu (John Hopkins University)
- nyt (New York Times)

jhu data-source will be used as a default source if you don't specify a source parameter in your request.

In [4]:
covid19 = COVID19Py.COVID19(data_source="csbs")

In [5]:
# Quick summary
latest = covid19.getLatest()
latest

{'confirmed': 1471735, 'deaths': 87411, 'recovered': 0}

In [6]:
# Location based update (current numbers)
locations = covid19.getLocations()
locations[1]

{'id': 1,
 'country': 'US',
 'country_code': 'US',
 'country_population': 327167434,
 'province': 'New York',
 'county': 'Nassau',
 'last_updated': '2020-05-16T20:19:00Z',
 'coordinates': {'latitude': '40.74165225', 'longitude': '-73.58899619'},
 'latest': {'confirmed': 39033, 'deaths': 2034, 'recovered': 0}}

In [7]:
# longitudinal data
covid19 = COVID19Py.COVID19(data_source="jhu")
data = covid19.getLocationByCountryCode("US",timelines=True)

In [8]:
data

[{'id': 225,
  'country': 'US',
  'country_code': 'US',
  'country_population': 327167434,
  'province': '',
  'last_updated': '2020-05-17T02:00:28.248747Z',
  'coordinates': {'latitude': '37.0902', 'longitude': '-95.7129'},
  'latest': {'confirmed': 1442824, 'deaths': 87530, 'recovered': 0},
  'timelines': {'confirmed': {'latest': 1442824,
    'timeline': {'2020-01-22T00:00:00Z': 1,
     '2020-01-23T00:00:00Z': 1,
     '2020-01-24T00:00:00Z': 2,
     '2020-01-25T00:00:00Z': 2,
     '2020-01-26T00:00:00Z': 5,
     '2020-01-27T00:00:00Z': 5,
     '2020-01-28T00:00:00Z': 5,
     '2020-01-29T00:00:00Z': 5,
     '2020-01-30T00:00:00Z': 5,
     '2020-01-31T00:00:00Z': 7,
     '2020-02-01T00:00:00Z': 8,
     '2020-02-02T00:00:00Z': 8,
     '2020-02-03T00:00:00Z': 11,
     '2020-02-04T00:00:00Z': 11,
     '2020-02-05T00:00:00Z': 11,
     '2020-02-06T00:00:00Z': 11,
     '2020-02-07T00:00:00Z': 11,
     '2020-02-08T00:00:00Z': 11,
     '2020-02-09T00:00:00Z': 11,
     '2020-02-10T00:00:00Z': 1

### Let's get data from India

In [9]:
covid19 = COVID19Py.COVID19(data_source="jhu")
india_data = covid19.getLocationByCountryCode("IN",timelines=True)

In [10]:
india_data

[{'id': 131,
  'country': 'India',
  'country_code': 'IN',
  'country_population': 1352617328,
  'province': '',
  'last_updated': '2020-05-17T02:00:27.501009Z',
  'coordinates': {'latitude': '21.0', 'longitude': '78.0'},
  'latest': {'confirmed': 85784, 'deaths': 2753, 'recovered': 0},
  'timelines': {'confirmed': {'latest': 85784,
    'timeline': {'2020-01-22T00:00:00Z': 0,
     '2020-01-23T00:00:00Z': 0,
     '2020-01-24T00:00:00Z': 0,
     '2020-01-25T00:00:00Z': 0,
     '2020-01-26T00:00:00Z': 0,
     '2020-01-27T00:00:00Z': 0,
     '2020-01-28T00:00:00Z': 0,
     '2020-01-29T00:00:00Z': 0,
     '2020-01-30T00:00:00Z': 1,
     '2020-01-31T00:00:00Z': 1,
     '2020-02-01T00:00:00Z': 1,
     '2020-02-02T00:00:00Z': 2,
     '2020-02-03T00:00:00Z': 3,
     '2020-02-04T00:00:00Z': 3,
     '2020-02-05T00:00:00Z': 3,
     '2020-02-06T00:00:00Z': 3,
     '2020-02-07T00:00:00Z': 3,
     '2020-02-08T00:00:00Z': 3,
     '2020-02-09T00:00:00Z': 3,
     '2020-02-10T00:00:00Z': 3,
     '2020-02

In [11]:
# Longitudinal data of confirmed cases from India
india_data[0]['timelines']['confirmed']['timeline'].items()

dict_items([('2020-01-22T00:00:00Z', 0), ('2020-01-23T00:00:00Z', 0), ('2020-01-24T00:00:00Z', 0), ('2020-01-25T00:00:00Z', 0), ('2020-01-26T00:00:00Z', 0), ('2020-01-27T00:00:00Z', 0), ('2020-01-28T00:00:00Z', 0), ('2020-01-29T00:00:00Z', 0), ('2020-01-30T00:00:00Z', 1), ('2020-01-31T00:00:00Z', 1), ('2020-02-01T00:00:00Z', 1), ('2020-02-02T00:00:00Z', 2), ('2020-02-03T00:00:00Z', 3), ('2020-02-04T00:00:00Z', 3), ('2020-02-05T00:00:00Z', 3), ('2020-02-06T00:00:00Z', 3), ('2020-02-07T00:00:00Z', 3), ('2020-02-08T00:00:00Z', 3), ('2020-02-09T00:00:00Z', 3), ('2020-02-10T00:00:00Z', 3), ('2020-02-11T00:00:00Z', 3), ('2020-02-12T00:00:00Z', 3), ('2020-02-13T00:00:00Z', 3), ('2020-02-14T00:00:00Z', 3), ('2020-02-15T00:00:00Z', 3), ('2020-02-16T00:00:00Z', 3), ('2020-02-17T00:00:00Z', 3), ('2020-02-18T00:00:00Z', 3), ('2020-02-19T00:00:00Z', 3), ('2020-02-20T00:00:00Z', 3), ('2020-02-21T00:00:00Z', 3), ('2020-02-22T00:00:00Z', 3), ('2020-02-23T00:00:00Z', 3), ('2020-02-24T00:00:00Z', 3), ('

In [12]:
df = pd.DataFrame.from_dict(india_data[0]['timelines']['confirmed']['timeline'].items())
df.tail()

Unnamed: 0,0,1
110,2020-05-11T00:00:00Z,70768
111,2020-05-12T00:00:00Z,74292
112,2020-05-13T00:00:00Z,78055
113,2020-05-14T00:00:00Z,81997
114,2020-05-15T00:00:00Z,85784


^ So we have data for last 115 days. Let' create a formatted dataframe

In [13]:
covid19 = COVID19Py.COVID19(data_source="jhu")
india_data = covid19.getLocationByCountryCode("IN",timelines=True)
# dict to df
df = pd.DataFrame.from_dict(india_data[0]['timelines']['confirmed']['timeline'].items())
# rename
mapping = {df.columns[0]:'datetime', df.columns[1]: 'confirmed_cases'}
df = df.rename(columns=mapping)
# get date column
df['date'] = pd.to_datetime(df['datetime'])
df['date'] = df['date'].dt.date
india_confirmed = df
india_confirmed['country'] = 'India'
india_confirmed['percent_change'] = 100*india_confirmed.confirmed_cases.pct_change()
india_confirmed['percent_change'][0] = 0.00000
# reverse the df
india_confirmed = india_confirmed.reindex(index=india_confirmed.index[::-1])
india_confirmed.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,datetime,confirmed_cases,date,country,percent_change
114,2020-05-15T00:00:00Z,85784,2020-05-15,India,4.618462
113,2020-05-14T00:00:00Z,81997,2020-05-14,India,5.050285
112,2020-05-13T00:00:00Z,78055,2020-05-13,India,5.065148
111,2020-05-12T00:00:00Z,74292,2020-05-12,India,4.979652
110,2020-05-11T00:00:00Z,70768,2020-05-11,India,5.370676


In [14]:
# lets get a T - day column 
india_confirmed.reset_index(drop=True,inplace=True)
india_confirmed['day'] = india_confirmed.index + 1
india_confirmed.head()

Unnamed: 0,datetime,confirmed_cases,date,country,percent_change,day
0,2020-05-15T00:00:00Z,85784,2020-05-15,India,4.618462,1
1,2020-05-14T00:00:00Z,81997,2020-05-14,India,5.050285,2
2,2020-05-13T00:00:00Z,78055,2020-05-13,India,5.065148,3
3,2020-05-12T00:00:00Z,74292,2020-05-12,India,4.979652,4
4,2020-05-11T00:00:00Z,70768,2020-05-11,India,5.370676,5


### Let's get data from USA

In [15]:
covid19 = COVID19Py.COVID19(data_source="jhu")
us_data = covid19.getLocationByCountryCode("US",timelines=True)
# dict to df
df = pd.DataFrame.from_dict(us_data[0]['timelines']['confirmed']['timeline'].items())
# rename
mapping = {df.columns[0]:'datetime', df.columns[1]: 'confirmed_cases'}
df = df.rename(columns=mapping)
# get date column
df['date'] = pd.to_datetime(df['datetime'])
df['date'] = df['date'].dt.date
us_confirmed = df
us_confirmed['country'] = 'US'
us_confirmed['percent_change'] = 100*us_confirmed.confirmed_cases.pct_change()
us_confirmed['percent_change'][0] = 0.00000
# reverse the df
us_confirmed = us_confirmed.reindex(index=us_confirmed.index[::-1])
us_confirmed.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,datetime,confirmed_cases,date,country,percent_change
114,2020-05-15T00:00:00Z,1442824,2020-05-15,US,1.766854
113,2020-05-14T00:00:00Z,1417774,2020-05-14,US,1.968346
112,2020-05-13T00:00:00Z,1390406,2020-05-13,US,1.535736
111,2020-05-12T00:00:00Z,1369376,2020-05-12,US,1.594725
110,2020-05-11T00:00:00Z,1347881,2020-05-11,US,1.400855


In [16]:
us_confirmed.reset_index(drop=True,inplace=True)
us_confirmed['day'] = us_confirmed.index + 1
us_confirmed.head()

Unnamed: 0,datetime,confirmed_cases,date,country,percent_change,day
0,2020-05-15T00:00:00Z,1442824,2020-05-15,US,1.766854,1
1,2020-05-14T00:00:00Z,1417774,2020-05-14,US,1.968346,2
2,2020-05-13T00:00:00Z,1390406,2020-05-13,US,1.535736,3
3,2020-05-12T00:00:00Z,1369376,2020-05-12,US,1.594725,4
4,2020-05-11T00:00:00Z,1347881,2020-05-11,US,1.400855,5


In [22]:
# Last 30 days
df = pd.concat([us_confirmed.iloc[:30,],india_confirmed.iloc[:30,]],ignore_index=True)
fig = px.scatter(df, x="day", y="confirmed_cases", color="country", trendline="ols")
fig['layout']['yaxis']['autorange'] = "reversed"
fig.show()

results = px.get_trendline_results(fig)
print(results)

  country                                     px_fit_results
0      US  <statsmodels.regression.linear_model.Regressio...
1   India  <statsmodels.regression.linear_model.Regressio...


In [24]:
# Last 10 days
df = pd.concat([us_confirmed.iloc[:10,],india_confirmed.iloc[:10,]],ignore_index=True)
fig = px.scatter(df, x="day", y="confirmed_cases", color="country", trendline="ols")
fig['layout']['yaxis']['autorange'] = "reversed"
fig.show()

results = px.get_trendline_results(fig)
print(results)

  country                                     px_fit_results
0      US  <statsmodels.regression.linear_model.Regressio...
1   India  <statsmodels.regression.linear_model.Regressio...


In [28]:
# Last 30 days - Percent Change
df = pd.concat([us_confirmed.iloc[:30,],india_confirmed.iloc[:30,]],ignore_index=True)
fig = px.scatter(df, x="day", y="percent_change", color="country", trendline="ols")
fig['layout']['xaxis']['autorange'] = "reversed"
fig['layout']['yaxis']['autorange'] = "reversed"
fig.show()

#results = px.get_trendline_results(fig)
#print(results)

In [29]:
india_confirmed.head()

Unnamed: 0,datetime,confirmed_cases,date,country,percent_change,day
0,2020-05-15T00:00:00Z,85784,2020-05-15,India,4.618462,1
1,2020-05-14T00:00:00Z,81997,2020-05-14,India,5.050285,2
2,2020-05-13T00:00:00Z,78055,2020-05-13,India,5.065148,3
3,2020-05-12T00:00:00Z,74292,2020-05-12,India,4.979652,4
4,2020-05-11T00:00:00Z,70768,2020-05-11,India,5.370676,5


In [30]:
us_confirmed.head()

Unnamed: 0,datetime,confirmed_cases,date,country,percent_change,day
0,2020-05-15T00:00:00Z,1442824,2020-05-15,US,1.766854,1
1,2020-05-14T00:00:00Z,1417774,2020-05-14,US,1.968346,2
2,2020-05-13T00:00:00Z,1390406,2020-05-13,US,1.535736,3
3,2020-05-12T00:00:00Z,1369376,2020-05-12,US,1.594725,4
4,2020-05-11T00:00:00Z,1347881,2020-05-11,US,1.400855,5


In [37]:
# US - Percent Change
df = us_confirmed
fig = px.scatter(df, x="day", y="percent_change", color="country", trendline="ols")
fig['layout']['xaxis']['autorange'] = "reversed"
fig.show()

In [40]:
# India - Percent Change
df = india_confirmed
fig = px.scatter(df, x="day", y="percent_change", color="country", trendline="ols")
fig['layout']['xaxis']['autorange'] = "reversed"
fig.update_layout(yaxis_type="log")
fig.show()