In [None]:
'''
    Tutorial from https://nbviewer.jupyter.org/github/goldmansachs/gs-quant/blob/master/gs_quant/tutorials/covid/Comparing%2C%20Reconciling%2C%20and%20Combining%20COVID-19%20Data%20Sources.ipynb
    to experiment with using GS Marquee API.
'''

In [1]:
import requests
import json

In [2]:
# Application client ID sent in the token request; empty placeholder here.
# NOTE: do not commit a real value — prefer supplying it at run time.
client_id = ''

In [3]:
# Application client secret sent in the token request; empty placeholder here.
# NOTE: do not commit a real value — prefer supplying it at run time.
client_secret = ''

In [4]:
# Form fields for the client-credentials token request that is POSTed further down.
auth_data = dict(
    grant_type='client_credentials',
    client_id=client_id,
    client_secret=client_secret,
    # Space-separated list of the scopes being requested
    scope='read_content read_financial_data read_product_data read_user_profile',
)

In [5]:
# Create a session instance; it is reused for the token request and the API call below
session = requests.Session()

In [6]:
# Make a POST to retrieve access_token
access_url = 'https://idfs.gs.com/as/token.oauth2'
auth_request = session.post(access_url, data=auth_data)
# Surface HTTP-level failures (e.g. rejected credentials) right here, instead of
# as an opaque KeyError / JSON decoding error on the lines below.
auth_request.raise_for_status()
# Decode the JSON body of the token response
access_token_dict = auth_request.json()
access_token = access_token_dict['access_token']

In [7]:
# Attach the bearer token to the session so later requests made through it carry it
session.headers['Authorization'] = 'Bearer ' + access_token

In [8]:
# Test API connectivity by calling the /v1/users/self endpoint with the authenticated session
request_url = 'https://api.marquee.gs.com/v1/users/self'
request = session.get(url=request_url)
# Report the outcome so the check is actually visible; a 2xx status means the call succeeded
print(request.status_code, request.reason)

In [9]:
# User authentication
from gs_quant.session import GsSession, Environment

# Open the gs_quant session with the same client credentials, requesting only
# the 'read_product_data' scope.
GsSession.use(
    client_id=client_id,
    client_secret=client_secret,
    scopes=('read_product_data',),
)

In [10]:
# Get COVID-19 Data
from gs_quant.data import Dataset
import datetime

def get_datasets(datasets):
    """Download each named dataset and attach running totals derived from its daily counts.

    For every dataset ID, data from 2019-12-31 through today is requested.
    The daily ``newConfirmed`` / ``newFatalities`` columns (whichever are
    present) are summed per key and accumulated over time, separately for
    each ``countryId`` (and ``subdivisionId`` when the dataset has one).
    The running totals are merged back onto the original rows under the
    names ``totalConfirmed`` / ``totalFatalities``; when the dataset already
    has a column of that name, the derived one carries a ``_y`` suffix.

    Args:
        datasets: Iterable of dataset ID strings.

    Returns:
        dict mapping dataset ID -> DataFrame indexed by ``date``. A dataset
        that fails to download or process is reported on stdout and left
        out of the result (best effort, the remaining datasets still load).
    """
    # Loop invariants: daily column -> cumulative column name, and the query start date
    val_map = {'newConfirmed': 'totalConfirmed', 'newFatalities': 'totalFatalities'}
    start = datetime.date(2019, 12, 31)

    ds_dict = {}
    for dataset in datasets:
        try:
            df = Dataset(dataset).get_data(start, datetime.date.today())

            # Geographic keys present in this dataset; 'date' is added as the time key
            group_keys = [x for x in ['countryId', 'subdivisionId'] if x in df.columns]
            keys = group_keys + ['date']
            vals = [x for x in val_map if x in df.columns]

            # numeric_only avoids trying to add up text / timestamp columns
            daily = df.groupby(keys).sum(numeric_only=True)
            if group_keys:
                # Accumulate within every geographic key, not just the first index
                # level, so totals do not run on from one subdivision into the next.
                running = daily.groupby(level=group_keys).cumsum()
            else:
                # No geographic key at all: a single series accumulated over time
                running = daily.cumsum()

            df_t = running.reset_index()[keys + vals].rename(columns=val_map)
            ds_dict[dataset] = df.reset_index().merge(df_t, on=keys, suffixes=('', '_y')).set_index('date')

        except Exception as err:
            # Deliberately broad: one failing dataset must not stop the others
            print(f'Failed to obtain {dataset} with {getattr(err,"message",repr(err))}')
    return ds_dict

In [11]:
# Country-level daily COVID-19 dataset IDs, one per data source.
# In the run recorded below, the WHO request timed out and the CDC request was
# rejected with "Missing condition: countryId", so those two are absent from the result.
country_datasets = [
    'COVID19_COUNTRY_DAILY_ECDC', 'COVID19_COUNTRY_DAILY_WHO',
    'COVID19_COUNTRY_DAILY_WIKI', 'COVID19_COUNTRY_DAILY_CDC',
]
# NOTE: despite its name, `df` is the dict returned by get_datasets, not a single DataFrame
df = get_datasets(datasets=country_datasets)

Failed to obtain COVID19_COUNTRY_DAILY_WHO with ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='api.gs.com', port=443): Read timed out. (read timeout=65)"))
Failed to obtain COVID19_COUNTRY_DAILY_CDC with {"statusCode":400,"reasonPhrase":"Bad Request","title":"Error","messages":["Missing condition: countryId","Request: 27705f4c-6217061"]}


In [167]:
from datetime import date
# Load the WHO country-level daily dataset from 15 May 2020 onwards.
# The range start is passed as `start`, matching the other get_data calls in
# this notebook (`start_date` is not the keyword they use).
# NOTE(review): a `countryId='US'`-style keyword presumably narrows the query to
# one country — confirm against the gs_quant documentation before relying on it.
who_dataset = Dataset('COVID19_COUNTRY_DAILY_WHO')
dataframe = who_dataset.get_data(start=date(2020, 5, 15))

In [117]:
# Display the WHO frame held in `dataframe` as the cell's output
dataframe

Unnamed: 0_level_0,countryId,countryName,totalConfirmed,newConfirmed,totalFatalities,newFatalities,daysSinceReported,regionName,updateTime
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-06-01,CN,CN,84588.0,18.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
2020-06-02,CN,CN,84597.0,9.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
2020-06-03,CN,CN,84602.0,5.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
2020-06-04,CN,CN,84603.0,1.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
2020-06-05,CN,CN,84614.0,11.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
...,...,...,...,...,...,...,...,...,...
2020-07-20,CL,CL,330930.0,2084.0,8503.0,58.0,0.0,AMRO,2020-07-24 14:36:35.237
2020-07-21,CL,CL,333029.0,2099.0,8633.0,130.0,0.0,AMRO,2020-07-24 14:36:35.237
2020-07-22,CL,CL,334683.0,1654.0,8677.0,44.0,0.0,AMRO,2020-07-24 14:36:35.237
2020-07-23,CL,CL,336402.0,1719.0,8722.0,45.0,0.0,AMRO,2020-07-24 14:36:35.237


In [None]:
!pip install pandas numpy scipy plotly


In [14]:
import plotly.express as px

In [118]:
# Plain-text dump of the rows whose countryName equals 'CN'
print(dataframe.query("countryName == 'CN'"))

           countryId countryName  totalConfirmed  newConfirmed  \
date                                                             
2020-06-01        CN          CN         84588.0          18.0   
2020-06-02        CN          CN         84597.0           9.0   
2020-06-03        CN          CN         84602.0           5.0   
2020-06-04        CN          CN         84603.0           1.0   
2020-06-05        CN          CN         84614.0          11.0   
2020-06-06        CN          CN         84620.0           6.0   
2020-06-07        CN          CN         84629.0           9.0   
2020-06-08        CN          CN         84634.0           5.0   
2020-06-09        CN          CN         84638.0           4.0   
2020-06-10        CN          CN         84641.0           3.0   
2020-06-11        CN          CN         84652.0          11.0   
2020-06-12        CN          CN         84659.0           7.0   
2020-06-13        CN          CN         84671.0          12.0   
2020-06-14

2020-07-24 2020-07-24 14:36:35.237  


In [165]:
# Bubble chart: one marker per row, positioned by country on x and by total
# confirmed cases on a log-scaled y axis; marker size follows totalConfirmed
# and colour follows countryId.
fig = px.scatter(
    dataframe,
    x="countryId",
    y="totalConfirmed",
    size="totalConfirmed",
    color="countryId",
    hover_name="countryName",
    log_y=True,
    size_max=50,
)
# Title describes what is actually plotted (totalConfirmed per country; no
# new-case series appears in this chart).
fig.update_layout(title_text="Total Confirmed Cases by Country")
fig.show()

In [None]:
!pip install chart_studio

In [80]:
import plotly.graph_objects as go

In [121]:
# Read data fields
print(dataframe.columns)

print()
# Read data records
print(dataframe.index)

Index(['countryId', 'countryName', 'totalConfirmed', 'newConfirmed',
       'totalFatalities', 'newFatalities', 'daysSinceReported', 'regionName',
       'updateTime'],
      dtype='object')

DatetimeIndex(['2020-06-01', '2020-06-02', '2020-06-03', '2020-06-04',
               '2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08',
               '2020-06-09', '2020-06-10',
               ...
               '2020-07-15', '2020-07-16', '2020-07-17', '2020-07-18',
               '2020-07-19', '2020-07-20', '2020-07-21', '2020-07-22',
               '2020-07-23', '2020-07-24'],
              dtype='datetime64[ns]', name='date', length=11610, freq=None)


In [122]:
# Convert the frame to nested dicts (the default 'dict' orientation, spelled out).
# NOTE(review): the index holds repeated dates (one row per country per date), so a
# mapping keyed by index label presumably cannot hold every row — confirm before
# relying on data_dict.
data_dict = dataframe.to_dict(orient="dict")

In [123]:
# Pull out the countryId column, then show its values and its Python type
data = dataframe.get("countryId")
print(data, type(data), sep="\n")

date
2020-06-01    CN
2020-06-02    CN
2020-06-03    CN
2020-06-04    CN
2020-06-05    CN
              ..
2020-07-20    CL
2020-07-21    CL
2020-07-22    CL
2020-07-23    CL
2020-07-24    CL
Name: countryId, Length: 11610, dtype: object
<class 'pandas.core.series.Series'>


In [124]:
# Turn the `date` index into an ordinary column (the charts below use it as the x axis).
# NOTE: this rebinds `data`, which previously held the countryId Series, to a DataFrame.
data = dataframe.reset_index(drop=False)
data

Unnamed: 0,date,countryId,countryName,totalConfirmed,newConfirmed,totalFatalities,newFatalities,daysSinceReported,regionName,updateTime
0,2020-06-01,CN,CN,84588.0,18.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
1,2020-06-02,CN,CN,84597.0,9.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
2,2020-06-03,CN,CN,84602.0,5.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
3,2020-06-04,CN,CN,84603.0,1.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
4,2020-06-05,CN,CN,84614.0,11.0,4645.0,0.0,0.0,WPRO,2020-07-24 14:36:35.237
...,...,...,...,...,...,...,...,...,...,...
11605,2020-07-20,CL,CL,330930.0,2084.0,8503.0,58.0,0.0,AMRO,2020-07-24 14:36:35.237
11606,2020-07-21,CL,CL,333029.0,2099.0,8633.0,130.0,0.0,AMRO,2020-07-24 14:36:35.237
11607,2020-07-22,CL,CL,334683.0,1654.0,8677.0,44.0,0.0,AMRO,2020-07-24 14:36:35.237
11608,2020-07-23,CL,CL,336402.0,1719.0,8722.0,45.0,0.0,AMRO,2020-07-24 14:36:35.237


In [157]:
# Line chart of daily new confirmed cases for the rows whose countryId is 'CD'
fig = px.line(
    data.query("countryId == 'CD'"),
    x="date",
    y="newConfirmed",
)

# Chart title
fig.update_layout(title_text="New Confirmed Cases")

# Range slider under the x axis plus preset range-selector buttons
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector={
        "buttons": [
            {"count": 1, "label": "1 month", "step": "month", "stepmode": "backward"},
            {"count": 6, "label": "6 month", "step": "month", "stepmode": "backward"},
            {"count": 1, "label": "YTD", "step": "year", "stepmode": "todate"},
            {"count": 1, "label": "1 year", "step": "year", "stepmode": "backward"},
            {"step": "all"},
        ]
    },
)

fig.show()

In [143]:
# Line chart with two series, total and daily new confirmed cases, for the
# rows whose countryId is 'CD'
fig = px.line(
    data.query("countryId == 'CD'"),
    x="date",
    y=["totalConfirmed", "newConfirmed"],
)

# Chart title
fig.update_layout(title_text="Total Confirmed Cases vs New Confirmed Cases")

# Range slider under the x axis plus preset range-selector buttons
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector={
        "buttons": [
            {"count": 1, "label": "1 month", "step": "month", "stepmode": "backward"},
            {"count": 6, "label": "6 month", "step": "month", "stepmode": "backward"},
            {"count": 1, "label": "YTD", "step": "year", "stepmode": "todate"},
            {"count": 1, "label": "1 year", "step": "year", "stepmode": "backward"},
            {"step": "all"},
        ]
    },
)

# Render the figure
fig.show()