In [1]:
# Import WRDS library
import wrds
import pandas as pd

## Establish connection to WRDS database

We open the database connection and assign it to the variable ```db```.<br>
Here, we need to enter our WRDS username and password.<br>
Creating a .pgpass file is recommended by WRDS, but it is not required.

In [2]:
# Establish a live connection to WRDS. The call prompts interactively for the
# username and password (the password input is masked) and then offers to
# create a .pgpass file.
db = wrds.Connection()

Enter your WRDS username [leeca]:cadenlee
Enter your password:········
WRDS recommends setting up a .pgpass file.
Create .pgpass file now [y/n]?: n
You can create this file yourself at any time
with the create_pgpass_file() function.
Loading library list...
Done


## Query libraries available on WRDS

In [3]:
# Fetch the list of available WRDS libraries; `lib` is reused by the cells below
lib = db.list_libraries()

In [4]:
# Report how many libraries were returned
print(f'WRDS contains {len(lib)} number of libraries.')

WRDS contains 301 number of libraries.


In [5]:
# Keep only the option-related libraries, i.e. names containing 'option'.
# `lib` was already fetched in an earlier cell, so WRDS is not queried again.
om = [x for x in lib if 'option' in x]
om

['optionm',
 'optionm_all',
 'optionmsamp_europe',
 'optionmsamp_us',
 'wrdsapps_link_crsp_optionm']

In [6]:
# Display the full list of library names
lib

['aha_sample',
 'audit',
 'audit_acct_os',
 'audit_common',
 'audit_corp_legal',
 'audit_europe',
 'auditsmp',
 'auditsmp_all',
 'bank',
 'bank_all',
 'block',
 'block_all',
 'boardex_trial',
 'boardsmp',
 'bvd',
 'bvd_amadeus_trial',
 'bvd_orbis_large',
 'bvd_orbis_medium',
 'bvd_orbis_small',
 'bvdsamp',
 'calcbench_trial',
 'calcbnch',
 'cboe',
 'cboe_all',
 'ciq',
 'ciq_common',
 'ciq_pplintel',
 'ciq_ratings',
 'ciqsamp',
 'ciqsamp_common',
 'ciqsamp_keydev',
 'ciqsamp_transcripts',
 'columnar',
 'comp',
 'comp_bank',
 'comp_bank_daily',
 'comp_execucomp',
 'comp_global',
 'comp_global_daily',
 'comp_na_annual_all',
 'comp_na_daily_all',
 'comp_na_monthly_all',
 'comp_ratings',
 'comp_segments_hist',
 'comp_segments_hist_daily',
 'compa',
 'compb',
 'compg',
 'compm',
 'compsamp_snapshot',
 'compseg',
 'comscore',
 'comscore_2018',
 'comscore_2019',
 'comscore_2020',
 'comscore_common',
 'contrib',
 'contrib_as_filed_financials',
 'contrib_ceo_turnover',
 'contrib_char_returns',
 

## Query the tables within a given library

Here, we shall explore in a bit more depth what data tables are available in the ```optionm_all``` library.

In [7]:
# List the data tables in the optionm_all library, report how many there are,
# and preview the first five names
tables = db.list_tables(library='optionm_all')
print(f'There exists {len(tables)} tables within the optionm_all library.')
tables[:5]

There exists 208 tables within the optionm_all library.


['distrd',
 'distrprojd1996',
 'distrprojd1997',
 'distrprojd1998',
 'distrprojd1999']

## Inspect given data tables

Here, we are interested in the following tables, written as ```library.table```:
1. cboe_all.cboe: to obtain VIX index data
2. optionm_all.secprd: to obtain S&P 500 index data
3. optionm_all.opvold: to obtain S&P 500 option volume data


In [8]:
# Show the columns (name, nullable, type, comment) of the cboe_all.cboe table
db.describe_table(library='cboe_all', table='cboe')

Approximately 13013 rows in cboe_all.cboe.


Unnamed: 0,name,nullable,type,comment
0,date,True,DATE,
1,vixo,True,DOUBLE_PRECISION,
2,vixh,True,DOUBLE_PRECISION,
3,vixl,True,DOUBLE_PRECISION,
4,vix,True,DOUBLE_PRECISION,
5,vxoo,True,DOUBLE_PRECISION,
6,vxoh,True,DOUBLE_PRECISION,
7,vxol,True,DOUBLE_PRECISION,
8,vxo,True,DOUBLE_PRECISION,
9,vxno,True,DOUBLE_PRECISION,


The ```cboe_all.cboe``` table contains OHLC (open, high, low, close) data for multiple volatility indices, including the VIX, the Nasdaq volatility index (VXN) and the DJIA volatility index (VXD).

In [9]:
# Show the columns (name, nullable, type, comment) of the optionm_all.secprd table
db.describe_table(library='optionm_all', table='secprd')

Approximately 59470480 rows in optionm_all.secprd.


Unnamed: 0,name,nullable,type,comment
0,secid,True,DOUBLE_PRECISION,
1,date,True,DATE,
2,low,True,DOUBLE_PRECISION,
3,high,True,DOUBLE_PRECISION,
4,close,True,DOUBLE_PRECISION,
5,volume,True,DOUBLE_PRECISION,
6,return,True,DOUBLE_PRECISION,
7,cfadj,True,DOUBLE_PRECISION,
8,open,True,DOUBLE_PRECISION,
9,cfret,True,DOUBLE_PRECISION,


For the ```optionm_all.secprd``` table, the key fields are: ```secid```, ```date```, ```low```, ```high```, ```close```, ```open```, ```volume```.

In [10]:
# Show the columns (name, nullable, type, comment) of the optionm_all.opvold table
db.describe_table(library='optionm_all', table='opvold')

Approximately 70234824 rows in optionm_all.opvold.


Unnamed: 0,name,nullable,type,comment
0,secid,True,DOUBLE_PRECISION,
1,date,True,DATE,
2,cp_flag,True,VARCHAR(1),
3,volume,True,DOUBLE_PRECISION,
4,open_interest,True,DOUBLE_PRECISION,


## Data extraction using SQL query on WRDS

The ```optionm_all.opvold``` table contains the option volume and open interest, split by ```cp_flag``` (```C``` for calls, ```P``` for puts).

In [11]:
# Relative paths of the SQL scripts used for the data extraction below.

# VIX index data -- source: library='cboe_all', table='cboe'
vix_fn = "../SQL/get_vix.sql"

# S&P 500 index prices -- source: library='optionm_all', table='secprd'
spx_fn = "../SQL/get_spx.sql"

# S&P 500 call/put option volume and call/put flag
# -- source: library='optionm_all', table='opvold'
opvol_fn = "../SQL/get_option_volume.sql"



In [12]:
# Read the S&P 500 query and print it so the SQL that will be run is visible.
# The context manager closes the file even if reading raises.
with open(spx_fn, 'r') as fd:
    sqlFile = fd.read()

print(sqlFile)

/*
Link to reference document: https://wrds-www.wharton.upenn.edu/documents/1504/IvyDB_US_v5.4_Reference_Manual.pdf
Requires logging into WRDS
*/

SELECT
    A.date AS trading_date,
    A.open AS open_price,
    A.high AS high_price,
    A.low AS low_price,
    A.close AS close_price
    
FROM optionm_all.secprd A
WHERE A.secid = 108105 /*security id of the S&P 500 index*/
AND A.date BETWEEN TO_DATE('2013-03-01', 'YYYY-MM-DD') AND TO_DATE('2023-02-28', 'YYYY-MM-DD')



In [13]:
def read_sql_script(fname, db, date_cols=('trading_date',)):
    """Run the SQL query stored in a file and return the query result.

    Parameters
    ----------
    fname : str or path-like
        Path of the SQL script to execute.
    db : object
        Connection object exposing ``raw_sql(sql, date_cols=...)``; in this
        notebook, the ``wrds.Connection`` instance.
    date_cols : list or tuple of str, optional
        Column names forwarded to ``db.raw_sql`` as ``date_cols``. Defaults to
        ``['trading_date']``. A list or tuple is forwarded as a fresh list;
        any other value (e.g. ``None``) is forwarded unchanged.

    Returns
    -------
    The value returned by ``db.raw_sql`` for the query text.
    """
    # Context manager guarantees the file handle is closed even if read() fails.
    with open(fname, 'r') as sql_file:
        query = sql_file.read()

    # Hand the backend its own list so that no state is shared between calls
    # (the original used a mutable list as the default argument).
    if isinstance(date_cols, (list, tuple)):
        date_cols = list(date_cols)

    return db.raw_sql(query, date_cols=date_cols)

In [14]:
# Run the VIX query (vix_fn) through read_sql_script and preview the result
df_vix = read_sql_script(vix_fn, db)
print('VIX data extracted')

df_vix.head()

VIX data extracted


Unnamed: 0,trading_date,open_price,high_price,low_price,close_price
0,2013-03-01,16.1,16.82,15.14,15.36
1,2013-03-04,16.16,16.16,14.01,14.01
2,2013-03-05,13.44,13.66,13.24,13.48
3,2013-03-06,13.18,13.77,13.17,13.53
4,2013-03-07,13.45,13.56,13.03,13.06


In [15]:
# Run the S&P 500 index query (spx_fn) through read_sql_script and preview the result
df_spx = read_sql_script(spx_fn, db)
print('SPX data extracted')

df_spx.head()

SPX data extracted


Unnamed: 0,trading_date,open_price,high_price,low_price,close_price
0,2013-03-01,1514.68,1519.99,1501.48,1518.2
1,2013-03-04,1518.2,1525.27,1512.29,1525.2
2,2013-03-05,1525.2,1543.47,1525.2,1539.79
3,2013-03-06,1539.79,1545.25,1538.11,1541.46
4,2013-03-07,1541.46,1545.78,1541.46,1544.26


In [16]:
# Run the option volume query (opvol_fn) through read_sql_script and preview the result
df_opvol = read_sql_script(opvol_fn, db)
print('option volume data extracted')

df_opvol.head()

option volume data extracted


Unnamed: 0,trading_date,call_or_put,open_interest,volume
0,2013-03-01,C,4458724.0,280745.0
1,2013-03-01,P,7870731.0,559167.0
2,2013-03-04,C,4325376.0,235395.0
3,2013-03-04,P,7517321.0,515620.0
4,2013-03-05,C,4391488.0,617249.0
