In [1]:
import numpy as np
import pandas as pd

In [2]:
raw_data = pd.read_csv('./Call Option Data/fqjo3s8eacwzxkcw.csv')

In [3]:
# Choose Amazon as the underlying stock
df = raw_data.loc[(raw_data.ticker=='AMZN')&(raw_data.cp_flag=='C'), 
                  ['date','exdate','strike_price','best_bid','best_offer','volume']].copy()
df['strike_price'] = df['strike_price']/10000 # scaling strike
df['option_price'] = df[['best_bid','best_offer']].mean(axis=1)
df.head()

Unnamed: 0,date,exdate,strike_price,best_bid,best_offer,volume,option_price
0,2018-01-02,2018-01-05,100.0,189.35,190.6,0,189.975
1,2018-01-02,2018-01-05,100.25,186.85,188.1,0,187.475
2,2018-01-02,2018-01-05,100.5,184.35,185.6,0,184.975
3,2018-01-02,2018-01-05,100.75,181.85,183.1,0,182.475
4,2018-01-02,2018-01-05,101.0,179.35,180.6,0,179.975


In [4]:
# Assume t0 = 2018-01-02
df.loc[df.date=='2018-01-02','exdate'].unique()

array(['2018-01-05', '2018-01-12', '2018-01-19', '2018-01-26',
       '2018-02-02', '2018-02-09', '2018-02-16', '2018-03-16',
       '2018-04-20', '2018-06-15', '2018-07-20', '2018-09-21',
       '2019-01-18', '2019-06-21', '2020-01-17'], dtype=object)

In [5]:
# Choose t1 = '2018-01-19', t2 = '2018-04-20' (arbitrarily,temporarily)

t1 = '2018-01-19'
t2 = '2018-04-20'

df_t1 = df.loc[df.exdate==t1, ['date','exdate','strike_price','volume','option_price']]
df_t2 = df.loc[df.exdate==t2, ['date','exdate','strike_price','volume','option_price']]

In [6]:
# t0 to proceed with
t0List = list(df_t1.date.unique())

print(len(t0List))
print(t0List) # should t0 be included when t0=t1? 

13
['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12', '2018-01-16', '2018-01-17', '2018-01-18', '2018-01-19']


In [7]:
result = pd.DataFrame()

for t0 in t0List:
    # t1: 20 largest trading volume
    df_t0t1 = df_t1[df_t1.date==t0].sort_values('volume',ascending=False).iloc[0:20].drop(columns='volume').reset_index(drop=True)
    # t2: 20 largest trading volume
    df_t0t2 = df_t2[df_t2.date==t0].sort_values('volume',ascending=False).iloc[0:20].drop(columns='volume').reset_index(drop=True)
    # concat horizontally
    tmp = pd.concat([df_t0t1,df_t0t2.drop(columns='date')],axis=1)
    # concat vertically
    result = pd.concat([result,tmp],axis=0)
    
result.columns = ['t0','t1','K1','pi1','t2','K2','pi2']
result = result.reset_index(drop=True)
print(result.shape)
result

(260, 7)


Unnamed: 0,t0,t1,K1,pi1,t2,K2,pi2
0,2018-01-02,2018-01-19,120.00,13.525,2018-04-20,120.0,61.050
1,2018-01-02,2018-01-19,110.00,92.525,2018-04-20,118.0,71.275
2,2018-01-02,2018-01-19,100.00,191.525,2018-04-20,112.0,108.400
3,2018-01-02,2018-01-19,117.50,27.125,2018-04-20,116.0,82.500
4,2018-01-02,2018-01-19,117.00,30.450,2018-04-20,150.0,3.675
...,...,...,...,...,...,...,...
255,2018-01-19,2018-01-19,132.25,0.015,2018-04-20,168.0,3.020
256,2018-01-19,2018-01-19,128.50,8.225,2018-04-20,172.0,2.305
257,2018-01-19,2018-01-19,128.00,12.825,2018-04-20,136.0,44.825
258,2018-01-19,2018-01-19,123.00,63.175,2018-04-20,124.0,103.650


In [12]:
result.to_csv('data_20180119_20180420.csv', header=True)

In [8]:
# K = [[np.array(result.loc[result.t0 == t0,'K1']),np.array(result.loc[result.t0==t0,'K2'])] for t0 in t0List]
# Pi = [[np.array(result.loc[result.t0 == t0,'pi1']),np.array(result.loc[result.t0==t0,'pi2'])] for t0 in t0List]