In [1]:
import numpy as np
import pandas as pd
from arbitragerepair import constraints, repair

In [2]:
# Read the raw option-quote CSV export into a DataFrame.
option_quotes_path = './Call Option Data/fqjo3s8eacwzxkcw.csv'
raw_data = pd.read_csv(option_quotes_path)

In [3]:
# Keep only the Amazon (AMZN) quotes, parse the two date columns, rescale the
# strike and replace bid/offer by their row-wise mean as the option price.
quote_columns = ['date', 'exdate', 'cp_flag', 'strike_price', 'best_bid', 'best_offer', 'volume']
is_amzn = raw_data['ticker'] == 'AMZN'

df = (
    raw_data.loc[is_amzn, quote_columns]
    .assign(
        date=lambda quotes: pd.to_datetime(quotes['date']),
        exdate=lambda quotes: pd.to_datetime(quotes['exdate']),
        strike_price=lambda quotes: quotes['strike_price'] / 10000,  # scaling strike
        option_price=lambda quotes: quotes[['best_bid', 'best_offer']].mean(axis=1),
    )
    .drop(columns=['best_bid', 'best_offer'])
)
df.head()

Unnamed: 0,date,exdate,cp_flag,strike_price,volume,option_price
0,2018-01-02,2018-01-05,C,100.0,0,189.975
1,2018-01-02,2018-01-05,C,100.25,0,187.475
2,2018-01-02,2018-01-05,C,100.5,0,184.975
3,2018-01-02,2018-01-05,C,100.75,0,182.475
4,2018-01-02,2018-01-05,C,101.0,0,179.975


In [4]:
# Assume t0 = 2018-01-02 and list the distinct expiry dates quoted on that day
quoted_on_t0 = df['date'] == '2018-01-02'
df.loc[quoted_on_t0, 'exdate'].unique()

array(['2018-01-05T00:00:00.000000000', '2018-01-12T00:00:00.000000000',
       '2018-01-19T00:00:00.000000000', '2018-01-26T00:00:00.000000000',
       '2018-02-02T00:00:00.000000000', '2018-02-09T00:00:00.000000000',
       '2018-02-16T00:00:00.000000000', '2018-03-16T00:00:00.000000000',
       '2018-04-20T00:00:00.000000000', '2018-06-15T00:00:00.000000000',
       '2018-07-20T00:00:00.000000000', '2018-09-21T00:00:00.000000000',
       '2019-01-18T00:00:00.000000000', '2019-06-21T00:00:00.000000000',
       '2020-01-17T00:00:00.000000000'], dtype='datetime64[ns]')

In [5]:
# Work with two expiries: t1 = '2018-01-19', t2 = '2018-04-20'
# (an arbitrary, provisional choice)

t1 = '2018-01-19'
t2 = '2018-04-20'

call_columns = ['date', 'exdate', 'strike_price', 'volume', 'option_price']
put_columns = ['date', 'exdate', 'strike_price', 'option_price']

expires_t1 = df['exdate'] == t1
expires_t2 = df['exdate'] == t2
is_call = df['cp_flag'] == 'C'
is_put = df['cp_flag'] == 'P'

# call options data (keeps the volume column)
df_t1 = df.loc[expires_t1 & is_call, call_columns]
df_t2 = df.loc[expires_t2 & is_call, call_columns]

# put options data
df_t1_P = df.loc[expires_t1 & is_put, put_columns]
df_t2_P = df.loc[expires_t2 & is_put, put_columns]

In [6]:
# Quote dates t0 to proceed with: every distinct date on which the t1 calls are quoted.
# list(...) over the unique() array keeps the elements as numpy.datetime64 values.
t0List = list(df_t1['date'].unique())

# TODO: should t0 be included when t0=t1?
print(len(t0List))
print(t0List)

13
[numpy.datetime64('2018-01-02T00:00:00.000000000'), numpy.datetime64('2018-01-03T00:00:00.000000000'), numpy.datetime64('2018-01-04T00:00:00.000000000'), numpy.datetime64('2018-01-05T00:00:00.000000000'), numpy.datetime64('2018-01-08T00:00:00.000000000'), numpy.datetime64('2018-01-09T00:00:00.000000000'), numpy.datetime64('2018-01-10T00:00:00.000000000'), numpy.datetime64('2018-01-11T00:00:00.000000000'), numpy.datetime64('2018-01-12T00:00:00.000000000'), numpy.datetime64('2018-01-16T00:00:00.000000000'), numpy.datetime64('2018-01-17T00:00:00.000000000'), numpy.datetime64('2018-01-18T00:00:00.000000000'), numpy.datetime64('2018-01-19T00:00:00.000000000')]


In [7]:
# stock prices data: the first CSV column holds the dates (dd/mm/yy)
stock = pd.read_csv('./stocks.csv')
stock = stock.rename(columns={stock.columns[0]: 'date'})
stock['date'] = pd.to_datetime(stock['date'], format="%d/%m/%y")

# AMZN stock prices, exposed under the column name 'stock_price'
stock_AMZN = stock[['date', 'AMZN']].rename(columns={'AMZN': 'stock_price'})

In [9]:
# Arbitrage repair function
def arbitrageRepair(columnT, columnK, columnC, columnF):
    """Repair arbitrage in a set of call quotes with an l1-norm objective.

    The inputs are converted to NumPy arrays, normalised with
    ``constraints.Normalise``, checked with ``constraints.detect`` and
    perturbed by the solution of ``repair.l1``; the perturbed prices are
    then mapped back with the normaliser's ``inverse_transform``.

    Parameters
    ----------
    columnT : array-like
        Expiries of the quotes (the caller passes year fractions).
    columnK : array-like
        Strike prices.
    columnC : array-like
        Call option prices.
    columnF : array-like
        Forward prices used when fitting the normaliser.

    Returns
    -------
    tuple
        ``(K0, C0)`` as returned by ``inverse_transform``: the de-normalised
        strikes and the de-normalised repaired call prices.
    """
    expiries, strikes, call_prices, forwards = (
        np.array(column) for column in (columnT, columnK, columnC, columnF)
    )

    # normalise strikes and call prices
    scaler = constraints.Normalise()
    scaler.fit(expiries, strikes, call_prices, forwards)
    norm_T, norm_K, norm_C = scaler.transform(expiries, strikes, call_prices)

    # construct arbitrage constraints and detect violation
    constraint_matrix, constraint_rhs, _, _ = constraints.detect(
        norm_T, norm_K, norm_C, verbose=False
    )

    # repair arbitrage - l1-norm objective; the solution is added to the normalised prices
    adjustment = repair.l1(constraint_matrix, constraint_rhs, norm_C)

    # de-normalise
    K0, C0 = scaler.inverse_transform(norm_K, norm_C + adjustment)
    return K0, C0

In [10]:
def _repair_expiry_slice(calls, puts, t0, n_strikes=20):
    """Repair the call quotes of one expiry as seen on quote date ``t0``.

    Parameters
    ----------
    calls : pandas.DataFrame
        Call quotes of a single expiry with columns
        date, exdate, strike_price, volume, option_price.
    puts : pandas.DataFrame
        Put quotes of the same expiry with columns
        date, exdate, strike_price, option_price.
    t0 : datetime-like
        Quote date to select from ``calls``.
    n_strikes : int, default 20
        Number of call quotes to keep, ranked by largest trading volume.

    Returns
    -------
    pandas.DataFrame
        Columns date, exdate, strike_price, option_price, where the last two
        hold the outputs of ``arbitrageRepair``.
    """
    quotes = (
        calls[calls.date == t0]
        .sort_values('volume', ascending=False)
        .iloc[0:n_strikes]
        .drop(columns='volume')
        .reset_index(drop=True)
        # merge with put options data and stock price data
        # (default inner joins: calls without a matching put / stock price are dropped)
        .merge(puts, on=['date', 'exdate', 'strike_price'], suffixes=('_C', '_P'))
        .merge(stock_AMZN, on='date')
    )

    # forward price: the spot price is used as the forward. The put-call-parity
    # based alternative below is kept disabled, as in the original analysis:
    # quotes['forward_price'] = quotes['stock_price']/((quotes['stock_price']-(quotes['option_price_C']-quotes['option_price_P']))/quotes['strike_price'])
    quotes['forward_price'] = quotes['stock_price']

    # expiry as a year fraction (calendar days / 365)
    quotes['expiry'] = (quotes['exdate'] - quotes['date']).dt.days / 365

    # arbitrage repair
    K0, C0 = arbitrageRepair(
        quotes['expiry'],
        quotes['strike_price'],
        quotes['option_price_C'],
        quotes['forward_price'],
    )

    repaired = quotes[['date', 'exdate']].copy()
    repaired['strike_price'] = K0
    repaired['option_price'] = C0
    return repaired


result_columns = ['t0', 't1', 'K1', 'pi1', 't2', 'K2', 'pi2']
result_blocks = []

for t0 in t0List:
    # same pipeline for both expiries
    result_t1 = _repair_expiry_slice(df_t1, df_t1_P, t0)
    result_t2 = _repair_expiry_slice(df_t2, df_t2_P, t0)

    ## Concat horizontally
    # NOTE(review): axis=1 aligns on the 0..n-1 row index, so if the two slices
    # end up with different row counts the shorter side is padded with NaN.
    result_tmp = pd.concat([result_t1, result_t2.drop(columns='date')], axis=1)
    # name the columns per block so the vertical concat never sees duplicate labels
    result_tmp.columns = result_columns
    result_blocks.append(result_tmp)

## Concat vertically, once, instead of growing the frame inside the loop
result = pd.concat(result_blocks, axis=0).reset_index(drop=True)

  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['ra

GLPK Simplex Optimizer, v4.65
81 rows, 40 columns, 234 non-zeros
      0: obj =   0.000000000e+00 inf =   2.742e+01 (4)
     27: obj =   1.211816853e+01 inf =   0.000e+00 (0)
*    46: obj =   6.979755069e+00 inf =   0.000e+00 (0)
OPTIMAL LP SOLUTION FOUND
GLPK Simplex Optimizer, v4.65
81 rows, 40 columns, 234 non-zeros
      0: obj =   0.000000000e+00 inf =   3.594e+01 (2)
     31: obj =   2.203394421e+01 inf =   1.243e-14 (0)
*    41: obj =   2.194731813e+01 inf =   0.000e+00 (0)
OPTIMAL LP SOLUTION FOUND
GLPK Simplex Optimizer, v4.65
81 rows, 40 columns, 234 non-zeros
      0: obj =   0.000000000e+00 inf =   1.945e+01 (4)
     28: obj =   9.642018560e+00 inf =   0.000e+00 (0)
*    47: obj =   4.967933438e+00 inf =   0.000e+00 (0)
OPTIMAL LP SOLUTION FOUND
GLPK Simplex Optimizer, v4.65
81 rows, 40 columns, 234 non-zeros
      0: obj =   0.000000000e+00 inf =   2.574e+01 (3)
     34: obj =   1.979870493e+01 inf =   7.327e-13 (0)
*    42: obj =   1.979870493e+01 inf =   7.327e-13 (0)
OP

  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['raw_index'].values.astype(int)
  raw_index = df['ra

In [11]:
# Print the (rows, columns) shape of the repaired table, then display its first rows.
print(result.shape)
result.head()

(260, 7)


Unnamed: 0,t0,t1,K1,pi1,t2,K2,pi2
0,2018-01-02,2018-01-19,100.0,23.936454,2018-04-20,80.0,49.40179
1,2018-01-02,2018-01-19,110.0,20.385049,2018-04-20,101.5,46.7012
2,2018-01-02,2018-01-19,115.0,18.609347,2018-04-20,102.5,46.575591
3,2018-01-02,2018-01-19,116.0,18.254206,2018-04-20,105.0,46.261569
4,2018-01-02,2018-01-19,117.0,17.899066,2018-04-20,108.0,45.884742


In [15]:
# result.to_csv('data_20180119_20180420.csv', header=True)

In [12]:
# For every quote date t0 (in t0List order) collect the strikes and prices
# of both expiries: K[i] = [K1, K2] and Pi[i] = [pi1, pi2], each a NumPy array.
K = []
Pi = []
for t0 in t0List:
    rows_at_t0 = result.loc[result['t0'] == t0]
    K.append([np.array(rows_at_t0['K1']), np.array(rows_at_t0['K2'])])
    Pi.append([np.array(rows_at_t0['pi1']), np.array(rows_at_t0['pi2'])])