We try to predict the next update (tick) arrival time.
We want to include shocks on the underlying and trade size in our model

The standard ACD Model looks like this :
$\psi(i) = \alpha_0 + \sum_{0<j=<p} \alpha_j x_{i-j} + \sum_{0=<j=<q} \beta_j \psi_{i-j}$

We can try to compare the performance with the following model :

Let's keep it for calls :
With K:Strike, F:Underlying

Moneyness : $ \omega  =  K/F$   

Shocks on the underlying : $ \lambda(F) = Time Weighted TradingVolumePastXTrades  $

Shocks on the Option : $ \lambda(O) =  Time Weighted TradingVolumePastXTrades $

dt : weighted time since volumes by volume

$\psi(i) = \alpha_0 + \sum_{1=<j=<p} \alpha_j log(x_{i-j}) + \sum_{0=<j=<q} \beta_j \psi_{i-j} + \sum_{0=<j=<s} \gamma_j \omega_{i-j} \lambda (O)_{i-j}   + \sum_{0=<j=<r} \delta_j \omega_{i-j} \lambda (U)_{i-j}$

As proposed by Bauwens and Giot (2000) we can take the logs to avoid constraints on the variables (keep stationarity)

In [1]:
import json
from pprint import pprint
import scipy.optimize as optimize
import pandas as pd
import numpy as np
from datetime import datetime
from enum import Enum
import sys
import timeit

In [2]:
def getBookContract(aStringNumberContract, aTypeOption):
    aContract = ''
    if aStringNumberContract == "000" and aTypeOption == "1":
        aContract = CONTRACTTYPE.FUTURE #replace with enums
    elif aTypeOption == "2":
        aContract = CONTRACTTYPE.CALL 
    elif aTypeOption == "3":
        aContract = CONTRACTTYPE.PUT 
    else:
        print("error, Contract Type" + str(aStringNumberContract) + " , " + str(aTypeOption) + " not recognized")
    
    return aContract

class CONTRACTTYPE(Enum):
    UNSET = 0
    FUTURE = 1
    CALL = 2
    PUT = 3

In [3]:
data = json.load(open('feeddata_10.json'))

# Typical ACD Model on Futures:

In [4]:

def residualTotalEACD(params_, pqs_):
    check1 = []
    check2 = []
    
    #variables of Interest:
    errs = []
    psis = []
    xs = []
    
    p = pqs_[0]
    q = pqs_[1]
    
    alpha0 = params_[0]
    alphas = params_[1:p+1]
    betas = params_[p+1:q+p+1]    
    
    initialized = False
    first = True
    for idx, tick in enumerate(data):
    
        myBook = tick["book"]
           
        if myBook[5] == '1' and  tick['type']=='tick':   #let's forget about mini futures
            myContract = getBookContract(myBook[8:11],myBook[3])
    
            if myContract is CONTRACTTYPE.FUTURE:
                if initialized and not first :

                    #calculate alphas and betas parts
                    alphaSum = 0.0
                    betaSum = 0.0            
                    for idx2,alpha in enumerate(alphas): 
                        #print(xs)
                        alphaSum = alphaSum + alpha*xs[idx2] #doesn't matter if sum in reverse order, just means that alphas are in reverse order too
                    for idx2,beta in enumerate(betas):
                        #print(psis)
                        if len(psis)>idx2:
                            betaSum = betaSum + beta*psis[idx2]

                    expectedTime = alpha0 + alphaSum +  betaSum #expected time (psi)
                    psis.append(expectedTime)
                    xs.append((datetime.utcfromtimestamp(tick["created"]/1000000) - myPastTime).microseconds/1000)


                    errs.append((expectedTime - xs[p])*(expectedTime - xs[p]))
                    check1.append(expectedTime)
                    check2.append(xs[p])

                elif first:
                    first = False
                else:
                    xs.append((datetime.utcfromtimestamp(tick["created"]/1000000) - myPastTime).microseconds/1000)
                    if len(xs) == p:
                        initialized = True

                myPastTime = datetime.utcfromtimestamp(tick["created"]/1000000)

                #maintain right array size
                if len(xs)>p:
                    xs.pop(0)
                if len(psis)>q:
                    psis.pop(0)  

                if idx > 2000 :
                    break
    #return errs,check1,check2
    return np.average(errs)

In [5]:
# if 'xs' in globals() or 'psis' in globals():
#     sys.stderr.write('xs or/and psis variables already exist')
#else:


#parameters:
alpha0 = 0.0
alphas = [1,1,1,1,1,1]
betas = [1,1,1,1]

params0 = [alpha0] + alphas + betas
#errs,check1,check2 = residualTotalEACD(params, len(alphas), len(betas))


# LSE

In [21]:
def constraint1(x):
    return -np.sum(x)+len(x)



cons = (
            {'type': 'ineq',
             'fun' : constraint1
            }
          ,
              {'type': 'ineq',
               'fun' : lambda x: x
              }
    
        )


In [7]:

optimize.minimize(residualTotalEACD,params0,args=([len(alphas), len(betas)]), method='COBYLA', constraints=cons, options={'maxiter':10000})

     fun: 1987.1733927191476
   maxcv: 1.0135269667632516e-19
 message: 'Optimization terminated successfully.'
    nfev: 8687
  status: 1
 success: True
       x: array([  1.34372277e+00,  -9.46518187e-20,  -7.80014531e-21,
        -9.89682264e-20,  -9.51935771e-20,  -1.00768977e-19,
         2.65618311e-01,   3.67002166e-01,   7.79119699e-22,
         2.93608638e-01,  -1.01352697e-19])

In [8]:
# we just check what we would get with random variables
paramsTest = [1,1,1]

for idx,param in enumerate(paramsTest):
    paramsTest[idx] = paramsTest[idx]*(np.random.rand()*5)
print(residualTotalEACD(paramsTest, [len(alphas), len(betas)]))
print(paramsTest)

29779.4724982
[1.9482149313223558, 1.4148608211989293, 2.5403746265998093]


In [9]:
params0

[0.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [10]:
params0[len(alphas)+1:len(alphas)+len(betas)+1]

[1, 1, 1, 1]

# New Model :

In [4]:
#dataExtraction to save calculation during optimization
#only taking the strike of interest and the future
#precalculating tick times

def extractInterestingData(nbOfDataPoints_, aOptionStrike):
    myTicksOfInterest = []
    myCounter = 0
    for idx, tick in enumerate(data):
        myBook =tick["book"]
        if myBook[5] == '1':   #it as to be a MAXI
            if tick['type']=='lastdone' and getBookContract(myBook[8:11],myBook[3]) is CONTRACTTYPE.FUTURE: 
                myTicksOfInterest.append({'contractType' : CONTRACTTYPE.FUTURE,
                                     'type' : 'lastdone',
                                     'volume' : tick['volume'],
                                     'received': datetime.utcfromtimestamp(tick['received']/1000000),
                                     'created' : datetime.utcfromtimestamp(tick['created']/1000000)
                                    })
                
            elif tick['type']=='lastdone' and getBookContract(myBook[8:11],myBook[3]) is CONTRACTTYPE.CALL and myBook[8:11]==str(aOptionStrike):
                    myTicksOfInterest.append({'contractType' : CONTRACTTYPE.CALL,
                                     'type' : 'lastdone',
                                     'volume' : tick['volume'],
                                     'received': datetime.utcfromtimestamp(tick['received']/1000000),
                                     'created' : datetime.utcfromtimestamp(tick['created']/1000000)
                                    })
                    
            elif tick['type']=='tick' and getBookContract(myBook[8:11],myBook[3]) is CONTRACTTYPE.CALL and myBook[8:11]==str(aOptionStrike):
                    myCounter = myCounter + 1
                    myTicksOfInterest.append({'contractType' : CONTRACTTYPE.CALL,
                                     'type' : 'tick',
                                     'received': datetime.utcfromtimestamp(tick['received']/1000000),
                                     'created' : datetime.utcfromtimestamp(tick['created']/1000000)
                                    })
                    
        if myCounter >= nbOfDataPoints_:
            break
    return myTicksOfInterest


In [None]:

@deprecated
# def residualTotalModACD(params_, pqs_):
#     check1 = []
#     check2 = []
    
#     #variables of Interest:
#     errs = []
#     psis = []
#     xs = []
#     volumeO = []
#     volumeF = []
#     TO = []
#     TF = []
    
    
#     p = pqs_[0]
#     q = pqs_[1]
#     r = pqs_[2]
#     s = pqs_[3]
    
#     alpha0 = params_[0]
#     alphas = params_[1:p+1]
#     betas = params_[p+1:q+p+1]
#     gammas =  params_[q+p+1:q+p+1+r]
#     deltas = params_[q+p+r+1:q+p+r+s+1]
    
#     initialized = False
#     first = True
#     counter = 0
#     for idx, tick in enumerate(myDataForRun):
        
#         myBook = tick["book"]
# #         print(myBook[8:11])
#         if myBook[5] == '1' and  tick['type']=='lastdone' and getBookContract(myBook[8:11],myBook[3]) is CONTRACTTYPE.FUTURE: 
#             volumeF.append(tick['volume'])
#             TF.append(datetime.utcfromtimestamp(tick['received']/1000000))
#         if myBook[5] == '1' and  tick['type']=='lastdone' and getBookContract(myBook[8:11],myBook[3]) is CONTRACTTYPE.CALL and myBook[8:11]=='335': 
#             volumeO.append(tick['volume'])
#             TO.append(datetime.utcfromtimestamp(tick['received']/1000000))
        
#         if myBook[5] == '1' and  tick['type']=='tick':   #let's forget about mini futures
#             myContract = getBookContract(myBook[8:11],myBook[3])
            
            
#             if myContract is CONTRACTTYPE.CALL and myBook[8:11]=='335': #let's take a single option strike to see
# #                 print('passed')
#                 if len(xs)>p:
#                     xs.pop(0)
#                 if len(psis)>q:
#                     psis.pop(0)  
#                 if len(volumeO)>r:
#                     volumeO.pop(0)
#                     TO.pop(0)
#                 if len(volumeF)>s:
#                     volumeF.pop(0)  
#                     TF.pop(0)
                
#                 if initialized and not first :
# #                     print(counter)
#                     counter = counter + 1
#                     #calculate alphas and betas parts
#                     alphaSum = 0.0
#                     betaSum = 0.0
#                     gammaSum = 0.0 
#                     deltaSum = 0.0
#                     for idx2,alpha in enumerate(alphas): 
#                         #print(xs)
#                         alphaSum = alphaSum + alpha*xs[idx2] #doesn't matter if sum in reverse order, just means that alphas are in reverse order too
#                     for idx2,beta in enumerate(betas):
#                         #print(psis)
#                         if len(psis)>idx2:
#                             betaSum = betaSum + beta*psis[idx2] # our first estimation have to be thrown away when we do that
#                     for idx2,gamma in enumerate(gammas):
#                             gammaSum = gammaSum + gamma*volumeO[idx2]*(datetime.utcfromtimestamp(tick['received']/1000000)-TO[idx2]).microseconds/1000
#                     for idx2,delta in enumerate(deltas):
#                             deltaSum = deltaSum + delta*volumeF[idx2]*(datetime.utcfromtimestamp(tick['received']/1000000)-TF[idx2]).microseconds/1000

#                     expectedTime = alpha0 + alphaSum +  betaSum + gammaSum + deltaSum #expected time (psi)
#                     psis.append(expectedTime)
#                     xs.append((datetime.utcfromtimestamp(tick["created"]/1000000) - myPastTime).microseconds/1000)


#                     errs.append((expectedTime - xs[p])*(expectedTime - xs[p]))
#                     check1.append(expectedTime)
#                     check2.append(xs[p])

#                 elif first:
#                     first = False
#                 else:
#                     xs.append((datetime.utcfromtimestamp(tick["created"]/1000000) - myPastTime).microseconds/1000)
#                     #I forgot the logic, why do we append here ????
#                     if len(xs)>p:
#                         xs.pop(0)
#                     if len(xs) == p and len(volumeO) == r and len(volumeF) == s:
# #                         print("initialized sequenced")
#                         initialized = True

#                 myPastTime = datetime.utcfromtimestamp(tick["created"]/1000000)

#                 #maintain right array size
#             if len(xs)>p:
#                 xs.pop(0)
#             if len(psis)>q:
#                 psis.pop(0)  
#             if len(volumeO)>r:
#                 volumeO.pop(0)
#                 TO.pop(0)
#             if len(volumeF)>s:
#                 volumeF.pop(0)  
#                 TF.pop(0)

# #             print(len(volumeO))
#             if counter > 200 :
#                 break
#     #return errs,check1,check2
#     if len(errs)==0:
#         sys.stderr.write("no run in function")
#         return 10000
#     else :
#         return np.mean(errs)

In [5]:

def residualTotalModACD(params_, pqs_):
    check1 = []
    check2 = []
    
    #variables of Interest:
    errs = []
    psis = []
    xs = []
    volumeO = []
    volumeF = []
    TO = []
    TF = []
    
    
    p = pqs_[0]
    q = pqs_[1]
    r = pqs_[2]
    s = pqs_[3]
    
    alpha0 = params_[0]
    alphas = params_[1:p+1]
    betas = params_[p+1:q+p+1]
    gammas =  params_[q+p+1:q+p+1+r]
    deltas = params_[q+p+r+1:q+p+r+s+1]
    
    initialized = False
    first = True
    counter = 0
    for idx, tick in enumerate(myDataForRun):
        
        if tick['type']=='lastdone' and tick['contractType'] is CONTRACTTYPE.FUTURE: 
            volumeF.append(tick['volume'])
            TF.append(tick['received'])
        if tick['type']=='lastdone' and tick['contractType'] is CONTRACTTYPE.CALL: 
            volumeO.append(tick['volume'])
            TO.append(tick['received'])
            
            
        if tick['type']=='tick' and tick['contractType'] is CONTRACTTYPE.CALL: 
#               print('passed')
            if len(xs)>p:
                xs.pop(0)
            if len(psis)>q:
                psis.pop(0)  
            if len(volumeO)>r:
                volumeO.pop(0)
                TO.pop(0)
            if len(volumeF)>s:
                volumeF.pop(0)  
                TF.pop(0)
                
            if initialized and not first :
#                 print(counter)
                counter = counter + 1
                #calculate alphas and betas parts
                alphaSum = 0.0
                betaSum = 0.0
                gammaSum = 0.0 
                deltaSum = 0.0
                for idx2,alpha in enumerate(alphas): 
                    #print(xs)
                    alphaSum = alphaSum + alpha*np.log(xs[idx2]) #doesn't matter if sum in reverse order, just means that alphas are in reverse order too
                for idx2,beta in enumerate(betas):
                    #print(psis)
                    if len(psis)>idx2:
                        betaSum = betaSum + beta*psis[idx2] # our first estimation have to be thrown away when we do that
                for idx2,gamma in enumerate(gammas):
                        gammaSum = gammaSum + gamma*volumeO[idx2]*(tick['received']-TO[idx2]).microseconds/1000
                for idx2,delta in enumerate(deltas):
                        deltaSum = deltaSum + delta*volumeF[idx2]*(tick['received']-TF[idx2]).microseconds/1000

                expectedTime = alpha0 + alphaSum +  betaSum + gammaSum + deltaSum #expected time (psi)
                psis.append(expectedTime)
                xs.append((tick["created"] - myPastTime).microseconds/1000)


                errs.append((expectedTime - xs[p])*(expectedTime - xs[p]))
                check1.append(expectedTime)
                check2.append(xs[p])

            elif first:
                first = False
            else:
                xs.append((tick["created"] - myPastTime).microseconds/1000)
                #I forgot the logic, why do we append here ????
                if len(xs)>p:
                    xs.pop(0)
                if len(xs) == p and len(volumeO) == r and len(volumeF) == s:
#                         print("initialized sequenced")
                    initialized = True

            myPastTime = tick["created"]

            #maintain right array size
        if len(xs)>p:
            xs.pop(0)
        if len(psis)>q:
            psis.pop(0)  
        if len(volumeO)>r:
            volumeO.pop(0)
            TO.pop(0)
        if len(volumeF)>s:
            volumeF.pop(0)  
            TF.pop(0)

#             print(len(volumeO))
        if counter > 200 :
            break
    #return errs,check1,check2
    if len(errs)==0:
        sys.stderr.write("no run in function")
        return 10000
    else :
        return np.mean(errs)

In [17]:
# if 'xs' in globals() or 'psis' in globals():
#     sys.stderr.write('xs or/and psis variables already exist')
#else:


#parameters:
alpha0 = 0.0
alphas = [1,1,1,1,1,1]
betas = [1,1,1,1]
gammas = [1,1]
deltas = [1,1]
params0 = [alpha0] + alphas + betas + gammas + deltas
#errs,check1,check2 = residualTotalEACD(params, len(alphas), len(betas))



In [24]:
# p = len(alphas)
# q = len(betas)
# r = len(gammas)
# s = len(deltas)

# alpha0 = params_[0]
# alphas = params_[1:p+1]
# betas = params_[p+1:q+p+1]
# gammas =  params_[q+p+1:q+p+1+r]
# deltas = params_[q+p+r+1:q+p+r+s+1]

In [7]:
myDataForRun = extractInterestingData(200,'335')

In [24]:

optimize.minimize(residualTotalModACD,params0,args=([len(alphas), len(betas), len(gammas), len(deltas)]), method='COBYLA', options={'maxiter':300})

     fun: 3256.4226494726431
   maxcv: 0.0
 message: 'Maximum number of function evaluations has been exceeded.'
    nfev: 300
  status: 2
 success: False
       x: array([ 1.85814412,  1.69498708,  1.57528066,  1.71796936,  1.44789092,
        1.42736389,  1.43252312, -0.28156762,  0.03987361, -0.18763136,
       -0.02109759,  0.0114972 ,  0.02646825,  0.0048248 , -0.01006318])

In [8]:

start_time = timeit.default_timer()
print(residualTotalModACD(params0,[len(alphas), len(betas), len(gammas), len(deltas)]))
print(timeit.default_timer() - start_time)

2.26242372431e+108
0.003540839239608248


In [20]:
for i in range(0,100):
    for idx,param in enumerate(params0):
        params0[idx] = param*np.random.rand()*2
        start_time = timeit.default_timer()
        print(residualTotalModACD(params0,[len(alphas), len(betas), len(gammas), len(deltas)]))
        print("execution time in ms:",(timeit.default_timer() - start_time)*1000)

101557.523031
execution time in ms: 3.2546073820185484
101598.021346
execution time in ms: 1.777611987677119
101612.486597
execution time in ms: 1.7883395870512686
101509.288121
execution time in ms: 1.909268889676241
101584.907692
execution time in ms: 1.7980919501496828
101782.889233
execution time in ms: 1.7690786698949523
101714.029314
execution time in ms: 1.8007738500500636
92577.3728566
execution time in ms: 1.7849262599156646
128259.015203
execution time in ms: 1.7417720531511804
103616.058333
execution time in ms: 1.7454291893272966
103612.754589
execution time in ms: 2.0358058011424873
118654.230467
execution time in ms: 2.2815653518364343
114268.717856
execution time in ms: 1.8066252679318495
66373.1148387
execution time in ms: 1.8219852398715375
66255.9909998
execution time in ms: 1.7715167606411342
66255.9909998
execution time in ms: 1.8624575467356408
66358.2801615
execution time in ms: 1.7756615150119615
66374.0573332
execution time in ms: 1.7524996526390169
66230.608967

execution time in ms: 1.8461223385202175
3642.05569849
execution time in ms: 1.7590824976423391
3642.05510192
execution time in ms: 1.7585948795613149
3641.78040867
execution time in ms: 1.7568882160503563
3641.95769424
execution time in ms: 1.7573758341313805
3641.91694341
execution time in ms: 1.771029142446423
3642.0068451
execution time in ms: 1.7517682252901068
3642.006325
execution time in ms: 1.7439663348568502
3641.78259508
execution time in ms: 1.7571320250908684
3641.78358084
execution time in ms: 1.8129643038946597
3641.78358084
execution time in ms: 1.7481110891139906
3605.99063297
execution time in ms: 1.7607891611532978
3611.23504944
execution time in ms: 1.7315320718580551
3611.36998406
execution time in ms: 1.731288262817543
3612.22553353
execution time in ms: 1.7671281972297948
3612.22553353
execution time in ms: 1.7471358528382552
3612.23200726
execution time in ms: 1.7407968167617582
3612.23365812
execution time in ms: 1.7656653427593483
3612.35457043
execution time 

execution time in ms: 1.7673720062703069
4099.15760965
execution time in ms: 1.7729796151115806
4099.15772623
execution time in ms: 1.7588386884881402
4099.15771829
execution time in ms: 1.7656653427593483
4099.15771828
execution time in ms: 1.7642024882889018
4099.15767186
execution time in ms: 1.7417720530374936
4099.15767186
execution time in ms: 1.7790748420338787
4099.15767186
execution time in ms: 1.8000424228148404
4122.25852246
execution time in ms: 1.8763546642048823
4122.26010787
execution time in ms: 1.7800500784233009
4122.26006762
execution time in ms: 1.755425361466223
4122.2588955
execution time in ms: 1.7512806072090825
4122.2588955
execution time in ms: 1.7705415243653988
4122.25889606
execution time in ms: 1.7364082534641057
4122.25889599
execution time in ms: 1.7795624602285898
4122.2505018
execution time in ms: 1.76176439754272
4122.2505018
execution time in ms: 1.8005300408958647
4122.2504857
execution time in ms: 1.8336880755214224
4122.25051617
execution time in 

execution time in ms: 1.761520588388521
4167.03395619
execution time in ms: 1.7598139248775624
4167.03395617
execution time in ms: 1.7568882160503563
4167.03395617
execution time in ms: 1.7581072613666038
4167.03395617
execution time in ms: 1.7351892080341713
4167.00728817
execution time in ms: 1.7454291893272966
4167.00728913
execution time in ms: 1.7425034803864037
4167.00728913
execution time in ms: 1.73762729889404
4167.00728841
execution time in ms: 1.740553007834933
4167.00728841
execution time in ms: 1.7483548982681896
4167.00728867
execution time in ms: 1.743478716662139
4167.00728868
execution time in ms: 1.76176439754272
4167.16008195
execution time in ms: 1.7315320718580551
4167.16008195
execution time in ms: 1.765177724678324
4167.16008201
execution time in ms: 1.7566444068961573
4167.16008208
execution time in ms: 1.7464044257167188
4167.16008208
execution time in ms: 1.7498177526249492
4167.1600821
execution time in ms: 1.749817752738636
4167.1600821
execution time in ms:

execution time in ms: 1.7446977620920734
4159.74067752
execution time in ms: 1.737139680699329
4159.74067752
execution time in ms: 1.830030939458993
4159.74067752
execution time in ms: 1.8395394934032083
4159.74067756
execution time in ms: 1.8029681317557333
4159.74067756
execution time in ms: 1.764933915524125
4159.74067756
execution time in ms: 1.7381149169750643
4159.74067756
execution time in ms: 1.734213971758436
4159.74124905
execution time in ms: 1.7351892080341713
4159.74124905
execution time in ms: 1.740796816875445
4159.74124904
execution time in ms: 1.722267326954352
4159.74124904
execution time in ms: 1.8058938405829394
4159.74124904
execution time in ms: 1.8680651554632277
4159.74124904
execution time in ms: 1.869284200893162
4159.74124904
execution time in ms: 1.787120541735021
4159.74124904
execution time in ms: 1.7756615150119615
4152.84345084
execution time in ms: 1.7685910517002412
4152.84345084
execution time in ms: 1.887569881773743
4152.84345084
execution time in m