This notebook defines some convenience functions that will be used in further analyses.

In [1]:
using DataFrames
using Dates

## Data characteristics

In order to get a feeling about the size of the dataset, let's take a look at the number of rows of the table.

In [5]:
nObs = size(dat, 1)

2025129

Hence, be prepared that each operation will take quite some time.

Let's now take a look at missing values:

In [6]:
missVals = [any(isna(dat[:, ii])) for ii=1:size(dat, 2)]

12-element Array{Any,1}:
 false
 false
  true
  true
  true
 false
 false
 false
 false
 false
 false
 false

## Define option type

An option is determined by a unique combination of *strike*, *expiry* and *call/put*.

In [7]:
type Option
    strike::Int64
    expiry::Date
    iscall::Bool
end

Default constructor: option type is set to call option.

In [8]:
function Option(strike::Int64, expiry::Date)
    return Option(strike, expiry, true)
end

function Option(df::DataFrame)
    return Option(df[1, :Strike], df[1, :Expiry], df[1, :IsCall])
end

Option (constructor with 4 methods)

Define writemime methods for customized display of option objects:

In [9]:
import Base.writemime
function writemime(io::IO, ::MIME"text/html", opt::Option)
    opt.iscall ? typ = "Call" : typ = "Put"
    write(io, "<p><strong>$(typ)</strong> option:")
    write(io, "<ul><li>strike:&nbsp&nbsp&nbsp $(opt.strike)</li>")
    write(io, "<li>expiry:&nbsp&nbsp $(opt.expiry)</li></ul></p>")
end
function writemime(io::IO, ::MIME"text/html", opts::Array{Option,1 })
    nOpts = size(opts, 1)
    write(io, "<p><strong>Array</strong> of $nOpts <strong>options</strong>:")
    nToShow = 4
    for ii=1:min(nToShow, nOpts)
        opts[ii].iscall ? typ = "Call" : typ = "Put"
        write(io, "<p><strong>$(typ)</strong> option:")
        write(io, "<ul><li>strike:&nbsp&nbsp&nbsp $(opts[ii].strike)</li>")
        write(io, "<li>expiry:&nbsp&nbsp $(opts[ii].expiry)</li></ul></p>")
    end
    if nOpts > nToShow
        write(io, "<p><strong>...</strong>")
    end
end

writemime (generic function with 19 methods)

In [10]:
opts = Option[Option(dat[ii, :]) for ii=1:30]

## Get unique options / expiries / days

In [11]:
function getObs(df::DataFrame, opt::Option)
    # get all observations for given option
    datExp = df[:Expiry]
    datStr = df[:Strike]
    datCall = df[:IsCall]
    inds = Int64[]
    
    for ii=1:size(datExp, 1)
        #if datExp[ii]::Date == opt[1, 2]::Date
         #   if datStr[ii]::Int64 == opt[1, 1]::Int64
          #      if datCall[ii]::Bool == opt[1, 3]::Bool
        if datExp[ii]::Date == opt.expiry
            if datStr[ii]::Int64 == opt.strike
                if datCall[ii]::Bool == opt.iscall
                    push!(inds, ii)
                end
            end
        end
    end
    return df[inds, :]
end

function getObs(df::DataFrame, dat::Date, col::Symbol)
    # find observations with dat in col
    dats = df[col]
    inds = Int64[]
    
    for ii=1:size(dats, 1)
        if dats[ii] == dat
            push!(inds, ii)
        end
    end
    return df[inds, :]
end


getObs (generic function with 2 methods)

In [12]:
expDate = Date(2011,12,16)
@time expData = getObs(dat, expDate, :Date)
size(expData)

elapsed time: 0.546490682 seconds (195458848 bytes allocated, 22.84% gc time)


(966,12)

In [13]:
function getAllExpiry(df::DataFrame)
    return unique(df[:Expiry])
end
@time expDates = getAllExpiry(dat)
size(expDates)

elapsed time: 0.233406412 seconds (36045232 bytes allocated, 12.78% gc time)


(97,)

In [14]:
function getAllDays(df::DataFrame)
    return unique(df[:Date])
end
@time tradeDays = getAllDays(dat)
size(tradeDays)

elapsed time: 0.129417371 seconds (32552232 bytes allocated, 26.39% gc time)


(1908,)

Benchmark result:

## Find unique options and expiry dates

In [None]:
function getOptionData(opt::Option, data::DataFrame)
    nObs = size(data, 1)
    validInds = falses(nObs)
    for ii=1:nObs
        if data[ii, :Strike] == opt.strike
            if data[ii, :Expiry] == opt.expiry
                if data[ii, :IsCall] == int(opt.iscall)
                    validInds[ii] = true
                end
            end
        end
    end
    return Timedata(dat1[validInds, [:Option_Price, :Bid, 
            :Ask, :Volume, :Open_Interest, :DAX, :EONIA_matched, :Time_to_Maturity]],
    array(dat1[validInds, :Date]))
end
    

Get some helper look-up tables: in which sections to we have to search for individual options? And in which for all options of given expiration date?

In [9]:
nPreAlloc = 20000
expDates = Array(Date, nPreAlloc)
strikes = Array(Int64, nPreAlloc)
optTypes = Array(Bool, nPreAlloc)
firstListings = Array(Date, nPreAlloc)

nOptsFound = 0

@time begin
    for ii=1:nObs
        currExpDate, currStrike = dat[ii, :Expiry], dat[ii, :Strike]
        currDate, currType = dat[ii, :Date], dat[ii, :IsCall]
        # does (expDate, strike, type) combination already occur?
    
        optPresent = false
        for kk=nOptsFound:-1:1
            if (expDates[kk] == currExpDate) && (strikes[kk] == currStrike) && (optTypes[kk] == currType)
                # go to next observation
                optPresent = true
                break
            end
        end
    
        if !optPresent
            nOptsFound = nOptsFound + 1
            expDates[nOptsFound] = currExpDate
            strikes[nOptsFound] = currStrike
            optTypes[nOptsFound] = currType
            firstListings[nOptsFound] = currDate
        end
    end
end

allOpts = DataFrame(expDates = expDates[1:nOptsFound], 
                    strikes = strikes[1:nOptsFound], 
                    firstListings = firstListings[1:nOptsFound])

elapsed time: 639.105780513 seconds (224997735064 bytes allocated, 23.64% gc time)


Unnamed: 0,expDates,strikes,firstListings
1,2006-12-15,1800,2006-07-03
2,2006-12-15,1800,2006-07-03
3,2006-12-15,2000,2006-07-03
4,2006-12-15,2000,2006-07-03
5,2006-12-15,2200,2006-07-03
6,2006-12-15,2200,2006-07-03
7,2006-12-15,2400,2006-07-03
8,2006-12-15,2400,2006-07-03
9,2006-12-15,2600,2006-07-03
10,2006-12-15,2800,2006-07-03


In [8]:
size(allOpts)

(12917,3)

In [31]:
function findprev(A, start)
    for i = start:-1:1
        A[i] != 0 && return i
    end
    0
end
findlast(A) = findprev(A, length(A))

findlast (generic function with 1 method)

In [38]:
startDay = allDays[550]
endDay = allDays[1005]

@time begin
    startInd = findfirst(dat[:Date] == startDay)
    endInd = findlast(dat[:Date] == endDay)
end


elapsed time: 9.087e-6 seconds (80 bytes allocated)


0

In [40]:
size(uniqueOpts)

(12917,3)

In [41]:
13000 * 0.00009

1.1700000000000002

## Bid ask prices

Do bid-ask prices make sense, or are they observed too infrequently? Percentage of **missing bid-ask prices**:

In [6]:
[sum(isna(dat[:Bid]))/nObs sum(isna(dat[:Ask]))/nObs]

1x2 Array{Float64,2}:
 0.789053  0.777314

##  Get smaller subset

## Get list of options

In [51]:
function getAllOptions(dat1::DataFrame)
    optsUnique = unique(dat1[:, [:Strike, :Expiry, :IsCall]])
    nOpts = size(optsUnique, 1)
    return Option[Option(optsUnique[ii, :]) for ii=1:nOpts]
end

getAllOptions (generic function with 1 method)

In [52]:
allOpts = getAllOptions(dat1)

Group options by common expiry date: get Timematr with DAX prices and option prices for all options of a given expiry date

In [60]:
function getOptionsWithExpiry(optList::Array{Option, 1}, expiry::Date)
    # find options with given expiry
    nOpts = size(optList, 1)
    isValid = falses(nOpts)
    for ii=1:nOpts
        if optList[ii].expiry == expiry
            isVald[ii] = true
        end
    end
    return opts = optList[isValid]
end

getOptionsWithExpiry (generic function with 1 method)

In [None]:
function getCohortPrices(dat::DataFrame, optList::Array{Option, 1}, expiry::Date)
    validOpts = getOptionsWithExpiry(optList, expiry)
    
    
    
    
    return Timematr() 
end

Group all options that are listed at a given date (all strikes, all maturities)