In [1]:
using DataFrames
using Dates

#### Loading data

- loading the data as exported by MATLAB

In [2]:
@time dat = readtable("../data/input_data/dat.txt", separator = '\t', nastrings = ["NaN"])

head(dat)

elapsed time: 15.378337427 seconds (3959495156 bytes allocated, 26.75% gc time)


Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall
1,2006-07-03,3931.1,,,1,104,1800,2006-12-15,5712.69,0.031667592146348,0.466666666666667,1
2,2006-07-03,0.1,,,0,5515,1800,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,0
3,2006-07-03,3734.0,,,0,2152,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,1
4,2006-07-03,0.1,,,0,20941,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,0
5,2006-07-03,3536.9,,,0,2,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,1
6,2006-07-03,0.1,,,0,4626,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,0


- some element types are not perfect yet:

In [3]:
eltypes(dat)

12-element Array{Type{T<:Top},1}:
 UTF8String
 Float64   
 Float64   
 Float64   
 Int64     
 Int64     
 Int64     
 UTF8String
 Float64   
 Float64   
 Float64   
 Int64     

- transform dates to `Date` type, `IsCall` to `Bool`:

In [4]:
@time begin
    dat[:Date] = Date(array(dat[:, 1]));
    dat[:Expiry] = Date(array(dat[:, :Expiry]));
    dat[:IsCall] = bool(array(dat[:, :IsCall]));
end

head(dat)

elapsed time: 88.838943767 seconds (10922092392 bytes allocated, 59.99% gc time)


Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall
1,2006-07-03,3931.1,,,1,104,1800,2006-12-15,5712.69,0.031667592146348,0.466666666666667,True
2,2006-07-03,0.1,,,0,5515,1800,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,False
3,2006-07-03,3734.0,,,0,2152,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,True
4,2006-07-03,0.1,,,0,20941,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,False
5,2006-07-03,3536.9,,,0,2,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,True
6,2006-07-03,0.1,,,0,4626,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,False


#### Option IDs

- create IDs for individual options: searching a single option determined through three separate columns is too costly

In [5]:
function optParamsToString(expi::Date, stri::Int, isc::Bool)
    y, m, d = yearmonthday(expi)
    dateStr = string(y, lpad(m, 2, "0"), lpad(d, 2, "0"))
    
    if isc
        return string("c_", dateStr, "_", stri)
    else
        return string("p_", dateStr, "_", stri)
    end
end

optParamsToString (generic function with 1 method)

In [6]:
nObs = size(dat, 1)
optIDs = Array(String, nObs)

@time begin
    for ii=1:nObs
        optIDs[ii] = optParamsToString(dat[ii, :Expiry], dat[ii, :Strike], dat[ii, :IsCall]) 
    end
end

optIDs[1:4]

elapsed time: 11.969897848 seconds (2390821024 bytes allocated, 57.61% gc time)


4-element Array{String,1}:
 "c_20061215_1800"
 "p_20061215_1800"
 "c_20061215_2000"
 "p_20061215_2000"

#### Create data table with option IDs and option parameters

- create a table that contains for each option `ID` its option parameters: expiry, strike, type

In [7]:
uniqueOpts = unique(optIDs)

nOpts = size(uniqueOpts, 1)

12917

- define function that decomposes option ID into its parameters:

In [8]:
function decodeOptID(id::String)
    opt = id
    x1, x2, x3 = split(opt, "_")
    if x1 == "c"
        isCall = true
    else
        isCall = false
    end
    expi = Date(x2, "yyyymmdd")
    stri = int(x3)

    return (expi, stri, isCall)
end

decodeOptID (generic function with 1 method)

- decode each option ID

In [9]:
@time begin
    expis = Array(Date, nOpts)
    stris = Array(Int, nOpts)
    iscs = Array(Bool, nOpts)
    for ii=1:nOpts
        x1, x2, x3 = decodeOptID(uniqueOpts[ii])
        expis[ii] = x1
        stris[ii] = x2
        iscs[ii] = x3
    end
end

opts = DataFrame(ID = uniqueOpts, Expiry = expis, Strike = stris, IsCall = iscs)

head(opts)

elapsed time: 1.2064926 seconds (137424592 bytes allocated, 58.20% gc time)


Unnamed: 0,ID,Expiry,Strike,IsCall
1,c_20061215_1800,2006-12-15,1800,True
2,p_20061215_1800,2006-12-15,1800,False
3,c_20061215_2000,2006-12-15,2000,True
4,p_20061215_2000,2006-12-15,2000,False
5,c_20061215_2200,2006-12-15,2200,True
6,p_20061215_2200,2006-12-15,2200,False


#### Create data table for underlying

- get **underlying**: get value for each day

In [10]:
function getAllDAXobs(df::DataFrame)
    vals1, vals2 = Date[], Float64[]
    valsSet = Set{Int}()
    nObs = size(df, 1)
    for ii=1:nObs
        currDat, currVal = df[ii, :Date], df[ii, :DAX]
        currKey = Dates.value(currDat)
        if !in(currKey, valsSet)
            push!(valsSet, currKey)
            push!(vals1, currDat)
            push!(vals2, currVal)
        end
    end
    return DataFrame(Date = vals1, DAX = vals2)
end

@time daxVals = getAllDAXobs(dat[[:Date, :DAX]])
size(daxVals, 1)

elapsed time: 1.604510392 seconds (188643360 bytes allocated, 57.81% gc time)


1908

#### Create data table for cohort / date

In [11]:
function getAllCohortParams(df::DataFrame)
    vals1, vals2, vals3, vals4 = Date[], Date[], Float64[], Float64[]
    valsSet = Set{Array{Int, 1}}()
    nObs = size(df, 1)
    for ii=1:nObs
        currDat, currExp = df[ii, :Date], df[ii, :Expiry]
        currEON, currTTM = df[ii, :EONIA_matched], df[ii, :Time_to_Maturity]
        currKeys = Int[Dates.value(currDat), Dates.value(currExp)]
        if !in(currKeys, valsSet)
            push!(valsSet, currKeys)
            push!(vals1, currDat)
            push!(vals2, currExp)
            push!(vals3, currEON)
            push!(vals4, currTTM)
        end
    end
    return DataFrame(Date = vals1, Expiry = vals2, EONIA_matched = vals3, Time_to_Maturity = vals4)
end

@time cohortParams = getAllCohortParams(dat[[:Date, :Expiry, :EONIA_matched, :Time_to_Maturity]])
size(cohortParams, 1)

elapsed time: 4.986424601 seconds (727240500 bytes allocated, 71.37% gc time)


21053

In [12]:
head(cohortParams)

Unnamed: 0,Date,Expiry,EONIA_matched,Time_to_Maturity
1,2006-07-03,2006-12-15,0.031667592146348,0.466666666666667
2,2006-07-03,2006-09-15,0.0297573099811956,0.211764705882353
3,2006-07-03,2006-08-18,0.02903277602483,0.133333333333333
4,2006-07-03,2006-07-21,0.0283102283088403,0.0549019607843137
5,2006-07-03,2007-06-15,0.0342512630396996,0.949019607843137
6,2006-07-03,2007-03-16,0.0330875802131789,0.709803921568627


#### Create data table for option prices

In [13]:
optPrices = DataFrame(Date = dat[:Date], ID = optIDs, Price = dat[:Option_Price])

head(optPrices)

Unnamed: 0,Date,ID,Price
1,2006-07-03,c_20061215_1800,3931.1
2,2006-07-03,p_20061215_1800,0.1
3,2006-07-03,c_20061215_2000,3734.0
4,2006-07-03,p_20061215_2000,0.1
5,2006-07-03,c_20061215_2200,3536.9
6,2006-07-03,p_20061215_2200,0.1


#### Create data table with all observations

In [16]:
keys = DataFrame(Date = dat[:Date], ID = optIDs)
obsData = dat[[:Bid, :Ask, :Volume, :Open_Interest]]
addObs = [keys obsData]

head(addObs)

Unnamed: 0,Date,ID,Bid,Ask,Volume,Open_Interest
1,2006-07-03,c_20061215_1800,,,1,104
2,2006-07-03,p_20061215_1800,,,0,5515
3,2006-07-03,c_20061215_2000,,,0,2152
4,2006-07-03,p_20061215_2000,,,0,20941
5,2006-07-03,c_20061215_2200,,,0,2
6,2006-07-03,p_20061215_2200,,,0,4626


#### Write relational database to disk

In [17]:
writetable("../data/rel_data/opts.csv", opts)
writetable("../data/rel_data/daxVals.csv", daxVals)
writetable("../data/rel_data/cohortParams.csv", cohortParams)
writetable("../data/rel_data/optPrices.csv", optPrices)
writetable("../data/rel_data/addObs.csv", addObs)

#### Example usage

- get options with given expiry

In [11]:
optPrices1 = DataFrame(Date = optData[:Date], ID = optIDs, Price = optData[:Option_Price])
optPrices2 = optData[[:Bid, :Ask, :Volume, :Open_Interest, :EONIA_matched, :Time_to_Maturity, :DAX]]
optPrices = [optPrices1 optPrices2]

Unnamed: 0,Date,ID,Price,Bid,Ask,Volume,Open_Interest,EONIA_matched,Time_to_Maturity,DAX
1,2006-07-03,c_20061215_1800,3931.1,,,1,104,0.031667592146348,0.466666666666667,5712.69
2,2006-07-03,p_20061215_1800,0.1,,,0,5515,0.0316675921463482,0.466666666666667,5712.69
3,2006-07-03,c_20061215_2000,3734.0,,,0,2152,0.0316675921463482,0.466666666666667,5712.69
4,2006-07-03,p_20061215_2000,0.1,,,0,20941,0.0316675921463482,0.466666666666667,5712.69
5,2006-07-03,c_20061215_2200,3536.9,,,0,2,0.0316675921463482,0.466666666666667,5712.69
6,2006-07-03,p_20061215_2200,0.1,,,0,4626,0.0316675921463482,0.466666666666667,5712.69
7,2006-07-03,c_20061215_2400,3339.8,,,0,2009,0.0316675921463482,0.466666666666667,5712.69
8,2006-07-03,p_20061215_2400,0.1,,,0,13367,0.0316675921463482,0.466666666666667,5712.69
9,2006-07-03,p_20061215_2600,0.2,,,0,2297,0.0316675921463482,0.466666666666667,5712.69
10,2006-07-03,c_20061215_2800,2945.9,,,0,624,0.0316675921463482,0.466666666666667,5712.69


In [25]:
expDate = Date(2006,12,15)

assocOpts = opts[opts[:Expiry] .== expDate, :ID]

153-element DataArray{String,1}:
 "c_20061215_1800"
 "p_20061215_1800"
 "c_20061215_2000"
 "p_20061215_2000"
 "c_20061215_2200"
 "p_20061215_2200"
 "c_20061215_2400"
 "p_20061215_2400"
 "p_20061215_2600"
 "c_20061215_2800"
 "p_20061215_2800"
 "c_20061215_3000"
 "p_20061215_3000"
 ⋮                
 "c_20061215_7000"
 "p_20061215_7000"
 "c_20061215_7050"
 "c_20061215_7100"
 "c_20061215_7150"
 "c_20061215_7200"
 "c_20061215_7250"
 "c_20061215_7300"
 "c_20061215_7350"
 "c_20061215_7400"
 "p_20061215_7400"
 "p_20061215_6750"

- get associated observations

In [34]:
@time begin
    optsLarge = join(optPrices, opts, on = :ID)
    inds = optsLarge[:Expiry] .== expDate

    obsExpDate = optsLarge[inds, [:Date, :ID, :Price, :Expiry, :DAX]]
end

head(obsExpDate)

elapsed time: 3.559302199 seconds (417774400 bytes allocated, 32.71% gc time)


Unnamed: 0,Date,ID,Price,Expiry,DAX
1,2006-07-03,c_20061215_1800,3931.1,2006-12-15,5712.69
2,2006-07-04,c_20061215_1800,3951.6,2006-12-15,5729.01
3,2006-07-05,c_20061215_1800,3850.0,2006-12-15,5625.63
4,2006-07-06,c_20061215_1800,3916.5,2006-12-15,5695.47
5,2006-07-07,c_20061215_1800,3904.9,2006-12-15,5681.85
6,2006-07-10,c_20061215_1800,3930.7,2006-12-15,5706.32


In [35]:
size(obsExpDate)

(18114,5)

In [37]:
expDateObservations = unstack(obsExpDate, :Date, :ID, :Price)

head(expDateObservations)

Unnamed: 0,ID,c_20061215_1800,c_20061215_2000,c_20061215_2200,c_20061215_2400,c_20061215_2800,c_20061215_3000,c_20061215_3200,c_20061215_3400,c_20061215_3600,c_20061215_3800,c_20061215_3900,c_20061215_4000,c_20061215_4050,c_20061215_4100,c_20061215_4150,c_20061215_4200,c_20061215_4250,c_20061215_4300,c_20061215_4350,c_20061215_4400,c_20061215_4450,c_20061215_4500,c_20061215_4550,c_20061215_4600,c_20061215_4650,c_20061215_4700,c_20061215_4750,c_20061215_4800,c_20061215_4850,c_20061215_4900,c_20061215_4950,c_20061215_5000,c_20061215_5050,c_20061215_5100,c_20061215_5150,c_20061215_5200,c_20061215_5250,c_20061215_5300,c_20061215_5350,c_20061215_5400,c_20061215_5450,c_20061215_5500,c_20061215_5550,c_20061215_5600,c_20061215_5650,c_20061215_5700,c_20061215_5750,c_20061215_5800,c_20061215_5850,c_20061215_5900,c_20061215_5950,c_20061215_6000,c_20061215_6050,c_20061215_6100,c_20061215_6150,c_20061215_6200,c_20061215_6250,c_20061215_6300,c_20061215_6350,c_20061215_6400,c_20061215_6450,c_20061215_6500,c_20061215_6550,c_20061215_6600,c_20061215_6650,c_20061215_6700,c_20061215_6750,c_20061215_6800,c_20061215_6850,c_20061215_6900,c_20061215_6950,c_20061215_7000,c_20061215_7050,c_20061215_7100,c_20061215_7150,c_20061215_7200,c_20061215_7250,c_20061215_7300,c_20061215_7350,c_20061215_7400,p_20061215_1800,p_20061215_2000,p_20061215_2200,p_20061215_2400,p_20061215_2600,p_20061215_2800,p_20061215_3000,p_20061215_3200,p_20061215_3400,p_20061215_3600,p_20061215_3800,p_20061215_3900,p_20061215_3950,p_20061215_4000,p_20061215_4050,p_20061215_4100,p_20061215_4150,p_20061215_4200,p_20061215_4250,p_20061215_4300,p_20061215_4350,p_20061215_4400,p_20061215_4450,p_20061215_4500,p_20061215_4550,p_20061215_4600,p_20061215_4650,p_20061215_4700,p_20061215_4750,p_20061215_4800,p_20061215_4850,p_20061215_4900,p_20061215_4950,p_20061215_5000,p_20061215_5050,p_20061215_5100,p_20061215_5150,p_20061215_5200,p_20061215_5250,p_20061215_5300,p_20061215_5350,p_20061215_5400,p_20061215_5450,p_20061215_5500,p_20061215_5550,p_20061215_5600,p_20061215_5650,p_20061215_5700,p_20061215_5750,p_20061215_5800,p_20061215_5850,p_20061215_5900,p_20061215_5950,p_20061215_6000,p_20061215_6050,p_20061215_6100,p_20061215_6150,p_20061215_6200,p_20061215_6250,p_20061215_6300,p_20061215_6350,p_20061215_6400,p_20061215_6450,p_20061215_6500,p_20061215_6550,p_20061215_6600,p_20061215_6650,p_20061215_6700,p_20061215_6750,p_20061215_6800,p_20061215_6900,p_20061215_7000,p_20061215_7400
1,2006-07-03,3931.1,3734.0,3536.9,3339.8,2945.9,2749.2,2552.7,2356.7,2161.2,1966.6,1869.7,1773.2,1725.1,1677.1,1629.3,1581.6,1534.0,1486.6,1439.4,1392.3,1345.5,1298.9,1252.6,1206.5,1160.7,1115.2,1070.0,1025.3,980.9,936.9,893.4,850.3,807.8,765.9,724.5,683.7,643.6,604.3,565.6,527.8,490.9,454.9,420.0,386.1,353.2,321.7,291.6,262.9,235.5,209.9,185.7,163.1,142.2,123.3,105.9,90.4,76.5,64.4,53.7,44.5,36.7,30.1,24.6,20.0,16.2,13.1,10.6,8.6,6.9,5.7,4.6,3.8,3.2,2.7,2.3,1.9,1.6,1.4,1.2,1.1,0.1,0.1,0.1,0.1,0.2,0.5,0.9,1.5,2.6,4.2,6.8,8.4,9.4,10.5,11.7,13.0,14.4,16.0,17.7,19.5,21.6,23.8,26.3,29.0,31.9,35.1,38.6,42.4,46.5,51.0,55.9,61.2,67.0,73.2,80.0,87.3,95.2,103.7,112.9,122.8,133.6,144.9,157.2,170.6,184.9,200.3,216.9,234.5,253.7,274.2,296.3,319.7,344.9,371.6,400.1,430.2,462.1,496.0,531.5,568.6,607.2,647.3,688.7,731.4,775.1,819.8,865.3,911.5,,1005.5,1101.2,1197.9,1589.4
2,2006-07-04,3951.6,3754.5,3557.3,3360.1,2966.0,2769.0,2572.2,2375.8,2179.9,1984.9,1887.8,1791.0,1742.8,1694.8,1646.8,1599.0,1551.4,1503.9,1456.6,1409.6,1362.7,1316.1,1269.7,1223.5,1177.7,1132.2,1087.0,1042.2,997.7,953.6,910.0,866.8,824.1,782.0,740.3,699.3,659.0,619.3,580.3,542.2,504.9,468.5,433.1,398.7,365.5,333.5,302.6,273.1,245.1,218.4,193.3,170.1,148.5,128.9,111.1,95.1,81.1,68.8,58.1,48.9,41.0,34.3,28.5,23.7,19.6,16.1,13.3,10.9,8.9,7.3,6.0,4.9,4.0,3.2,2.6,2.2,1.8,1.4,1.2,0.9,0.1,0.1,0.1,0.1,0.1,0.2,0.4,0.8,1.6,2.9,5.0,6.5,7.3,8.3,9.4,10.6,12.0,13.5,15.1,17.0,19.0,21.2,23.6,26.3,29.2,32.3,35.8,39.6,43.7,48.1,53.0,58.2,63.8,70.0,76.6,83.7,91.4,99.6,108.6,118.2,128.5,139.7,151.7,164.6,178.5,193.4,209.6,226.7,245.2,264.9,286.1,308.8,333.0,359.0,386.7,416.6,447.9,481.3,516.5,553.5,592.2,632.3,673.7,716.2,759.8,804.2,849.4,895.2,,988.6,1083.6,1179.7,1570.2
3,2006-07-05,3850.0,3652.8,3455.7,3258.5,2864.3,2667.5,2470.9,2274.8,2079.5,1885.4,1788.9,1693.0,1645.3,1597.7,1550.3,1503.1,1456.1,1409.4,1362.9,1316.6,1270.7,1225.0,1179.7,1134.7,1090.0,1045.7,1001.9,958.4,915.4,872.8,830.7,789.1,748.1,707.6,667.8,628.6,590.1,552.4,515.5,479.7,444.7,410.7,378.0,346.4,316.2,287.0,259.5,233.4,208.7,185.8,164.4,144.5,126.2,109.6,94.4,80.7,68.6,57.8,48.3,40.1,33.1,27.2,22.3,18.2,14.8,12.0,9.8,7.9,6.5,5.3,4.3,3.5,2.9,2.3,1.9,1.6,1.3,1.1,0.9,0.7,0.1,0.1,0.1,0.1,0.1,0.3,0.7,1.3,2.4,4.3,7.4,9.5,10.8,12.2,13.7,15.5,17.4,19.5,21.8,24.4,27.2,30.2,33.6,37.2,41.2,45.5,50.1,55.1,60.5,66.4,72.6,79.4,86.6,94.3,102.6,111.5,120.9,131.1,141.8,153.5,165.9,179.3,193.5,209.1,225.6,243.2,262.2,282.5,304.3,327.3,352.0,378.3,406.3,435.6,466.8,499.4,533.4,569.2,606.4,644.7,684.6,725.7,768.0,811.4,855.8,901.0,946.9,993.4,,1088.0,1183.9,1280.7,1672.4
4,2006-07-06,3916.5,3719.3,3522.1,3324.9,2930.9,2734.1,2537.6,2341.6,2146.2,1951.8,1855.0,1758.7,1710.7,1662.8,1615.0,1567.5,1520.0,1472.8,1425.7,1378.9,1332.3,1285.9,1239.8,1194.0,1148.5,1103.4,1058.6,1014.2,970.2,926.6,883.5,841.0,798.9,757.4,716.6,676.3,636.8,597.8,559.6,522.3,485.8,450.3,415.9,382.4,350.3,319.3,289.9,261.6,235.0,209.9,186.2,164.4,144.1,125.5,108.6,93.1,79.3,67.0,56.2,46.8,38.7,31.7,25.9,21.0,17.0,13.7,11.0,8.9,7.2,5.8,4.7,3.9,3.2,2.6,2.2,1.8,1.5,1.3,1.1,0.9,0.1,0.1,0.1,0.1,0.3,0.5,1.0,1.7,2.9,4.7,7.5,9.4,10.4,11.6,12.9,14.3,15.9,17.6,19.5,21.6,23.8,26.3,29.0,31.9,35.1,38.6,42.4,46.6,51.1,56.0,61.3,67.1,73.3,80.0,87.4,95.1,103.5,112.6,122.3,132.6,143.8,155.8,168.6,182.4,197.3,213.1,230.3,248.8,268.5,289.5,312.1,336.4,362.2,389.6,418.7,449.2,481.6,515.5,551.0,588.0,626.6,666.4,707.6,749.9,793.4,837.8,883.1,929.1,,1022.9,1118.5,1215.1,1606.6
5,2006-07-07,3904.9,3707.6,3510.4,3313.3,2919.2,2722.4,2526.0,2330.0,2134.6,1940.3,1843.6,1747.3,1699.3,1651.5,1603.8,1556.2,1508.9,1461.7,1414.7,1367.9,1321.3,1275.0,1228.9,1183.1,1137.7,1092.5,1047.7,1003.4,959.4,915.8,872.8,830.2,788.1,746.7,705.9,665.7,626.2,587.5,549.4,512.3,476.1,440.7,406.6,373.6,341.6,311.1,281.8,253.9,227.7,202.7,179.6,158.0,137.9,119.6,103.0,88.1,74.7,62.9,52.6,43.7,36.0,29.5,24.0,19.5,15.8,12.7,10.2,8.3,6.7,5.4,4.4,3.6,2.9,2.4,2.0,1.7,1.4,1.2,1.0,0.8,0.1,0.1,0.1,0.1,0.3,0.5,1.0,1.8,3.0,4.9,7.8,9.7,10.9,12.1,13.4,14.9,16.5,18.3,20.2,22.3,24.6,27.1,29.9,32.8,36.1,39.6,43.5,47.6,52.2,57.1,62.4,68.2,74.4,81.2,88.5,96.3,104.7,113.9,123.7,134.4,145.6,157.8,170.8,184.9,200.1,216.2,233.7,252.4,272.4,293.9,316.8,341.3,367.2,395.1,424.4,455.4,488.1,522.5,558.5,596.1,635.0,675.3,717.0,759.8,803.6,848.4,893.9,940.2,,1034.4,1130.2,1227.0,1618.7
6,2006-07-10,3930.7,3733.4,3536.1,3338.9,2944.6,2747.7,2551.0,2354.8,2159.1,1964.3,1867.3,1770.7,1722.6,1674.6,1626.7,1578.9,1531.4,1484.0,1436.7,1389.7,1342.9,1296.4,1250.1,1204.0,1158.3,1112.9,1067.9,1023.3,979.1,935.3,892.0,849.1,806.8,765.1,724.0,683.4,643.5,604.4,565.9,528.2,491.5,455.6,420.8,387.0,354.5,323.1,293.3,264.7,237.6,212.1,188.2,165.8,145.3,126.3,109.1,93.4,79.5,67.1,56.2,46.7,38.6,31.6,25.8,20.9,16.9,13.6,11.0,8.8,7.1,5.8,4.7,3.8,3.1,2.6,2.1,1.8,1.5,1.2,1.0,0.9,0.1,0.1,0.1,0.1,0.2,0.4,0.8,1.5,2.5,4.1,6.6,8.3,9.2,10.3,11.5,12.8,14.3,15.8,17.6,19.5,21.6,23.9,26.4,29.2,32.2,35.5,39.1,43.1,47.4,52.1,57.2,62.7,68.7,75.2,82.2,89.8,98.0,106.7,116.2,126.4,137.2,149.0,161.5,175.0,189.6,205.1,221.8,239.9,259.2,280.1,302.1,326.0,351.4,378.5,407.0,437.5,469.6,503.3,538.6,575.6,614.0,653.9,695.0,737.4,780.9,825.3,870.7,916.7,,1010.6,1106.2,1202.9,1594.5


In [33]:
size(expDateObservations)

(119,154)

#### Split data into tables

Unnamed: 0,Date,DAX
1,2006-07-03,5712.69
2,2006-07-04,5729.01
3,2006-07-05,5625.63
4,2006-07-06,5695.47
5,2006-07-07,5681.85
6,2006-07-10,5706.32


In [19]:
function getAllOptions_num_dates(df::DataFrame)
    arrData = [df[:Strike].data df[:Expiry].data df[:IsCall].data]
    return unique(arrData, 1)
end

@time allOpts = getAllOptions_num_dates(optData)
size(allOpts)

elapsed time: 0.47628593 seconds (93393484 bytes allocated, 7.37% gc time)


(12917,3)

In [36]:
# sort options
df = DataFrame(Strike = allOpts[:, 1], Expiry = allOpts[:, 2], IsCall = allOpts[:, 3])
df_sorted = sort(df, cols = [:Expiry, :IsCall, :Strike])

head(df_sorted)

Unnamed: 0,Strike,Expiry,IsCall
1,4200,732513,0
2,4300,732513,0
3,4400,732513,0
4,4500,732513,0
5,4600,732513,0
6,4700,732513,0


In [39]:
# create string identifiers (allows usage as column names)
ids = String[string("o", x) for x in 1:size(df_sorted, 1)]

12917-element Array{String,1}:
 "o1"    
 "o2"    
 "o3"    
 "o4"    
 "o5"    
 "o6"    
 "o7"    
 "o8"    
 "o9"    
 "o10"   
 "o11"   
 "o12"   
 "o13"   
 ⋮       
 "o12906"
 "o12907"
 "o12908"
 "o12909"
 "o12910"
 "o12911"
 "o12912"
 "o12913"
 "o12914"
 "o12915"
 "o12916"
 "o12917"

In [41]:
# get expiry dates as Date
expy = df_sorted[:Expiry]
expyDats = Date[Date(Dates.UTD(int(x))) for x in expy]

12917-element Array{Date,1}:
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 ⋮         
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19

In [43]:
opts = DataFrame(ID = ids, Expiry = expyDats, Strike = df_sorted[:Strike], IsCall = df_sorted[:IsCall])
head(opts)

Unnamed: 0,ID,Expiry,Strike,IsCall
1,o1,2006-07-21,4200,0
2,o2,2006-07-21,4300,0
3,o3,2006-07-21,4400,0
4,o4,2006-07-21,4500,0
5,o5,2006-07-21,4600,0
6,o6,2006-07-21,4700,0


In [52]:
obs_optParams = [array(optData[[:Expiry, :Strike]]) optData[:IsCall]]

2025129x3 Array{Int64,2}:
 732660  1800  1
 732660  1800  0
 732660  2000  1
 732660  2000  0
 732660  2200  1
 732660  2200  0
 732660  2400  1
 732660  2400  0
 732660  2600  0
 732660  2800  1
 732660  2800  0
 732660  3000  1
 732660  3000  0
      ⋮         
 735250  9650  0
 735250  9700  1
 735250  9700  0
 735250  9750  1
 735250  9750  0
 735250  9800  1
 735250  9800  0
 735250  9850  1
 735250  9900  1
 735250  9900  0
 735250  9950  1
 735250  9950  0

In [59]:
numDates = Int[Dates.value(x) for x in opts[:Expiry]]
opts_optParams = [numDates array(opts[[:Strike, :IsCall]])]

12917x3 Array{Int64,2}:
 732513   4200  0
 732513   4300  0
 732513   4400  0
 732513   4500  0
 732513   4600  0
 732513   4700  0
 732513   4800  0
 732513   4850  0
 732513   4900  0
 732513   4950  0
 732513   5000  0
 732513   5050  0
 732513   5100  0
      ⋮          
 735586  10000  1
 735586  10100  1
 735586  10200  1
 735586  10300  1
 735586  10400  1
 735586  10500  1
 735586  10600  1
 735586  10700  1
 735586  11000  1
 735586  11400  1
 735586  11600  1
 735586  12000  1

- get option ID for each observation

In [72]:
function findEqualRow(rowToFind::Array{Int, 2}, listedRows::Array{Int, 2})
    nRows, nCols = size(listedRows)
    equTrue = trues(nRows)
    for jj=1:nCols
        equTrue = equTrue & (listedRows[:, jj] .== rowToFind[1, jj])
    end
    return find(equTrue)
end

findEqualRow (generic function with 1 method)

In [73]:
findEqualRow(obs_optParams[1200, :], opts_optParams)

1-element Array{Int64,1}:
 226

In [74]:
obs_optParams[1200,:]

1x3 Array{Int64,2}:
 732569  6500  0

In [75]:
opts_optParams[226, :]

1x3 Array{Int64,2}:
 732569  6500  0

In [82]:
@time begin 
    nObs = size(obs_optParams, 1)
    obs_optIDs = Array(String, nObs)
    for ii=1:nObs
        currParams = obs_optParams[ii, :]
        ind = findEqualRow(currParams, opts_optParams)[1]
        obs_optIDs[ii] = opts[ind, :ID]
    end
end

elapsed time: 2269.044490644 seconds (684330751480 bytes allocated, 85.27% gc time)


In [83]:
obs_optIDs

2025129-element Array{String,1}:
 "o530"  
 "o457"  
 "o531"  
 "o458"  
 "o532"  
 "o459"  
 "o533"  
 "o460"  
 "o461"  
 "o534"  
 "o462"  
 "o535"  
 "o463"  
 ⋮       
 "o12200"
 "o12257"
 "o12201"
 "o12258"
 "o12202"
 "o12259"
 "o12203"
 "o12260"
 "o12261"
 "o12204"
 "o12262"
 "o12205"

In [86]:
optData[:ID] = obs_optIDs
head(optData)

Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall,ID
1,732495,3931.1,,,1,104,1800,732660,5712.69,0.031667592146348,0.466666666666667,True,o530
2,732495,0.1,,,0,5515,1800,732660,5712.69,0.0316675921463482,0.466666666666667,False,o457
3,732495,3734.0,,,0,2152,2000,732660,5712.69,0.0316675921463482,0.466666666666667,True,o531
4,732495,0.1,,,0,20941,2000,732660,5712.69,0.0316675921463482,0.466666666666667,False,o458
5,732495,3536.9,,,0,2,2200,732660,5712.69,0.0316675921463482,0.466666666666667,True,o532
6,732495,0.1,,,0,4626,2200,732660,5712.69,0.0316675921463482,0.466666666666667,False,o459


In [91]:
writetable("../data/rel_data/optData.csv", optData)
writetable("../data/rel_data/underlying.csv", underlying)
writetable("../data/rel_data/opts.csv", opts)

- transform to dates

In [3]:
function num2date(numb::Int64)
    return Date(Dates.UTD(numb))
end
function num2date(numb::Array{Int64})
    nDats = size(numb, 1)
    dats = Array(Date, nDats)
    for ii=1:nDats
        dats[ii] = num2date(numb[ii])
    end
    return dats
end

@time begin
    optData[:Date] = num2date(optData[:Date].data)
    optData[:Expiry] = num2date(optData[:Expiry].data)
end
optData

elapsed time: 0.053756241 seconds (33650864 bytes allocated)


Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall
1,2006-07-03,3931.1,,,1,104,1800,2006-12-15,5712.69,0.031667592146348,0.466666666666667,true
2,2006-07-03,0.1,,,0,5515,1800,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
3,2006-07-03,3734.0,,,0,2152,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true
4,2006-07-03,0.1,,,0,20941,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
5,2006-07-03,3536.9,,,0,2,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true
6,2006-07-03,0.1,,,0,4626,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
7,2006-07-03,3339.8,,,0,2009,2400,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true
8,2006-07-03,0.1,,,0,13367,2400,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
9,2006-07-03,0.2,,,0,2297,2600,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
10,2006-07-03,2945.9,,,0,624,2800,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true


In [6]:
df = DataFrame(a = [3, 4], b = [5, 3], "4" = [4, 2])

LoadError: syntax: keyword argument is not a symbol: "4"
while loading In[6], in expression starting on line 1

- arbitrarily select an expiry date

In [6]:
chosenExpiry = unique(optData[:Expiry])[35]

2008-08-15

- get all option data with given expiry date

In [8]:
inds = optData[:Expiry] .== chosenExpiry
expiryData = optData[inds, :]

Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall
1,2008-05-19,87.1,,,785,750,6700,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
2,2008-05-19,104.2,,,2,2,6800,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
3,2008-05-19,113.7,,,66,66,6850,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
4,2008-05-19,124.2,,,4,4,6900,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
5,2008-05-19,135.5,,,380,380,6950,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
6,2008-05-19,147.6,,,5,5,7000,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
7,2008-05-19,306.8,,,32,30,7200,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true
8,2008-05-19,275.6,,,5,5,7250,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true
9,2008-05-19,246.0,,,5,5,7300,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true
10,2008-05-19,218.2,,,75,75,7350,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true


In [10]:
writetable("../data/singleExpiry.csv", expiryData)