In [1]:
using DataFrames
using Dates

#### Loading data

In [16]:
@time optData = readtable("../data/intmed_data/optData.csv")

head(optData)

elapsed time: 12.954250909 seconds (3564920424 bytes allocated, 11.75% gc time)


Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall
1,732495,3931.1,,,1,104,1800,732660,5712.69,0.031667592146348,0.466666666666667,True
2,732495,0.1,,,0,5515,1800,732660,5712.69,0.0316675921463482,0.466666666666667,False
3,732495,3734.0,,,0,2152,2000,732660,5712.69,0.0316675921463482,0.466666666666667,True
4,732495,0.1,,,0,20941,2000,732660,5712.69,0.0316675921463482,0.466666666666667,False
5,732495,3536.9,,,0,2,2200,732660,5712.69,0.0316675921463482,0.466666666666667,True
6,732495,0.1,,,0,4626,2200,732660,5712.69,0.0316675921463482,0.466666666666667,False


#### Split data into tables

- get **underlying**: get value for each day, transform dates to `Date` type

In [18]:
underlyingRaw = unique([optData[:Date].data optData[:DAX].data], 1)

dats = Date[Date(Dates.UTD(int(x))) for x in underlyingRaw[:, 1]]
underlying = DataFrame(Date = dats, DAX = underlyingRaw[:, 2])

head(underlying)

Unnamed: 0,Date,DAX
1,2006-07-03,5712.69
2,2006-07-04,5729.01
3,2006-07-05,5625.63
4,2006-07-06,5695.47
5,2006-07-07,5681.85
6,2006-07-10,5706.32


In [19]:
function getAllOptions_num_dates(df::DataFrame)
    arrData = [df[:Strike].data df[:Expiry].data df[:IsCall].data]
    return unique(arrData, 1)
end

@time allOpts = getAllOptions_num_dates(optData)
size(allOpts)

elapsed time: 0.47628593 seconds (93393484 bytes allocated, 7.37% gc time)


(12917,3)

In [36]:
# sort options
df = DataFrame(Strike = allOpts[:, 1], Expiry = allOpts[:, 2], IsCall = allOpts[:, 3])
df_sorted = sort(df, cols = [:Expiry, :IsCall, :Strike])

head(df_sorted)

Unnamed: 0,Strike,Expiry,IsCall
1,4200,732513,0
2,4300,732513,0
3,4400,732513,0
4,4500,732513,0
5,4600,732513,0
6,4700,732513,0


In [39]:
# create string identifiers (allows usage as column names)
ids = String[string("o", x) for x in 1:size(df_sorted, 1)]

12917-element Array{String,1}:
 "o1"    
 "o2"    
 "o3"    
 "o4"    
 "o5"    
 "o6"    
 "o7"    
 "o8"    
 "o9"    
 "o10"   
 "o11"   
 "o12"   
 "o13"   
 ⋮       
 "o12906"
 "o12907"
 "o12908"
 "o12909"
 "o12910"
 "o12911"
 "o12912"
 "o12913"
 "o12914"
 "o12915"
 "o12916"
 "o12917"

In [41]:
# get expiry dates as Date
expy = df_sorted[:Expiry]
expyDats = Date[Date(Dates.UTD(int(x))) for x in expy]

12917-element Array{Date,1}:
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 2006-07-21
 ⋮         
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19
 2014-12-19

In [43]:
opts = DataFrame(ID = ids, Expiry = expyDats, Strike = df_sorted[:Strike], IsCall = df_sorted[:IsCall])
head(opts)

Unnamed: 0,ID,Expiry,Strike,IsCall
1,o1,2006-07-21,4200,0
2,o2,2006-07-21,4300,0
3,o3,2006-07-21,4400,0
4,o4,2006-07-21,4500,0
5,o5,2006-07-21,4600,0
6,o6,2006-07-21,4700,0


In [52]:
obs_optParams = [array(optData[[:Expiry, :Strike]]) optData[:IsCall]]

2025129x3 Array{Int64,2}:
 732660  1800  1
 732660  1800  0
 732660  2000  1
 732660  2000  0
 732660  2200  1
 732660  2200  0
 732660  2400  1
 732660  2400  0
 732660  2600  0
 732660  2800  1
 732660  2800  0
 732660  3000  1
 732660  3000  0
      ⋮         
 735250  9650  0
 735250  9700  1
 735250  9700  0
 735250  9750  1
 735250  9750  0
 735250  9800  1
 735250  9800  0
 735250  9850  1
 735250  9900  1
 735250  9900  0
 735250  9950  1
 735250  9950  0

In [59]:
numDates = Int[Dates.value(x) for x in opts[:Expiry]]
opts_optParams = [numDates array(opts[[:Strike, :IsCall]])]

12917x3 Array{Int64,2}:
 732513   4200  0
 732513   4300  0
 732513   4400  0
 732513   4500  0
 732513   4600  0
 732513   4700  0
 732513   4800  0
 732513   4850  0
 732513   4900  0
 732513   4950  0
 732513   5000  0
 732513   5050  0
 732513   5100  0
      ⋮          
 735586  10000  1
 735586  10100  1
 735586  10200  1
 735586  10300  1
 735586  10400  1
 735586  10500  1
 735586  10600  1
 735586  10700  1
 735586  11000  1
 735586  11400  1
 735586  11600  1
 735586  12000  1

- get option ID for each observation

In [72]:
function findEqualRow(rowToFind::Array{Int, 2}, listedRows::Array{Int, 2})
    nRows, nCols = size(listedRows)
    equTrue = trues(nRows)
    for jj=1:nCols
        equTrue = equTrue & (listedRows[:, jj] .== rowToFind[1, jj])
    end
    return find(equTrue)
end

findEqualRow (generic function with 1 method)

In [73]:
findEqualRow(obs_optParams[1200, :], opts_optParams)

1-element Array{Int64,1}:
 226

In [74]:
obs_optParams[1200,:]

1x3 Array{Int64,2}:
 732569  6500  0

In [75]:
opts_optParams[226, :]

1x3 Array{Int64,2}:
 732569  6500  0

In [82]:
@time begin 
    nObs = size(obs_optParams, 1)
    obs_optIDs = Array(String, nObs)
    for ii=1:nObs
        currParams = obs_optParams[ii, :]
        ind = findEqualRow(currParams, opts_optParams)[1]
        obs_optIDs[ii] = opts[ind, :ID]
    end
end

elapsed time: 2269.044490644 seconds (684330751480 bytes allocated, 85.27% gc time)


In [83]:
obs_optIDs

2025129-element Array{String,1}:
 "o530"  
 "o457"  
 "o531"  
 "o458"  
 "o532"  
 "o459"  
 "o533"  
 "o460"  
 "o461"  
 "o534"  
 "o462"  
 "o535"  
 "o463"  
 ⋮       
 "o12200"
 "o12257"
 "o12201"
 "o12258"
 "o12202"
 "o12259"
 "o12203"
 "o12260"
 "o12261"
 "o12204"
 "o12262"
 "o12205"

In [86]:
optData[:ID] = obs_optIDs
head(optData)

Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall,ID
1,732495,3931.1,,,1,104,1800,732660,5712.69,0.031667592146348,0.466666666666667,True,o530
2,732495,0.1,,,0,5515,1800,732660,5712.69,0.0316675921463482,0.466666666666667,False,o457
3,732495,3734.0,,,0,2152,2000,732660,5712.69,0.0316675921463482,0.466666666666667,True,o531
4,732495,0.1,,,0,20941,2000,732660,5712.69,0.0316675921463482,0.466666666666667,False,o458
5,732495,3536.9,,,0,2,2200,732660,5712.69,0.0316675921463482,0.466666666666667,True,o532
6,732495,0.1,,,0,4626,2200,732660,5712.69,0.0316675921463482,0.466666666666667,False,o459


In [91]:
writetable("../data/rel_data/optData.csv", optData)
writetable("../data/rel_data/underlying.csv", underlying)
writetable("../data/rel_data/opts.csv", opts)

- transform to dates

In [3]:
function num2date(numb::Int64)
    return Date(Dates.UTD(numb))
end
function num2date(numb::Array{Int64})
    nDats = size(numb, 1)
    dats = Array(Date, nDats)
    for ii=1:nDats
        dats[ii] = num2date(numb[ii])
    end
    return dats
end

@time begin
    optData[:Date] = num2date(optData[:Date].data)
    optData[:Expiry] = num2date(optData[:Expiry].data)
end
optData

elapsed time: 0.053756241 seconds (33650864 bytes allocated)


Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall
1,2006-07-03,3931.1,,,1,104,1800,2006-12-15,5712.69,0.031667592146348,0.466666666666667,true
2,2006-07-03,0.1,,,0,5515,1800,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
3,2006-07-03,3734.0,,,0,2152,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true
4,2006-07-03,0.1,,,0,20941,2000,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
5,2006-07-03,3536.9,,,0,2,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true
6,2006-07-03,0.1,,,0,4626,2200,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
7,2006-07-03,3339.8,,,0,2009,2400,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true
8,2006-07-03,0.1,,,0,13367,2400,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
9,2006-07-03,0.2,,,0,2297,2600,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,false
10,2006-07-03,2945.9,,,0,624,2800,2006-12-15,5712.69,0.0316675921463482,0.466666666666667,true


In [6]:
df = DataFrame(a = [3, 4], b = [5, 3], "4" = [4, 2])

LoadError: syntax: keyword argument is not a symbol: "4"
while loading In[6], in expression starting on line 1

- arbitrarily select an expiry date

In [6]:
chosenExpiry = unique(optData[:Expiry])[35]

2008-08-15

- get all option data with given expiry date

In [8]:
inds = optData[:Expiry] .== chosenExpiry
expiryData = optData[inds, :]

Unnamed: 0,Date,Option_Price,Bid,Ask,Volume,Open_Interest,Strike,Expiry,DAX,EONIA_matched,Time_to_Maturity,IsCall
1,2008-05-19,87.1,,,785,750,6700,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
2,2008-05-19,104.2,,,2,2,6800,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
3,2008-05-19,113.7,,,66,66,6850,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
4,2008-05-19,124.2,,,4,4,6900,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
5,2008-05-19,135.5,,,380,380,6950,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
6,2008-05-19,147.6,,,5,5,7000,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,false
7,2008-05-19,306.8,,,32,30,7200,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true
8,2008-05-19,275.6,,,5,5,7250,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true
9,2008-05-19,246.0,,,5,5,7300,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true
10,2008-05-19,218.2,,,75,75,7350,2008-08-15,7225.94,0.0474534715097305,0.250980392156863,true


In [10]:
writetable("../data/singleExpiry.csv", expiryData)