In [4]:
# ThoroughBet Simulation


## Load necessary modules

In [27]:
import numpy as np

from utils import timestamp, YEAR
from utils.arrayview import ArrayView, TimeseriesView


from prediction.models.preprocessing import Model
from prediction.models.prediction import factornames_trimmed
from prediction.models.parameters import factor_build_end

## Load data

In [28]:
# Load the full ArrayView from disk; this is the `av` the Model is constructed on further down.
av = ArrayView.from_file('../datadev/brain_final2cut.av.bcolz')

In [30]:
# Collect the per-slice ArrayViews into a dict keyed by slice index.
# Slice files are numbered consecutively from 0; loading stops at the first
# index for which ArrayView.from_file raises ValueError.
tsav = {}
sl = 0
more_slices = True
while more_slices:
    slice_path = '../datadev/brain_final2_slice_%i.av.bcolz' % sl
    try:
        tsav[sl] = ArrayView.from_file(slice_path)
    except ValueError:
        # Treated as "no further slice": leave `sl` at the count of loaded slices.
        more_slices = False
    else:
        sl += 1

## Preprocessing

In [31]:
# Build the model on `av`; the out-of-sample start is set to factor_build_end + YEAR.
mod = Model(av, oos_start=factor_build_end+YEAR)

In [32]:
# Hashed factor identifiers handed to the preprocessing step as its
# `high_kurtosis_factors` argument.
HIGH_KURTOSIS_FACTORS_hashed = {
    'z64f5be67e',
    'z90adc182a',
    'z7081bf371',
    'z34b808e99',
    'z757be272e',
    'z5a85cd6a9',
    'zf991b634a',
    'z62651f605',
    'zd002b7067',
    'z2ef7fedca',
    'z6f11029f7',
    'z412893062',
    'z919b9585a',
    'z89b0eda37',
    'z31780b3f4',
    'z6631693d3',
    'z0b27f29ad',
    'zd7cd94e4c',
    'zf5b2aef2a',
}

# Hashed factor identifiers handed to the preprocessing step as its
# `price_factors` argument.
PRICE_FACTORS_hashed = {
    'zb392bb74a',
    'z6809c316d',
    'zd678f0538',
    'z027f9f0f5',
    'z88e79930c',
    'z4a72dc02f',
    'z1a3573928',
    'z7b15df227',
}

In [33]:
# Preprocess the trimmed factor list, telling the model which factors are to be
# handled as high-kurtosis factors and which as price factors.
factors = mod._preprocess_factors(
    factornames_trimmed,
    high_kurtosis_factors=HIGH_KURTOSIS_FACTORS_hashed,
    price_factors=PRICE_FACTORS_hashed,
    verbose=True,
)

INFO:models:Getting factors from av and rescaling...


. . . . .

INFO:models:Filling in missing values...
INFO:models:Computing each factor as linear combination of all the others...


 . . . . .

INFO:models:Number of missing patterns: 10443


 . . . . . . . . . . 10000


## Fit conditional logit model to data

In [34]:
# Fit on all slices; only the first two items of the returned sequence
# (coefficients and step-1 probabilities) are kept.
coefs, step1probs = mod.fit_slices(
    tsav,
    factors,
    depth=3,
    lmbd=10,
    verbose=True,
    fit_afresh=True,
)[:2]

INFO:models:Fitting...
INFO:models:Slice: 0





INFO:models:ll[-1851.6033229657546, -1998.6071724230685, -1998.6071724230685]
INFO:models:pr2[0.174621237456554, 0.1378185257174348, 0.1378185257174348]
INFO:models:Slice: 1





INFO:models:ll[-1854.8089622579307, -1994.3388084416135, -1994.3388084416135]
INFO:models:pr2[0.17242800791742552, 0.13965986022332655, 0.13965986022332655]
INFO:models:Slice: 2





INFO:models:ll[-1858.297599771405, -1991.6021727256239, -1991.6021727256239]
INFO:models:pr2[0.17046013553599648, 0.1408404206900068, 0.1408404206900068]
INFO:models:Slice: 3





INFO:models:ll[-1859.4523284504523, -1988.0848318949927, -1988.0848318949927]
INFO:models:pr2[0.16932650910723956, 0.14235777044475184, 0.14235777044475184]
INFO:models:Slice: 4





INFO:models:ll[-1872.8383367349022, -1975.0470900066396, -1975.0470900066396]
INFO:models:pr2[0.16062598782885096, 0.14798213709254471, 0.14798213709254471]
INFO:models:Slice: 5





INFO:models:ll[-1887.9521239369496, -1964.941503433573, -1964.941503433573]
INFO:models:pr2[0.15193222721268829, 0.15234159784615209, 0.15234159784615209]
INFO:models:Slice: 6





INFO:models:ll[-1914.7368860848851, -1949.833046370755, -1949.833046370755]
INFO:models:pr2[0.13699354058272784, 0.15885925272315449, 0.15885925272315449]
INFO:models:Slice: 7





INFO:models:ll[-1925.5553811555162, -1942.2966968713106, -1942.2966968713106]
INFO:models:pr2[0.13084608382851137, 0.16211036730524719, 0.16211036730524719]
INFO:models:Slice: 8





INFO:models:ll[-1926.5295781463274, -1942.0676593556445, -1942.0676593556445]
INFO:models:pr2[0.12899280223685305, 0.16220917206570629, 0.16220917206570629]
INFO:models:Slice: 9





INFO:models:ll[-1932.309038966162, -1936.1950033115759, -1936.1950033115759]
INFO:models:pr2[0.12542470679859341, 0.16474258399171815, 0.16474258399171815]
INFO:models:Compute step 1 probabilities...


. . . . . . . . . . 10


INFO:models:Done.


In [35]:
import pickle

In [36]:
def write_simdata(step1probs, oos, coefs, filename = 'simdata.p', datadir = '../datadev/'):
    '''
    Pickle the simulation inputs to the file <filename> inside <datadir>.

    <step1probs> is expected to be a matrix N_slices x len(av).
    <oos> is a boolean mask denoting the out of sample range. len(oos) should equal len(av).
    <coefs> is a coefficient matrix with the size N_slices x 3.
    <filename> is the name of the output file.
    <datadir> is the directory the file is written to (default: '../datadev/').

    The pickled object is the list [step1probs[:, oos], oos, coefs], i.e. only
    the out-of-sample columns of <step1probs> are stored.
    '''
    import os  # local import so the function does not depend on another cell importing os

    # open() replaces the Python-2-only file() builtin, and the `with` block
    # guarantees the handle is closed even if pickle.dump raises.
    with open(os.path.join(datadir, filename), 'wb') as f:
        pickle.dump([step1probs[:, oos], oos, coefs], f)

In [37]:
# Save the fit outputs under the default filename ('simdata.p'); mod.oos is passed as the out-of-sample mask.
write_simdata(step1probs, mod.oos, coefs)