In [300]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam, RMSprop, Adadelta
from keras.utils import np_utils, plot_model
from sys import stdout

In [301]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('font', family = 'serif', size = 17)
mpl.rcParams['xtick.major.size'] = 5
mpl.rcParams['xtick.minor.size'] = 2
mpl.rcParams['ytick.major.size'] = 5
mpl.rcParams['ytick.minor.size'] = 2

In [302]:
# Load the training set. Every non-blank line of data/train.txt is a row of
# whitespace-separated floats: the last value is the raw target, everything
# before it is the feature vector for that row.
raw_x = []
raw_y = []
# `with` closes the file even when a malformed line makes float() raise.
with open('data/train.txt','r') as fp:
    for line in fp:
        # split() (no argument) tolerates trailing/double spaces, which
        # split(' ') would turn into unparsable empty tokens.
        fields = line.split()
        if not fields:
            continue  # skip blank lines instead of crashing on them
        tmp = list(map(float, fields))
        a = tmp[-1]
        raw_x.append(np.array(tmp[:-1]))
        # Bucket the raw target into three classes:
        #   2 when above 0.5, 1 when below -0.5, 0 otherwise.
        if a > 0.5:
            raw_y.append(2)
        elif a < -0.5:
            raw_y.append(1)
        else:
            raw_y.append(0)
raw_y = np.array(raw_y)

In [303]:
def getMat(arr, win):
    """Return every length-`win` sliding window over `arr`.

    Window i is built from arr[i:i+win], so there are len(arr) - win + 1
    windows (none when `win` exceeds len(arr)).  Each window is returned as
    its own numpy array.  A '=' progress mark is written to stdout roughly
    50 times over the whole run.

    Parameters
    ----------
    arr : sequence of row vectors
    win : int, number of consecutive rows per window

    Returns
    -------
    list of numpy arrays, one per window
    """
    n_windows = len(arr) - win + 1
    # Floor division keeps the interval an integer; clamping to 1 avoids a
    # modulo-by-zero when there are fewer than 50 windows.
    tick_every = max(1, n_windows // 50)
    ans = []
    for i in range(n_windows):
        if i % tick_every == 0:
            stdout.write('=')
            stdout.flush()
        ans.append(np.array([np.array(vec) for vec in arr[i:i + win]]))
    return ans

In [304]:
print np.shape(raw_x), np.shape(raw_y)
n_feat = np.shape(raw_x)[1]
print n_feat

(1262394, 13) (1262394,)
13


### Getting price information

In [None]:
fp = open('data/prcs.txt','r')
prc = []
for line in fp:
    prc.append(np.array(map(float, line.split(' '))))
print np.shape(prc)
ask = prc[0]
bid = prc[1]
print ask[:5]
print bid[:5]

(2, 1262394)
[ 10027.  10027.  10027.  10027.  10027.]
[ 10026.  10026.  10026.  10026.  10026.]


### Generating matrix data

In [None]:
win = 25
raw_x = getMat(raw_x , win)
raw_y = raw_y[win-1:]
ask = ask[win-1:]
bid = bid[win-1:]
print "\n",np.shape(raw_x), np.shape(raw_y), np.shape(ask), np.shape(bid)



In [None]:
# Chronological 70/30 split (no shuffling): the first 70% of the samples are
# used for training, the remainder for testing.
# `//` keeps N_train an integer under true division as well; a float here
# would be rejected as a slice index.
N_train = len(raw_y) * 7 // 10
N_test = len(raw_y) - N_train
x_train, x_test = raw_x[:N_train], raw_x[N_train:]
y_train, y_test = raw_y[:N_train], raw_y[N_train:]
# Prices aligned with the test samples, used for the out-of-sample PnL.
ask_test = ask[N_train:]
bid_test = bid[N_train:]

In [None]:
def comp_cls_wts(y, pwr = 0.64):
    """Compute a weight for every distinct label in `y`.

    The weight of a label is (len(y) / count(label)) ** pwr, evaluated as
    len(y)**pwr / count**pwr, so rarer labels get larger weights.

    Parameters
    ----------
    y : iterable of hashable labels (must support len())
    pwr : exponent applied to the inverse class frequency

    Returns
    -------
    dict mapping each label to its weight; empty when `y` is empty
    """
    # Count all labels in a single pass instead of rebuilding list(y) and
    # rescanning it once per class.
    counts = {}
    for label in y:
        counts[label] = counts.get(label, 0) + 1
    # Work in floats so an integer `pwr` cannot trigger integer division.
    total = float(len(y))
    dic = {}
    for label, count in counts.items():
        dic[label] = total**pwr / float(count)**pwr
    return dic
# Per-class weights derived from the training labels; passed to model.fit
# as class_weight in the training loop.
cls_wts = comp_cls_wts(y_train)
# Bare expression so the notebook displays the resulting dict.
cls_wts

In [None]:
# One-hot encode the integer labels into 3 columns for the softmax output.
# NOTE: overwrites y_train / y_test, so this cell must run exactly once.
y_train, y_test = (np_utils.to_categorical(labels, 3) for labels in (y_train, y_test))

In [None]:
# Stack the windows into 4-D arrays (samples, win, n_feat, 1); the trailing
# axis of size 1 matches the input_shape given to the first Conv2D layer.
x_train = np.reshape(np.array(x_train), (N_train, win, n_feat, 1))
x_test = np.reshape(np.array(x_test), (N_test, win, n_feat, 1))
y_train, y_test = np.array(y_train), np.array(y_test)

In [None]:
# Number of target classes (labels 0, 1 and 2); sizes the softmax output layer.
n_cls = 3

In [None]:
# Number of output classes for the final softmax layer.
n_cls = 3
# Two convolution stages followed by a dense classifier head.  Layers are
# listed in the order they are applied.
model = Sequential([
    # Stage 1: 32 filters over a (7, 2*n_feat - 1) kernel, 'same' padding.
    Conv2D(32, kernel_size = (7, 2*n_feat - 1), input_shape = (win, n_feat, 1), padding = 'same'),
    Activation('relu'),
    Dropout(0.12),
    # Stage 2: 64 filters over a (7, n_feat) kernel, 'valid' padding.
    Conv2D(64, kernel_size = (7, n_feat), padding='valid'),
    Activation('relu'),
    Dropout(0.25),
    # Classifier head.
    Flatten(),
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(n_cls, activation = 'softmax'),
])
model.summary()

In [None]:
# Training configuration: default SGD on categorical cross-entropy,
# tracking accuracy.
loss = 'categorical_crossentropy'
metrics = ['accuracy']
optimizer = SGD()
model.compile(loss=loss, metrics=metrics, optimizer=optimizer)

# Keep only the leading 240000 training and 100000 test samples.
# NOTE: cls_wts was computed earlier from the full, untruncated y_train.
x_train, y_train = x_train[:240000], y_train[:240000]
x_test, y_test = x_test[:100000], y_test[:100000]

In [None]:
def getPnl(act, ask, bid, fee = 0.5):
    """Replay a sequence of predicted actions and return (flips, pnl).

    The position is always one of 0 (initially), +1 or -1.  For each step i:
      * act[i] == 2 while not already long: buy (1 - pos) units at ask[i]
        and end up at +1;
      * act[i] == 1 while not already short: sell (pos + 1) units at bid[i]
        and end up at -1;
      * anything else: do nothing.
    `fee` is charged once per trade (it is spread over the traded quantity
    in the per-unit price).  After the last step the open position is valued
    at the fee-adjusted per-unit price of the most recent trade.

    Parameters
    ----------
    act : sequence of action codes; only len(act) steps are replayed
    ask, bid : price sequences indexed like `act` (may be longer)
    fee : cost per trade

    Returns
    -------
    (flip, pnl) : number of trades executed and the resulting profit/loss
    """
    # Force float so fee/qty cannot be truncated by integer division when
    # the caller passes an int fee and qty is the int 2.
    fee = float(fee)
    pnl = 0.
    pos = 0.
    prc = 0.
    flip = 0
    for i in range(len(act)):
        if act[i] == 2 and pos < 1:
            qty = 1 - pos                 # units needed to reach +1
            prc = ask[i] + fee/qty        # per-unit buy price incl. fee
            pnl -= prc*qty
            pos = 1
            flip += 1
        elif act[i] == 1 and pos > -1:
            qty = pos + 1                 # units needed to reach -1
            prc = bid[i] - fee/qty        # per-unit sell price net of fee
            pnl += prc*qty
            pos = -1
            flip += 1
    # Value whatever position is still open at the last traded price.
    pnl += pos*prc
    return flip, pnl
# Marker printed when this cell is executed, ahead of the training cell.
print "start training"

In [None]:
# Per-round history, one entry appended per outer iteration; consumed by the
# plotting cell.
steps = []
train_loss = []
test_loss = []
train_accu = []
test_accu = []
in_pnl = []
out_pnl = []
in_flip = []
out_flip = []
# 400 rounds; each round continues training the same model for 10 epochs and
# then records loss, accuracy and trading statistics.
for i in range(400):
    steps.append(i)
    # Silent fit (verbose = 0) with the per-class weights computed earlier.
    res = model.fit(x_train, y_train, batch_size=512, epochs = 10, verbose = 0, validation_data=(x_test, y_test), 
                    class_weight = cls_wts)
    # evaluate() yields (loss, accuracy) because the model was compiled with
    # metrics = ['accuracy'].
    lr, ar = model.evaluate(x_train, y_train, batch_size=128)
    train_loss.append(lr)
    train_accu.append(ar)
    le, ae = model.evaluate(x_test, y_test, batch_size=128)
    test_loss.append(le)
    test_accu.append(ae)
    # The figures printed here are the test-set loss and accuracy.
    print "\n The loss of the model is ",le
    print "The accuracy of the model is ",ae
    # Predicted class per sample, fed to getPnl as the action sequence.
    in_pred = model.predict_classes(x_train)
    out_pred = model.predict_classes(x_test)
    # In-sample: the full ask/bid series are passed, but getPnl only reads
    # the first len(in_pred) entries, which line up with x_train because the
    # training samples start at index 0.
    inf, inp = getPnl(in_pred, ask, bid)
    outf, outp = getPnl(out_pred, ask_test, bid_test)
    in_pnl.append(inp)
    in_flip.append(inf)
    out_pnl.append(outp)
    out_flip.append(outf)
    print "The (flip, pnl) for the in sample is ", inf, inp
    print "The (flip, pnl) for the out sample is ", outf, outp

In [None]:
# 2x2 summary figure of the training history: loss, accuracy, number of
# trades and total PnL, each plotted against the round index in `steps`.
fig, axes = plt.subplots(2,2, figsize = (12, 12))
fig.subplots_adjust(hspace = 0.4, wspace = 0.4)

# One entry per panel: (axis, title, [(series, legend label), ...]).
# Driving all four panels from this table replaces four copy-pasted stanzas.
panels = [
    (axes[0][0], 'Loss',
     [(train_loss, 'train loss'), (test_loss, 'test loss')]),
    (axes[0][1], 'Accuracy',
     [(train_accu, 'train accuracy'), (test_accu, 'test accuracy')]),
    (axes[1][0], '# of flips',
     [(in_flip, 'in sample'), (out_flip, 'out sample')]),
    (axes[1][1], 'Total pnl',
     [(in_pnl, 'in sample'), (out_pnl, 'out sample')]),
]

# Ending the cell on a loop (rather than a bare legend() call) also keeps the
# Legend object's repr out of the cell output.
for ax, title, curves in panels:
    ax.set_title(title)
    for values, label in curves:
        ax.plot(steps, values, label = label)
    ax.set_xlabel('# of steps')
    ax.legend()
