In [7]:
# Imports and pre-defined functions
from keras.models import Sequential, Model, Input
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
import numpy as np
import pandas as pd
from tcn import TCN

def readTrain():
    train = pd.read_csv("./train_data.csv")
    data = train[['MidPrice', 'BidPrice1', 'BidVolume1', 'AskPrice1', 'AskVolume1']]
    data['BidVolume1'] -= data['AskVolume1']
    data = data.drop(columns=['AskVolume1'])
    return data

def readPredict():
    test = pd.read_csv("./test_data.csv")
    data = test[['MidPrice', 'BidPrice1', 'BidVolume1', 'AskPrice1', 'AskVolume1']]
    data['BidVolume1'] -= data['AskVolume1']
    data = data.drop(columns=['AskVolume1'])
    return data

def normalize(train):
    train_norm = train.apply(lambda x: (x - np.mean(x)) / (np.std(x)))
    return train_norm

def buildY_data(train):
    y_train = []
    
    for i in range(train.shape[0] - 35):
        if not (i % 10000):
            print("BUILDING ITERATIONS: ", i)
        y_train.append(1000*(np.sum(np.array(train['MidPrice'][i+10:i+30]))/20 - train['MidPrice'][i+9]))
        
    return np.array(y_train)

def buildX_data(train):
    training = np.array(train)
    X_train = []
    
    for i in range(train.shape[0] - 35):
        if not (i % 10000):
            print("BUILDING ITERATIONS: ", i)
        X_train.append(np.array(list(np.array(training[i:i+10]))))
    
    return np.array(X_train)

def shuffle(X,Y):
    np.random.seed(10)
    randomList = np.arange(X.shape[0])
    np.random.shuffle(randomList)
    return X[randomList], Y[randomList]

def splitData(X,Y,rate):
    X_train = X[int(X.shape[0]*rate):]
    Y_train = Y[int(Y.shape[0]*rate):]
    X_val = X[:int(X.shape[0]*rate)]
    Y_val = Y[:int(Y.shape[0]*rate)]
    return X_train, Y_train, X_val, Y_val

In [45]:
# read csv
train = readTrain()
print(train)

# Normalization
Y_train = buildY_data(train)
train_norm = normalize(train)
X_train = buildX_data(train_norm)

# shuffle the data, and random seed is 10
X_train, Y_train = shuffle(X_train, Y_train)

# split training data and validation data
X_train, Y_train, X_val, Y_val = splitData(X_train, Y_train, 0.15)
print("X: ", X_train, "Y", Y_train)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


        MidPrice  BidPrice1  BidVolume1  AskPrice1
0         3.7865      3.786      8600.0      3.787
1         3.7835      3.783    -75000.0      3.784
2         3.7835      3.783    -18400.0      3.784
3         3.7845      3.784    129000.0      3.785
4         3.7835      3.783    147600.0      3.784
5         3.7835      3.783    281900.0      3.784
6         3.7860      3.784    409200.0      3.788
7         3.7860      3.784     47300.0      3.788
8         3.7860      3.784    -69400.0      3.788
9         3.7855      3.784    -71300.0      3.787
10        3.7860      3.784    -54100.0      3.788
11        3.7870      3.786    -83900.0      3.788
12        3.7875      3.787     -5200.0      3.788
13        3.7855      3.785     22400.0      3.786
14        3.7855      3.785     22500.0      3.786
15        3.7845      3.784    304800.0      3.785
16        3.7845      3.784    291200.0      3.785
17        3.7835      3.783    340600.0      3.784
18        3.7840      3.783    

In [84]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

model = Sequential()
i = Input(batch_shape=(None, 10, 4))

o = TCN(nb_filters=128, return_sequences=True, name='TCN_1')(i)  # The TCN layers are here.
o = TCN(nb_filters=64, return_sequences=True, name="TCN_2")(o)
o = TCN(nb_filters=32, return_sequences=True, name="TCN_3")(o)
o = TCN(nb_filters=16, return_sequences=False, name="TCN_4")(o)
o = Dense(30)(o)
o = Dense(10)(o)
o = Dense(1)(o)

m = Model(inputs=[i], outputs=[o])
model.add(m)
model.summary()

model.compile(loss='mse', optimizer='adam')
callback = EarlyStopping(monitor="loss", patience=1, verbose=1, mode="auto")
model.fit(X_train, Y_train, epochs=10, batch_size=200, validation_data=(X_val, Y_val), callbacks=[callback])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model_26 (Model)             (None, 1)                 406895    
Total params: 406,895
Trainable params: 406,895
Non-trainable params: 0
_________________________________________________________________
Train on 365504 samples, validate on 64500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1e1d456d8>

In [85]:
# read csv
predict = readPredict()

# Normalization
p = normalize(predict)
predict_norm = np.array(p)

X_predict = []    
y_predict_add = []
for i in range(0, predict_norm.shape[0], 10):
    if not (i % 10000):
        print("BUILDING ITERATIONS: ", i)
    X_predict.append(np.array(predict_norm[i:i+10]))
    y_predict_add.append([predict['MidPrice'][i+9]])

print(X_predict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


BUILDING ITERATIONS:  0
[array([[ 1.95438877,  1.95516143, -0.07608349,  1.95360567],
       [ 1.95438877,  1.95516143,  0.07611993,  1.95360567],
       [ 2.00238003,  1.99782393, -0.81314848,  2.00692451],
       [ 2.01837712,  2.01915518,  0.65109653,  2.01758828],
       [ 2.02370948,  2.01915518,  0.3081636 ,  2.02825205],
       [ 1.94372404,  1.94449581,  0.02656828,  1.9429419 ],
       [ 1.94372404,  1.94449581,  0.02656828,  1.9429419 ],
       [ 1.98638295,  1.9871583 ,  0.08270992,  1.98559697],
       [ 1.98638295,  1.9871583 ,  0.06458745,  1.98559697],
       [ 1.99704767,  1.9871583 , -0.16378104,  2.00692451]]), array([[ 2.0077124 ,  2.00848955, -0.66816871,  2.00692451],
       [ 2.0077124 ,  2.00848955,  0.00907946,  2.00692451],
       [ 2.0077124 ,  2.00848955,  0.03822749,  2.00692451],
       [ 2.0077124 ,  2.00848955, -0.02830606,  2.00692451],
       [ 2.0077124 ,  2.00848955, -0.29317295,  2.00692451],
       [ 2.0077124 ,  2.00848955, -0.26339127,  2.00692451

In [86]:
y_pred = model.predict(np.array(X_predict))
y_pred /= 1000
cnt=0
print(predict)
for i in range(0, predict_norm.shape[0], 10):
    y_pred[cnt][0]+=predict['MidPrice'][i+9]
    cnt+=1
print(y_pred)

      MidPrice  BidPrice1  BidVolume1  AskPrice1
0       3.4255      3.425    -74400.0      3.426
1       3.4255      3.425     45700.0      3.426
2       3.4300      3.429   -656000.0      3.431
3       3.4315      3.431    499400.0      3.432
4       3.4320      3.431    228800.0      3.433
5       3.4245      3.424      6600.0      3.425
6       3.4245      3.424      6600.0      3.425
7       3.4285      3.428     50900.0      3.429
8       3.4285      3.428     36600.0      3.429
9       3.4295      3.428   -143600.0      3.431
10      3.4305      3.430   -541600.0      3.431
11      3.4305      3.430     -7200.0      3.431
12      3.4305      3.430     15800.0      3.431
13      3.4305      3.430    -36700.0      3.431
14      3.4305      3.430   -245700.0      3.431
15      3.4305      3.430   -222200.0      3.431
16      3.4315      3.431     14500.0      3.432
17      3.4315      3.431     31600.0      3.432
18      3.4315      3.431   -529000.0      3.432
19      3.4315      

In [87]:
import csv
def predict_writeout(predict):
    # write csv
    with open("./predict.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            ["Id", "MidPrice"])
        for i in range(0, len(predict)):
            writer.writerow([(1 + i), predict[i][0]])

predict_writeout(y_pred)