In [None]:
import scipy
import sklearn
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import os

In [None]:
from backtester.dataSource.csv_data_source import CsvDataSource

In [None]:
# Load the data: configuration for the CSV data source, followed by the
# date-bounded source object `ds` that the next cell reads book data from.
# NOTE(review): cachedFolderName is assigned here but is not passed to any
# call in the visible cells; the CsvDataSource call below hard-codes
# 'historicalData/' instead. Confirm which folder is intended.
cachedFolderName = '/data/'
dataSetId = 'trainingData1'
startDate = '2017/01/06'
endDate = '2017/02/09'
instrumentIds = ['MQK']
downloadUrl = 'https://github.com/Auquan/auquan-historical-data/raw/master/qq2Data'

# Unlike the training/validation/test sources created later, this one is
# restricted with startDateStr / endDateStr.
ds = CsvDataSource(cachedFolderName='historicalData/',
                             dataSetId=dataSetId,
                             instrumentIds=instrumentIds,
                             downloadUrl = downloadUrl,
                             timeKey = 'datetime',
                             timeStringFormat = '%Y-%m-%d %H:%M:%S',
                             startDateStr=startDate,
                             endDateStr=endDate,
                             liveUpdates=False,
                             pad=True)


In [None]:
#Loading our data
def loadData(ds, id):
    """Fetch one instrument's book data and add mid prices plus a target column.

    Parameters
    ----------
    ds : data source object
        Must expose ``_bookDataByInstrument[id].getBookData()`` returning a
        pandas DataFrame with the columns 'stockTopBidPrice',
        'stockTopAskPrice', 'futureTopBidPrice', 'futureTopAskPrice' and
        'basis'.
    id : hashable
        Instrument key into ``ds._bookDataByInstrument``. (The name shadows
        the builtin ``id``; it is kept so existing callers keep working.)

    Returns
    -------
    pandas.DataFrame
        The very frame returned by ``getBookData()`` (modified in place, not
        copied) with:
        * 'Stock Price'  - mid of the stock top bid/ask,
        * 'Future Price' - mid of the future top bid/ask,
        * 'Y(Target)'    - 'basis' taken 5 rows ahead (the last 5 rows are NaN),
        and with 'benchmark_score' / 'FairValue' removed when present.
    """
    data = ds._bookDataByInstrument[id].getBookData()

    data['Stock Price'] = (data['stockTopBidPrice'] + data['stockTopAskPrice']) / 2.0
    data['Future Price'] = (data['futureTopBidPrice'] + data['futureTopAskPrice']) / 2.0
    data['Y(Target)'] = data['basis'].shift(-5)

    # Remove the helper columns only if they exist. An unconditional `del`
    # raises KeyError when the column is absent, e.g. when this function is
    # run a second time on a frame it has already stripped.
    for column in ('benchmark_score', 'FairValue'):
        if column in data.columns:
            del data[column]
    return data

# Book data for the first configured instrument, read from the date-bounded
# source `ds` built above.
data = loadData(ds,instrumentIds[0])

In [None]:
def _loadDataSet(setId):
    """Create the CSV data source for `setId` and load the first instrument.

    All three splits below use the same settings and differ only in the
    data set id, so the constructor call lives here once instead of being
    copy-pasted per split. No start/end date is passed, unlike the
    date-bounded `ds` created earlier.

    Returns a ``(data_source, frame)`` pair, where `frame` is what
    ``loadData`` returns for ``instrumentIds[0]``.
    """
    source = CsvDataSource(cachedFolderName='historicalData/',
                           dataSetId=setId,
                           instrumentIds=instrumentIds,
                           downloadUrl=downloadUrl,
                           timeKey='datetime',
                           timeStringFormat='%Y-%m-%d %H:%M:%S',
                           liveUpdates=False,
                           pad=True)
    return source, loadData(source, instrumentIds[0])


# `dataSetId` is still reassigned before every split so that it ends up with
# the same value ('trainingData3') as before, in case later cells read it.

# Training Data
dataSetId = 'trainingData1'
ds_training, training_data = _loadDataSet(dataSetId)

# Validation Data
dataSetId = 'trainingData2'
ds_validation, validation_data = _loadDataSet(dataSetId)

# Test Data
dataSetId = 'trainingData3'
ds_test, out_of_sample_test_data = _loadDataSet(dataSetId)

In [None]:
def prepareData(data, period):
    """Attach the forward-looking target to `data` and drop incomplete rows.

    Works in place on `data` and returns None.

    * 'Y(Target)' for a row is the mean of 'basis' over the following
      `period` rows: a `period`-row rolling mean, moved `period` rows earlier.
    * A 'FairValue' column is removed when present.
    * Every row that contains a NaN in any column is dropped, which includes
      the trailing rows that have no complete forward window.
    """
    window_mean = data['basis'].rolling(period).mean()
    data['Y(Target)'] = window_mean.shift(-period)

    if 'FairValue' in data:
        data.pop('FairValue')

    data.dropna(inplace=True)

# `period` is used by prepareData both as the rolling-window length and as
# the number of rows the target is shifted by.
period = 5
# prepareData modifies each frame in place and returns nothing.
prepareData(training_data, period)
prepareData(validation_data, period)
prepareData(out_of_sample_test_data, period)

In [None]:
def difference(dataDf, period):
    """Return `dataDf` minus its own values from `period` rows earlier.

    Because the subtraction uses fill_value=0, a value that is missing on
    only one side is treated as 0. The first `period` rows have no lagged
    counterpart, so they come back as the original values rather than NaN.
    """
    lagged = dataDf.shift(period)
    return dataDf.subtract(lagged, fill_value=0)

def ewm(dataDf, halflife):
    """Exponentially weighted mean of `dataDf` with the given half-life.

    Uses pandas' adjusted weighting (adjust=True), produces a value from the
    very first row (min_periods=0) and does not skip NaNs when computing
    weights (ignore_na=False).
    """
    weighted = dataDf.ewm(
        halflife=halflife,
        min_periods=0,
        adjust=True,
        ignore_na=False,
    )
    return weighted.mean()

def rsi(data, period):
    """Relative-strength style oscillator of `data` over `period` rows.

    Row-to-row changes are split into gains and losses, each averaged with a
    simple rolling mean of length `period`, and combined as
    ``100 - 100 * avg_loss / (avg_loss + avg_gain)``.

    Special cases:
    * no losses in the window          -> 100
    * neither gains nor losses at all  -> 0
    * rows without a full window stay NaN.

    The first change is computed against a fill value of 0, so the first row
    contributes its raw value as a change.
    """
    change = data.sub(data.shift(1), fill_value=0)

    # Positive moves only / negative moves only (the other side zeroed out).
    gains = change.mask(change < 0, 0)
    losses = change.mask(change > 0, 0)

    avg_gain = gains.rolling(period).mean()
    avg_loss = -losses.rolling(period).mean()

    no_losses = avg_loss == 0
    flat = no_losses & (avg_gain == 0)

    score = 100 - (100 * avg_loss / (avg_loss + avg_gain))
    # Order matters: the flat case is a subset of no_losses and overrides it.
    score = score.mask(no_losses, 100)
    score = score.mask(flat, 0)

    return score

In [None]:
def create_features_again(data):
    """Build the feature matrix and target vector from a prepared frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'basis', 'stockTotalAskVol', 'futureTotalAskVol',
        'stockTotalBidVol', 'futureTotalBidVol' and 'Y(Target)'.
        The frame is only read, never modified.

    Returns
    -------
    (basis_X, basis_y)
        basis_X : DataFrame with the columns 'mom10', 'emabasis2',
            'emabasis5', 'emabasis10', 'basis', 'totalaskvolratio' and
            'totalbidvolratio'; NaNs are replaced by 0.
        basis_y : the 'Y(Target)' values.
        Rows whose target is missing are removed from both outputs, so X and
        y always have the same length and stay row-aligned.
    """
    basis_X = pd.DataFrame(index = data.index, columns =  [])

    # NOTE(review): the column is called 'mom10' but the lag used is 11 rows;
    # kept as is so existing feature values do not change. Confirm intent.
    basis_X['mom10'] = difference(data['basis'],11)

    basis_X['emabasis2'] = ewm(data['basis'],2)
    basis_X['emabasis5'] = ewm(data['basis'],5)
    basis_X['emabasis10'] = ewm(data['basis'],10)

    basis_X['basis'] = data['basis']

    # Despite the "ratio" names these are scaled volume differences
    # (stock minus future, divided by 100000).
    basis_X['totalaskvolratio'] = (data['stockTotalAskVol']-data['futureTotalAskVol'])/100000
    basis_X['totalbidvolratio'] = (data['stockTotalBidVol']-data['futureTotalBidVol'])/100000

    basis_X = basis_X.fillna(0)

    # Filter X and y with one positional mask instead of calling
    # dropna(inplace=True) on a column pulled out of `data`: the in-place
    # variant operates on a chained selection of the caller's frame and, when
    # it does drop rows, leaves X longer than y.
    has_target = data['Y(Target)'].notnull().values
    basis_y = data['Y(Target)'][has_target]
    basis_X = basis_X[has_target]

    print("Any null data in y: %s, X: %s"%(basis_y.isnull().values.any(), basis_X.isnull().values.any()))
    print("Length y: %s, X: %s"%(len(basis_y.index), len(basis_X.index)))

    return basis_X, basis_y

In [None]:
# NOTE: the "*_test" pair is built from validation_data; the separate
# out_of_sample_test_data frame is not used here.
basis_X_test, basis_y_test = create_features_again(validation_data)
basis_X_train, basis_y_train = create_features_again(training_data)

In [None]:
from tensorflow import keras

In [None]:
# Small 1-D convolutional regressor, declared as a single layer list.
# Input: sequences of 7 steps with 1 channel each. Presumably the 7 steps are
# the 7 feature columns produced by create_features_again - confirm against
# the code that reshapes the features before fitting.
model_cnn = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=[7, 1]),
    # Three causal convolutions with growing dilation (1, 2, 4), each with
    # 4 filters, kernel size 2 and ReLU activation.
    *(
        keras.layers.Conv1D(
            filters=4,
            kernel_size=2,
            strides=1,
            dilation_rate=rate,
            padding="causal",
            activation="relu",
        )
        for rate in (1, 2, 4)
    ),
    # Kernel-size-1 convolution down to 2 channels, no activation.
    keras.layers.Conv1D(filters=2, kernel_size=1),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.Flatten(),
    # Single linear output unit for the regression target.
    keras.layers.Dense(1),
])
# Trained with Adam on mean absolute error.
model_cnn.compile(optimizer='adam', loss='mae')