In [None]:
import sys  
# Put the DatasetCreation directory (data-preparation code) at the front of the module
# search path so that the training-data service can be imported by a later cell.
# NOTE(review): the leading '...' looks like a placeholder for the repository root —
# confirm the path resolves on the machine running this notebook.
sys.path.insert(0, '.../src/Ann/DatasetCreation/')


In [None]:
# CPU-ONLY SESSION SETUP (no NVIDIA card available)
# TODO: MICROSOFT AZURE CONFIGURATION
import tensorflow as tf
from keras import backend as K

# Thread count handed to TensorFlow for both intra-op and inter-op parallelism.
num_cores = 8

# Device limits: one CPU device, no GPU devices.
num_CPU, num_GPU = 1, 0

config = tf.ConfigProto(
    intra_op_parallelism_threads=num_cores,
    inter_op_parallelism_threads=num_cores,
    allow_soft_placement=True,
    device_count={'CPU': num_CPU, 'GPU': num_GPU},
)

# Create the session from this configuration and hand it to the Keras backend.
session = tf.Session(config=config)
K.set_session(session)

In [None]:
# HELPER FUNCTION: prevents data bias toward upwards market trend
def balance_short_long(targets_train, training_indizes):
    """Randomly drop majority-side samples so long and short targets cancel out.

    The imbalance is the sum of ``targets_train`` over ``training_indizes``.
    If it is positive, samples with a positive target are removed; if it is
    negative, samples with a negative target are removed. One sample is removed
    per unit of imbalance, so with targets encoded as +1 / -1 (presumably
    long / short — confirm against the dataset service) the remaining targets
    sum to zero.

    Args:
        targets_train: array-like of targets, indexable by the given indices.
        training_indizes: 1-D array-like of integer indices into ``targets_train``.

    Returns:
        numpy.ndarray: the indices that remain after balancing. The input is
        returned unchanged when the selection is already balanced.

    Side effects:
        Prints the sum of the remaining targets and consumes random numbers
        from NumPy's global random state.
    """
    # Local import: the notebook imports numpy only in a later cell.
    import numpy as np

    targets_train = np.asarray(targets_train)
    # Integer dtype keeps an empty index list usable for fancy indexing.
    training_indizes = np.asarray(training_indizes, dtype=np.intp)

    selected_targets = targets_train[training_indizes]
    n_more_longs_than_shorts = selected_targets.sum()
    algebraic_sign = 1 if n_more_longs_than_shorts > 0 else -1
    excess = n_more_longs_than_shorts * algebraic_sign

    if excess > 0:
        # Positions (within training_indizes) of the over-represented side.
        majority_positions = np.flatnonzero(selected_targets * algebraic_sign > 0)
        # Never ask for more removals than there are candidates.
        n_to_drop = min(int(np.ceil(excess)), majority_positions.size)
        # One vectorized draw replaces the former delete-one-per-iteration loop.
        drop_positions = np.random.choice(majority_positions, size=n_to_drop, replace=False)
        training_indizes = np.delete(training_indizes, drop_positions)

    print(sum(targets_train[training_indizes]))
    return training_indizes

In [None]:
# CREATE TRAINING DATASET
import time
import ann3xTrainingDatasetService

# perf_counter measures elapsed wall-clock time, including time spent waiting on I/O;
# process_time would count CPU time only and under-report the "time to load".
start = time.perf_counter()

service = ann3xTrainingDatasetService.ann3xTrainingDatasetService()

# No randomized 0.3/0.7 train/test sampling here, because different stocks are strongly
# correlated through beta (overall market movement).
# Solution: test on future market activity instead.

# Training data: all four quarters of 2013-2018.
(
    dly_cnn_input,
    dly_mlp_input,
    wkly_cnn_input,
    wkly_mlp_input,
    dly_indicator_cnn_input,
    wkly_indicator_cnn_input,
    targets,
    changes,
) = service.get_inputs_and_target(
    year_array = [2013,2014,2015,2016,2017,2018],
    quarter_array = [1,2,3,4],
    time_steps_dly = 100,
    time_steps_wkly = 50,
    indicator_length_dly = 20,
    indicator_length_wkly = 20)


print("Time to load training data in seconds:")
print(time.perf_counter() - start)

In [None]:
# CREATE TEST DATASET
import time

# perf_counter measures elapsed wall-clock time, including time spent waiting on I/O;
# process_time would count CPU time only and under-report the "time to load".
start = time.perf_counter()

service = ann3xTrainingDatasetService.ann3xTrainingDatasetService()

# Test data: all four quarters of 2019, loaded with the same window lengths as the
# training data.
(
    dly_cnn_input_test,
    dly_mlp_input_test,
    wkly_cnn_input_test,
    wkly_mlp_input_test,
    dly_indicator_cnn_input_test,
    wkly_indicator_cnn_input_test,
    targets_test,
    changes_test,
) = service.get_inputs_and_target(
    year_array = [2019],
    quarter_array = [1,2,3,4],
    time_steps_dly = 100,
    time_steps_wkly = 50,
    indicator_length_dly = 20,
    indicator_length_wkly = 20)


print("Time to load testing data in seconds:")
print(time.perf_counter() - start)

In [None]:
# CUSTOMIZED LOSS FUNCTION
def trading_loss(y_true, y_pred):
    """Loss that rewards predictions agreeing in sign with the target.

    The per-sample score is ``y_pred * y_true``. Where that product is negative
    (the prediction would have produced a trading loss) the score is counted
    four times: once plainly plus an extra factor of 3. The returned loss is
    ``0.5`` minus the mean of the weighted scores along the last axis, so it is
    linear in the product, not squared.

    Author's note, kept from the original: the choice of loss is still up for
    discussion, but it was the best performing one in the current setup.
    """
    signed_agreement = y_pred * y_true

    # 1.0 where prediction and target disagree in sign, 0.0 elsewhere.
    is_losing_trade = tf.cast((y_true * y_pred) < 0, 'float32')
    wrong_pred_factor = is_losing_trade * 3

    weighted_agreement = signed_agreement + signed_agreement * wrong_pred_factor
    return 0.5 - K.mean(weighted_agreement, axis=-1)

In [None]:
# ANN MODEL CLASS
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import LSTM
from keras.models import Model

def create_mlp(dim, regress=False):
    """Build the fully connected (MLP) branch as a Keras ``Sequential`` model.

    The network narrows 256 -> 128 -> 64 -> 32 -> 16 -> 8 -> 3 units with tanh
    activations; the first five Dense layers are each followed by Dropout with
    rates 0.5, 0.4, 0.3, 0.2 and 0.1.

    Args:
        dim: number of input features (passed as ``input_dim`` of the first layer).
        regress: when True, append a final single-unit tanh layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()

    # First layer carries the input dimension.
    model.add(Dense(256, input_dim=dim, activation="tanh"))
    model.add(Dropout(0.5))

    # Remaining Dense/Dropout pairs, with dropout shrinking as the layers narrow.
    for units, drop_rate in ((128, 0.4), (64, 0.3), (32, 0.2), (16, 0.1)):
        model.add(Dense(units, activation="tanh"))
        model.add(Dropout(drop_rate))

    model.add(Dense(8, activation="tanh"))
    model.add(Dense(3, activation="tanh"))

    # check to see if the regression node should be added
    if regress:
        model.add(Dense(1, activation="tanh"))
    return model

def create_cnn(n_days, n_features, filters, kernel_size, regress=False):
    """Build the 1D-convolutional time-series branch as a Keras ``Sequential`` model.

    Args:
        n_days: number of time steps per sample; more than 50 adds a second
            convolution stage at the front.
        n_features: number of features per time step; more than 5 adds an extra
            75-unit Dense layer after flattening.
        filters: number of filters used by every Conv1D layer.
        kernel_size: kernel size used by every Conv1D layer.
        regress: when True, append a final single-unit tanh layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    def conv_layer(**extra):
        # Every convolution shares filters, kernel size and activation.
        return Conv1D(filters=filters, kernel_size=kernel_size, activation='relu', **extra)

    model = Sequential()

    # Convolutional layers: the first stage is always present ...
    model.add(conv_layer(input_shape=(n_days, n_features)))
    model.add(Dropout(0.3))
    model.add(MaxPooling1D(2))
    # ... and longer series get a second, identical stage.
    if n_days > 50:
        model.add(conv_layer())
        model.add(Dropout(0.3))
        model.add(MaxPooling1D(2))

    # NOTE(review): this pooling layer directly follows another pooling layer —
    # confirm the back-to-back pooling is intended.
    model.add(MaxPooling1D(2))
    model.add(conv_layer())
    model.add(Dropout(0.2))
    model.add(MaxPooling1D(2))
    model.add(conv_layer())
    model.add(Dropout(0.1))
    model.add(Flatten())

    # Fully connected layers
    if n_features > 5:
        model.add(Dense(75, activation='tanh'))
    model.add(Dense(36, activation='tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(3, activation='tanh'))
    if regress:
        model.add(Dense(1, activation="tanh"))

    return model

def create_lstm(n_days, n_features,  regress=False):
    """Build an LSTM branch as a Keras ``Sequential`` model.

    A 32-unit LSTM that returns only its last output is followed by tanh Dense
    layers of 16, 8 and 3 units.

    Args:
        n_days: number of time steps per sample.
        n_features: number of features per time step.
        regress: when True, append a final single-unit tanh layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # Keras 2 argument names: `dropout` (formerly dropout_W) applies to the inputs,
    # `recurrent_dropout` (formerly dropout_U) applies to the recurrent state.
    model.add(LSTM(32, return_sequences=False, dropout=0.1, recurrent_dropout=0.1,
                   input_shape=(n_days, n_features)))
    model.add(Dense(16, activation='tanh'))
    model.add(Dense(8, activation='tanh'))
    model.add(Dense(3, activation='tanh'))
    if regress:
        model.add(Dense(1, activation="tanh"))
    return model


In [None]:
# import the necessary packages
from sklearn.model_selection import train_test_split
from keras.layers.core import Dense
from keras.models import Model
from keras.optimizers import Adam, SGD
from keras.layers import concatenate
import keras
import numpy as np
import argparse
import locale
import os


# NOTE(review): the `*_train` arrays and `targets_train` used in this cell are not
# assigned in any visible cell (the loading cell produces `dly_cnn_input`, `targets`, ...).
# Confirm where the training selection / long-short balancing is supposed to happen
# before running this cell on a fresh kernel.

# Build the two MLP branches and the two CNN branches (daily and weekly data).
mlp = create_mlp(dly_mlp_input_train.shape[1], regress=False)
mlp_wkly = create_mlp(wkly_mlp_input_train.shape[1], regress=False)

n_steps_dly, n_features_dly = dly_cnn_input_train.shape[1], dly_cnn_input_train.shape[2]
cnn = create_cnn(n_steps_dly, n_features_dly, 256 , 3, regress=False)

n_steps_wkly, n_features_wkly = wkly_cnn_input_train.shape[1], wkly_cnn_input_train.shape[2]
cnn_wkly = create_cnn(n_steps_wkly, n_features_wkly, 256 , 3, regress=False)

# The outputs of all four branches, concatenated, feed the final set of layers.
branches = [cnn, mlp, mlp_wkly, cnn_wkly]
combinedInput = concatenate([branch.output for branch in branches])

# Final fully connected head: 18 -> 9 -> 3 -> 1 units, all tanh.
x = combinedInput
for units in (18, 9, 3, 1):
    x = Dense(units, activation="tanh")(x)

model = Model(inputs=[branch.input for branch in branches], outputs=x)

model.compile(loss=trading_loss, optimizer='SGD', metrics=['accuracy'])

print("[INFO] training model...")

epochs = 1000

# Input order must match the order of `branches` above.
train_inputs = [dly_cnn_input_train, dly_mlp_input_train, wkly_mlp_input_train, wkly_cnn_input_train]
validation_inputs = [dly_cnn_input_test, dly_mlp_input_test, wkly_mlp_input_test, wkly_cnn_input_test]

history = model.fit(
    train_inputs,
    targets_train,
    validation_data=(validation_inputs, targets_test),
    epochs=epochs,
    batch_size=32,
)
i = epochs

In [None]:
from matplotlib import pyplot as plt
# list all data in history
print(history.history.keys())

# One figure per metric: training curve vs. validation ("test") curve.
# Each entry: (train key, validation key, title, y-axis label, logarithmic x-axis?)
curve_specs = (
    ('accuracy', 'val_accuracy', 'model accuracy', 'accuracy', False),
    ('loss', 'val_loss', 'model loss', 'loss', True),
)
for train_key, val_key, title, y_label, log_x in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    if log_x:
        plt.xscale('log')
    plt.show()

In [None]:
# PRINT MODEL OUTPUT FOR ANALYSIS
import numpy as np
import matplotlib.pyplot as plt


# Run the trained model on the test inputs; same input order as used for training.
prediction_inputs = [dly_cnn_input_test, dly_mlp_input_test, wkly_mlp_input_test, wkly_cnn_input_test]
predictions = model.predict(prediction_inputs).flatten()

actual_changes = changes_test

# One bar position per test sample.
Y = np.arange(len(actual_changes))

print('---ANN output---')
fig = plt.figure()
ax = plt.subplot(111)
ax.bar(Y, predictions, width=1, color='b')

In [None]:
# SAVE MODEL 
import pymongo
import pandas as pd
import numpy as np
from sklearn import datasets
import pickle
import time
import pymongo

def save_model_to_db(model, client, db, dbconnection, model_name):
    """Pickle ``model`` and store it as one document in a MongoDB collection.

    The stored document has three fields: the pickled bytes under the key
    ``model_name``, ``'name'`` (the model name) and ``'created_time'``.

    Args:
        model: the object to pickle and store.
        client: connection argument passed to ``pymongo.MongoClient`` (e.g. a URI).
        db: name of the database.
        dbconnection: name of the collection inside that database.
        model_name: name to store the model under; also used as the key of the
            field holding the pickled bytes.

    Returns:
        dict with ``'inserted_id'``, ``'model_name'`` and ``'created_time'``.
        ``'created_time'`` is the same timestamp that was written to the document.
    """
    # pickling the model
    pickled_model = pickle.dumps(model)

    # One timestamp for both the stored document and the returned details,
    # so the two can be matched exactly.
    created_time = time.time()

    # The context manager closes the connection even if the insert fails.
    with pymongo.MongoClient(client) as myclient:
        mycon = myclient[db][dbconnection]
        info = mycon.insert_one({model_name: pickled_model, 'name': model_name, 'created_time': created_time})

    print(info.inserted_id, ' saved with this id successfully!')

    details = {
        'inserted_id': info.inserted_id,
        'model_name': model_name,
        'created_time': created_time,
    }

    return details

def load_saved_model_from_db(model_name, client, db, dbconnection):
    """Load the most recently saved model called ``model_name`` from MongoDB.

    Looks for documents whose ``'name'`` field equals ``model_name``, takes the
    one with the largest ``'created_time'`` and unpickles the bytes stored under
    the key ``model_name``.

    Args:
        model_name: name the model was saved under.
        client: connection argument passed to ``pymongo.MongoClient`` (e.g. a URI).
        db: name of the database.
        dbconnection: name of the collection inside that database.

    Returns:
        The unpickled model object.

    Raises:
        KeyError: if no matching document exists or it has no ``model_name`` field.
    """
    # The context manager closes the connection even if the query fails.
    with pymongo.MongoClient(client) as myclient:
        mycon = myclient[db][dbconnection]
        # Newest first, so "latest" does not depend on the cursor's natural order.
        document = mycon.find_one({'name': model_name}, sort=[('created_time', pymongo.DESCENDING)])

    if document is None or model_name not in document:
        raise KeyError(
            "no saved model named %r found in %s.%s" % (model_name, db, dbconnection)
        )

    # SECURITY: pickle.loads can execute arbitrary code; only load from a
    # database whose contents are trusted.
    return pickle.loads(document[model_name])

In [None]:
import keras.losses

# Register the customized loss on keras.losses, in order to be able to save the model in MongoDB.
keras.losses.trading_loss = trading_loss

# Persist the trained model in the local MongoDB instance.
details = save_model_to_db(
    model=model,
    client='mongodb://localhost:27017/',
    db='stockTraderDB',
    dbconnection='models',
    model_name='TripleXAnnModel',
)