In [1]:
import pandas as pd
import glob
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras import layers
from keras.layers import Activation, Dense, Dropout
from keras.callbacks import EarlyStopping
from hyperas import optim
from hyperas.distributions import choice, uniform
from hyperopt import Trials, STATUS_OK, tpe


Using TensorFlow backend.


In [6]:
def data():
    """
    Data providing function:

    This function is separated from create_model() so that hyperopt
    won't reload data for each evaluation run.
    """
    df = pd.read_csv("./stockPrice.csv", header=0, na_values='.')

    lol = df.copy()
    lol["1pred"] = (df["close"].shift(-1) - df["close"]) / df["close"]
    lol["3pred"] = (df["close"].shift(-3) - df["close"]) / df["close"]
    lol["5pred"] = (df["close"].shift(-5) - df["close"]) / df["close"]
    lol["10pred"] = (df["close"].shift(-10) - df["close"]) / df["close"]

    lol.loc[df["close"].shift(-1) > df["close"] , '1predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '1predB'] = 1
    lol.loc[df["close"].shift(-1) > df["close"] , '3predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '3predB'] = 1
    lol.loc[df["close"].shift(-1) > df["close"] , '5predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '5predB'] = 1
    lol.loc[df["close"].shift(-1) > df["close"] , '10predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '10predB'] = 1

    lol["5max"] = lol["close"].rolling(window=5).max()
    lol["10max"] = lol["close"].rolling(window=10).max()
    lol["20max"] = lol["close"].rolling(window=20).max()

    lol["5low"] = lol["close"].rolling(window=5).min()
    lol["10low"] = lol["close"].rolling(window=10).min()
    lol["20low"] = lol["close"].rolling(window=20).min()

    lol["vol%"] =  (df["vol"] - df["vol"].shift(1)) /df["vol"].shift(1)

    lol = lol.dropna()    

    dfC = pd.DataFrame()
    dfC["vol"] = lol["vol%"]
    dfC["sma10"] = lol["sma10"] / lol["close"]
    dfC["sma20"] = lol["sma20"] / lol["close"]
    dfC["sma50"] = lol["sma50"] / lol["close"]
    dfC["sma100"] = lol["sma100"] / lol["close"]
    dfC["vwap"] = lol["vwap"]
    dfC["bbmid"] = lol["bbmid"] / lol["close"]
    dfC["bbUpper"] = lol["bbUpper"] / lol["close"]
    dfC["bbLower"] = lol["bbLower"] / lol["close"]
    dfC["cci"] = lol["cci"] 
    dfC["rsi"] = lol["rsi"] 
    dfC["5max"] = lol["5max"] / lol["close"]
    dfC["10max"] = lol["10max"] / lol["close"]
    dfC["20max"] = lol["20max"] / lol["close"]
    dfC["5low"] = lol["5low"] / lol["close"]
    dfC["10low"] = lol["10low"] / lol["close"]
    dfC["20low"] = lol["20low"] / lol["close"]
    dfC["1pred"] = lol["1pred"]
    dfC["3pred"] = lol["3pred"]
    dfC["5pred"] = lol["5pred"]
    dfC["10pred"] = lol["10pred"]
    dfC["1predB"] = lol["1predB"]
    dfC["3predB"] = lol["3predB"]
    dfC["5predB"] = lol["5predB"]
    dfC["10predB"] = lol["10predB"]

    dfC.replace([np.inf, -np.inf], np.nan)
    dfC.dropna(inplace=True)

    X = dfC[['sma10', 'sma20', 'sma50', 'sma100', 'vwap', 'bbmid', 'bbUpper', 'bbLower', 'cci', 'rsi', '5max', '10max', '20max', '5low', '10low', '20low']]
    Y = dfC[["5predB"]]


    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    return x_train, y_train, x_test, y_test


def create_model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    input_dim = x_train.shape[1]  # Number of features
    output_dim = y_train.shape[1]  # Number of features
    print(input_dim)

        with tf.device('/gpu:1'):
        model = Sequential()

        model.add(Dense(512, input_dim=input_dim))
        model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))
        model.add(Dense({{choice([256, 512, 1024])}}))
        model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))
        model.add(Dense({{choice([256, 512, 1024])}}))
        model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))

        # If we choose 'four', add an additional fourth layer
        if {{choice(['three', 'four'])}} == 'four':
            # We can also choose between complete sets of layers
    #         model.add({{choice([Dropout(0.5), Activation('linear')])}})
            model.add(Dense({{choice([256, 512, 1024])}}))
            model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))

        model.add(Dense(output_dim))
        model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))

        model.compile(loss={{choice(['binary_crossentropy', 'categorical_crossentropy'])}}, metrics=['accuracy'],
                      optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

        es = EarlyStopping(monitor='val_loss', mode='min', patience=20)

        result = model.fit(x_train, y_train,
                batch_size={{choice([16, 32])}},
                 epochs=100,
                 validation_split=0.1,
                 callbacks=[es])

        #get the highest validation accuracy of the training epochs
        validation_acc = np.amax(result.history['val_acc']) 
        print('Best validation acc of epoch:', validation_acc)
        return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}



best_run, best_model = optim.minimize(model=create_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=5,
                                      notebook_name='kerasPrice',
                                      trials=Trials())

X_train, Y_train, X_test, Y_test = data()
print("Evalutation of best performing model:")
print(best_model.evaluate(X_test, Y_test))
print("Best performing model chosen hyper-parameters:")
print(best_run)

>>> Imports:
#coding=utf-8

try:
    import pandas as pd
except:
    pass

try:
    import glob
except:
    pass

try:
    import matplotlib.pyplot as plt
except:
    pass

try:
    from sklearn.preprocessing import MinMaxScaler
except:
    pass

try:
    import seaborn as sns
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    import tensorflow as tf
except:
    pass

try:
    from sklearn.model_selection import train_test_split
except:
    pass

try:
    from sklearn import preprocessing
except:
    pass

try:
    from sklearn.metrics import accuracy_score
except:
    pass

try:
    from sklearn.metrics import confusion_matrix
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras import layers
except:
    pass

try:
    from keras.layers import Activation, Dense, Dropout
except:
    pass

try:
    from keras.callbacks import EarlyStopping
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
   

 - ETA: 2s - loss: 0.6932 - acc: 0.4896                                                                                
                                                                                                                       
 - ETA: 0s - loss: 0.6932 - acc: 0.5122                                                                                
                                                                                                                       
 - ETA: 0s - loss: 0.6931 - acc: 0.5142                                                                                
                                                                                                                       
 - ETA: 0s - loss: 0.6931 - acc: 0.5142                                                                                
                                                                                                                       
 - 1s 708us/step - loss: 0.6931 - acc: 0

                                                                                                                       
 - ETA: 0s - loss: 0.6931 - acc: 0.5058                                                                                
                                                                                                                       
 - ETA: 0s - loss: 0.6931 - acc: 0.5085                                                                                
                                                                                                                       
 - 0s 197us/step - loss: 0.6929 - acc: 0.5152 - val_loss: 0.6949 - val_acc: 0.4419                                     

Epoch 5/100                                                                                                            
  32/1543 [..............................]                                                                             
 - ETA: 0s - loss: 0.6906 - acc: 0.5938

KeyboardInterrupt: 

In [3]:
print("Evalutation of best performing model:")
print(best_model.evaluate(X_test, Y_test))
print("Best performing model chosen hyper-parameters:")
print(best_run)

Evalutation of best performing model:
[7.808515387814061, 0.510204081186632]
Best performing model chosen hyper-parameters:
{'Activation': 0, 'Activation_1': 0, 'Activation_2': 0, 'Activation_3': 1, 'Dense': 1, 'Dense_1': 1, 'Dense_2': 2, 'add': 0, 'batch_size': 1, 'optimizer': 2}


In [4]:
# x_train, y_train, x_test, y_test = data()
# input_dim = x_train.shape[1]  # Number of features
# output_dim = y_train.shape[1]  # Number of features
# print(input_dim)
# model = Sequential()

# model.add(Dense(512, input_dim=input_dim))
# model.add(Activation('relu'))
# model.add(Dense(512))
# model.add(Activation('relu'))
# model.add(Dense(1024))
# model.add(Activation('relu'))
# model.add(Dense(output_dim))
# model.add(Activation('sigmoid'))

# model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
#               optimizer='adam')

# es = EarlyStopping(monitor='val_loss', mode='min', patience=30)

# model_output = model.fit(x_train, y_train,
#                         batch_size=32,
#                         epochs=100,
#                         validation_split=0.1,
#                         callbacks=[es])



In [5]:
# loss, accuracy = model.evaluate(x_train, y_train, verbose=False)
# print("Training Accuracy: {:.4f}".format(accuracy))
# loss, accuracy = model.evaluate(x_test, y_test, verbose=False)
# print("Testing Accuracy:  {:.4f}".format(accuracy))


# print('Training Accuracy : ' , np.mean(model_output.history["acc"]))
# print('Validation Accuracy : ' , np.mean(model_output.history["val_acc"]))



# # Plot training & validation accuracy values
# plt.plot(model_output.history['acc'])
# plt.plot(model_output.history['val_acc'])
# plt.title('Model accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()

# # Plot training & validation loss values
# plt.plot(model_output.history['loss'])
# plt.plot(model_output.history['val_loss'])
# plt.title('model_output loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()