In [394]:
# https://github.com/DarkKnight1991/Stock-Price-Prediction/blob/master/stock_pred_main.py
# https://towardsdatascience.com/predicting-stock-price-with-lstm-13af86a74944

import numpy as np
import os
import sys
import time
import pandas as pd 
from tqdm._tqdm_notebook import tqdm_notebook
import pickle
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
# from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import logging

from hyperas import optim
from hyperas.distributions import choice, uniform
from hyperopt import Trials, STATUS_OK, tpe

In [395]:
TIME_STEPS = 10
test_set_size_percentage = 20 
COLUMNS = ['close', 'sma10', 'sma20', 'sma50', 'sma100', 'vwap', 'bbmid', 'bbUpper', 'bbLower', 'cci', 'rsi', '5max', '10max', '20max', '5low', '10low', '20low']

In [396]:
# function to create train, validation, test data given stock data and sequence length
def load_data(stock, seq_len):
    data_raw = stock.as_matrix() # convert to numpy array
    data = []
    
    # create all possible sequences of length seq_len
    for index in range(len(data_raw) - seq_len): 
        data.append(data_raw[index: index + seq_len])
    
    data = np.array(data);
    test_set_size = int(np.round(test_set_size_percentage/100*data.shape[0]));
    train_set_size = data.shape[0] - (test_set_size);
    
    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,:]
    
    
    x_test = data[train_set_size:,:-1,:]
    y_test = data[train_set_size:,-1,:]
    
    return [x_train, y_train, x_test, y_test]

In [397]:
def normalize_data(df):
    min_max_scaler = MinMaxScaler()
    for column in df:
        df[column] = min_max_scaler.fit_transform(df[column].values.reshape(-1,1))

    return df

In [408]:
def getData():
    TIME_STEPS = 10
    test_set_size_percentage = 20 
    COLUMNS = ['close', 'sma10', 'sma20', 'sma50', 'sma100', 'vwap', 'bbmid', 'bbUpper', 'bbLower', 'cci', 'rsi', '5max', '10max', '20max', '5low', '10low', '20low']

    df = pd.read_csv("./stockPrice.csv", header=0, na_values='.')

    lol = df.copy()
    lol["1pred"] = (df["close"].shift(-1) - df["close"]) / df["close"]
    lol["3pred"] = (df["close"].shift(-3) - df["close"]) / df["close"]
    lol["5pred"] = (df["close"].shift(-5) - df["close"]) / df["close"]
    lol["10pred"] = (df["close"].shift(-10) - df["close"]) / df["close"]

    lol.loc[df["close"].shift(-1) > df["close"] , '1predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '1predB'] = 1
    lol.loc[df["close"].shift(-1) > df["close"] , '3predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '3predB'] = 1
    lol.loc[df["close"].shift(-1) > df["close"] , '5predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '5predB'] = 1
    lol.loc[df["close"].shift(-1) > df["close"] , '10predB'] = 0
    lol.loc[df["close"].shift(-1) < df["close"] , '10predB'] = 1

    lol["5max"] = lol["close"].rolling(window=5).max()
    lol["10max"] = lol["close"].rolling(window=10).max()
    lol["20max"] = lol["close"].rolling(window=20).max()

    lol["5low"] = lol["close"].rolling(window=5).min()
    lol["10low"] = lol["close"].rolling(window=10).min()
    lol["20low"] = lol["close"].rolling(window=20).min()

    lol["vol%"] =  (df["vol"] - df["vol"].shift(1)) /df["vol"].shift(1)

    lol = lol.dropna()    

    dfC = pd.DataFrame()
    dfC["vol"] = lol["vol%"]
    dfC["sma10"] = lol["sma10"] / lol["close"]
    dfC["sma20"] = lol["sma20"] / lol["close"]
    dfC["sma50"] = lol["sma50"] / lol["close"]
    dfC["sma100"] = lol["sma100"] / lol["close"]
    dfC["vwap"] = lol["vwap"]
    dfC["bbmid"] = lol["bbmid"] / lol["close"]
    dfC["bbUpper"] = lol["bbUpper"] / lol["close"]
    dfC["bbLower"] = lol["bbLower"] / lol["close"]
    dfC["cci"] = lol["cci"] 
    dfC["rsi"] = lol["rsi"] 
    dfC["5max"] = lol["5max"] / lol["close"]
    dfC["10max"] = lol["10max"] / lol["close"]
    dfC["20max"] = lol["20max"] / lol["close"]
    dfC["5low"] = lol["5low"] / lol["close"]
    dfC["10low"] = lol["10low"] / lol["close"]
    dfC["20low"] = lol["20low"] / lol["close"]
    dfC["1pred"] = lol["1pred"]
    dfC["3pred"] = lol["3pred"]
    dfC["5pred"] = lol["5pred"]
    dfC["10pred"] = lol["10pred"]
    dfC["1predB"] = lol["1predB"]
    dfC["3predB"] = lol["3predB"]
    dfC["5predB"] = lol["5predB"]
    dfC["10predB"] = lol["10predB"]
    dfC["close"] = lol["close"]

    dfC.replace([np.inf, -np.inf], np.nan)
    dfC.dropna(inplace=True)

    df = dfC[COLUMNS]
    min_max_scaler = MinMaxScaler()
    for column in df:
        df[column] = min_max_scaler.fit_transform(df[column].values.reshape(-1,1))

    
    data_raw = df.as_matrix() # convert to numpy array
    data = []

    # create all possible sequences of length seq_len
    for index in range(len(data_raw) - TIME_STEPS): 
        data.append(data_raw[index: index + TIME_STEPS])

    data = np.array(data);
    test_set_size = int(np.round(test_set_size_percentage/100*data.shape[0]));
    train_set_size = data.shape[0] - (test_set_size);

    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,:]


    x_test = data[train_set_size:,:-1,:]
    y_test = data[train_set_size:,-1,:]
    
    
    
    # Choose only close prices
    # x_train, y_train, x_test, y_test
#     y_train = y_train[:,0]
#     y_test = y_test[:,0]

    return x_train, y_train, x_test, y_test





In [409]:
def create_model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    input_dim = x_train.shape[2]  # Number of features
    output_dim = y_train.shape[2]  # Number of features
    print(input_dim)
    TIME_STEPS = 10

#     ''''''''''''''''''''''''''''''''''''''''''''''
    model = Sequential()

 # (batch_size, timesteps, data_dim)
#     model.add(LSTM({{choice([256, 512, 1024])}}, batch_input_shape=(20, TIME_STEPS, input_dim),
#                         dropout=0.0, recurrent_dropout=0.0, stateful=True, return_sequences=True,
#                         kernel_initializer='random_uniform'))
#     model.add(Dropout({{uniform(0, 1)}}))
    model.add(LSTM(input_dim = input_dim,
                   {{choice([256, 512, 1024])}},
                        return_sequences=False))    
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(Dense({{choice([256, 512, 1024])}}))
    model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))
    model.add(Dense({{choice([256, 512, 1024])}}))
    model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))

    model.add(Dense(output_dim))
    model.add(Activation({{choice(['sigmoid','softmax'])}}))
    
    
    optimizer = optimizers.RMSprop(lr=params["lr"])
    # optimizer = optimizers.SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    
#     optimizer = optimizers.RMSprop(lr=params["lr"])
    # optimizer = optimizers.SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True)
#     model.compile(loss='mean_squared_error', optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=40, min_delta=0.0001)
                                                             
    result = model.fit(x_train, y_train,
             batch_size=1,
             epochs=300,
             callbacks=[es],
             shuffle=False,
             validation_split=0.1)
                                                             
    score, acc = model.evaluate(x_test, y_test, verbose=0)
    print('Test accuracy:', acc)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

SyntaxError: positional argument follows keyword argument (<ipython-input-409-5f78b5b3e3d6>, line 26)

In [410]:
# best_run, best_model = optim.minimize(model=create_model,
#                                       data=getData,
#                                       algo=tpe.suggest,
#                                       max_evals=10,
#                                       notebook_name='kpiLstmHyperas',
#                                       trials=Trials())

X_train, Y_train, X_test, Y_test = getData()
print(Y_train)
print(Y_train.shape)

print("Evalutation of best performing model:")
# print(best_model.evaluate(X_test, Y_test))
print("Best performing model chosen hyper-parameters:")
# print(best_run)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

(1952, 17)
Evalutation of best performing model:
Best performing model chosen hyper-parameters:


In [None]:
# def create_model(x_train, y_train, x_test, y_test):
#     """
#     Model providing function:

#     Create Keras model with double curly brackets dropped-in as needed.
#     Return value has to be a valid python dictionary with two customary keys:
#         - loss: Specify a numeric evaluation metric to be minimized
#         - status: Just use STATUS_OK and see hyperopt documentation if not feasible
#     The last one is optional, though recommended, namely:
#         - model: specify the model just created so that we can later use it again.
#     """
#     input_dim = x_train.shape[1]  # Number of features
#     output_dim = y_train.shape[1]  # Number of features
#     print(input_dim)

#     model = Sequential()

#     model.add(Dense(512, input_dim=input_dim))
#     model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))
#     model.add(Dense({{choice([256, 512, 1024])}}))
#     model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))
#     model.add(Dense({{choice([256, 512, 1024])}}))
#     model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))

#     # If we choose 'four', add an additional fourth layer
#     if {{choice(['three', 'four'])}} == 'four':
#         model.add(Dense(100))
#         # We can also choose between complete sets of layers
#         model.add({{choice([Dropout(0.5), Activation('linear')])}})
#         model.add(Activation({{choice(['relu', 'sigmoid','softmax'])}}))
        
#     model.add(Dense(output_dim))
#     model.add(Activation('sigmoid'))

#     model.compile(loss='binary_crossentropy', metrics=['accuracy'],
#                   optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

#     result = model.fit(x_train, y_train,
#              batch_size={{choice([16, 32, 64])}},
#              epochs=100,
#              validation_data=(x_test, y_test))

#     score, acc = model.evaluate(x_test, y_test, verbose=0)
#     print('Test accuracy:', acc)
#     return {'loss': -acc, 'status': STATUS_OK, 'model': model}



# best_run, best_model = optim.minimize(model=create_model,
#                                       data=data,
#                                       algo=tpe.suggest,
#                                       max_evals=10,
#                                       notebook_name='kpiLstmHyperas',
#                                       trials=Trials())