In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
import pandas as pd
import numpy as np
import os
import time
import random
from collections import deque
from sklearn import preprocessing
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [55]:
DAYS = 90
STOCK_USED = 'PG'
FUTURE_PREDICT = 15
EPOCHS = 10
BATCH = 30
TRAINING_TEST_SPLIT = 0.85

In [56]:
def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0
    
def preprocess(df):
    df = df.drop('future', 1)
    
    
    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
            
    df.dropna(inplace=True)
    
    stock_sequences = []
    previous_days = deque(maxlen=DAYS)
    
    for i in df.values:
        previous_days.append([x for x in i[:-1]])
        
        if len(previous_days)==DAYS:
            stock_sequences.append([np.array(previous_days), i[-1]])
    
    random.shuffle(stock_sequences)
    
    buy = []
    sell = []
    
    for sequence, target in stock_sequences:
        if target == 1:
            buy.append([sequence, target])
        else:
            sell.append([sequence, target])
            
    limit = min(len(buy), len(sell))
    
    buy = buy[:limit]
    sell = sell[:limit]
    
    stock_sequences = buy+sell
    random.shuffle(stock_sequences)
    
    X = []
    y = []
    
    
    
    for sequence, target in stock_sequences:
        X.append(sequence)
        y.append(target)
        
        
    return np.array(X), np.array(y)

def preprocess_data(df):
    df = df.drop('future', 1)
    
    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    
    df.dropna(inplace=True)
    
    stock_sequences = []
    previous_days = deque(maxlen=DAYS)
    
    for i in df.values:
        previous_days.append([x for x in i[:-1]])
        
        if len(previous_days)==DAYS:
            stock_sequences.append([np.array(previous_days), i[-1]])
            
    random.shuffle(stock_sequences)
    
    buy = []
    sell = []
    
    for sequence, target in stock_sequences:
        if target == 1:
            buy.append([sequence, target])
        else:
            sell.append([sequence, target])
            
    limit = min(len(buy), len(sell))
        
    buy = buy[:limit]
    sell = sell[:limit]
    
    stock_sequences = buy+sell
    random.shuffle(stock_sequences)
    
    X = []
    y = []
    
    for sequence, target in stock_sequences:
        X.append(sequence)
        y.append(target)
        
    validation_split = int(TRAINING_TEST_SPLIT*len(stock_sequences))
    
    validation_x = X[validation_split:]
    validation_y = y[validation_split:]
    
    X = X[:validation_split]
    y = y[:validation_split]
    
    return np.array(X), np.array(y), np.array(validation_x), np.array(validation_y)

In [57]:
main_df = pd.DataFrame()

#stocks = ['NVDA', 'INTC', 'QCOM', 'GSPC']
stocks = ['PG']

for stock in stocks:
    df = pd.read_csv(f"stock-data/{stock}.csv")#, names=['date','open','high','low','close','adj_close','volume'])
    
    df.rename(columns={'Close': f"{stock}_close", 'Volume': f"{stock}_volume"}, inplace=True)
    
    df.set_index('Date', inplace=True)
    df = df[[f"{stock}_close", f"{stock}_volume"]]
    
    if len(main_df)==0:
        main_df = df
    else:
        main_df = main_df.join(df)

main_df['future'] = main_df[f"{STOCK_USED}_close"].shift(-FUTURE_PREDICT)

main_df['target'] = list(map(classify, main_df[f"{STOCK_USED}_close"], main_df['future']))

dates = main_df.index.values
validation_split = dates[int(0.85*len(dates))]

#validation_df = main_df[(main_df.index >= validation_split)]
#main_df = main_df[(main_df.index < validation_split)]

#x_train, y_train = preprocess(main_df)
#validation_x, validation_y = preprocess(validation_df)

x_train, y_train, validation_x, validation_y = preprocess_data(main_df)

#print(main_df.head()) 

In [58]:
model = Sequential()
model.add(LSTM(128, input_shape=(x_train.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))


opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="logs/{}".format(STOCK_USED))


# Train model
history = model.fit(
    x_train, y_train,
    batch_size=BATCH,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y)
    #callbacks=[tensorboard, checkpoint],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [53]:
print(len(x_train))

1798


In [54]:
print(len(validation_x))

318
