In [75]:
import requests
import sys
import random
import tensorflow as tf
from datetime import datetime as dt
from datetime import timedelta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from yahoo_fin import stock_info as si
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from alpha_vantage.timeseries import TimeSeries
from collections import deque

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import random

In [76]:
ticker = str(sys.argv[1])
lines = open('keys').read().splitlines()
key = random.choice(lines)
ts = TimeSeries(key=key)
data, meta_data = ts.get_intraday('AAPL');
print(data)

{'2020-09-21 20:00:00': {'1. open': '110.7000', '2. high': '111.0000', '3. low': '110.6600', '4. close': '110.9500', '5. volume': '167174'}, '2020-09-21 19:45:00': {'1. open': '110.4900', '2. high': '110.7500', '3. low': '110.4000', '4. close': '110.7300', '5. volume': '116046'}, '2020-09-21 19:30:00': {'1. open': '110.0300', '2. high': '110.4700', '3. low': '110.0300', '4. close': '110.4700', '5. volume': '105117'}, '2020-09-21 19:15:00': {'1. open': '110.1000', '2. high': '110.1000', '3. low': '109.9400', '4. close': '110.0000', '5. volume': '35917'}, '2020-09-21 19:00:00': {'1. open': '110.0200', '2. high': '110.1500', '3. low': '110.0100', '4. close': '110.1000', '5. volume': '82992'}, '2020-09-21 18:45:00': {'1. open': '110.1200', '2. high': '110.1500', '3. low': '110.0000', '4. close': '110.0000', '5. volume': '59711'}, '2020-09-21 18:30:00': {'1. open': '110.2500', '2. high': '110.2800', '3. low': '110.0000', '4. close': '110.1500', '5. volume': '67402'}, '2020-09-21 18:15:00': 

In [77]:
def load_data(ticker, n_steps = 50, scale = True, shuffle = True, lookup_step=1,
             test_size =0.2, feature_columns = ['adjclose', 'volume', 'open', 'high', 'low']):
    if isinstance(ticker, str):
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        df = ticker
    else:
        raise TypeError("ticker can be either str or a `pd.DataFrame` instances")
    result = {}
    result ['df'] = df.copy()
    
    for col in feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the data frame."
    if scale:
        column_scaler = {}
        
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler
        
        results["column_scaler"] = column_scaler
    df['future'] = df ['adjclose'].shift(-lookup-step)
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    df.dropna(inplace=True)
    sequence_data = []
    sequence = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])
    last_sequence = list(sequences) + list(last_sequence)
    last_sequence = np.array(last_sequence)
    results['last_sequence'] = last_sequence
    X, y = [], []
    for seq, target in sequence_data:
        X.append(seq)
        y.append(target)
        
    X = np.array(X)
    y = np.array(y)
    
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
    
    result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(X,y, test_size = test_size, shuffle=shuffle)
    
    return result

In [78]:
def create_model(sequence_length, units=256, cells=LSTM, n_layers =2, dropout=0.3,
                loss = "mean_absolute_error", optimizer = "rmsprop", bidirectional= False):
    model = Sequential()
    for i in range(n_layers):
        if i == 0:
            #this is the first layer
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=True), input_shape = (None, sequence_length)))
            else:
                model.add(cell(units, return_sequence=True, input_shape=(None, sequence_length)))
        elif i == n_layers -1:
            #the last layer
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=False)))
            else:
                model.add(cell(units, return_sequences = False))
        else:
            #The hidden layers
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=True)))
            else:
                model.add(Bidirectional(cell(units, return_sequences=True)))
        #adds a dropout after each layer
        model.add(Dropout(dropout))
        model.add(Dense(1, activation,"linear"))
        model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
        return model

In [79]:
N_STEPS = 50
LOOKUP_STEP = 1
TEST_SIZE = 0.2
FEATURE_COLUMN = ["adjclose", "volume", "open", "high", "low"]
today = dt.now()
dt = today
date_now = dt.strftime("%Y-%m-%d")
N_LAYERS = 2
CELL = LSTM
UNITS = 256
DROPOUT = 0.4
BIDIRECTIONAL = False
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 5
ticker = "GOOGL"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
model_name = f"{date_now}_{ticker}-{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
if BIDIRECTIONAL:
    model_name += "-b"

In [80]:
if not os.path.isdir("results"):
    os.mkdir("results")
if not os.path.isdir("logs"):
    os.mkdir("logs")
if not os.path.isdir("data"):
    os.mkdir("data")

In [81]:
data = load_data(ticker, N_STEPS, lookup_step = LOOKUP_STEP, test_size=TEST_SIZE, feature_columns=FEATURE_COLUMN)
data["df"].to_csv(ticker_data_filename)
model = create_model(N_STEPS, loss = LOSS, units=UNITS, cell = CELL, n_layers=N_LAYERS, dropout=DROPOUT, optimizer = OPTIMIZER,
                    bidirectional = BIDIRECTIONAL)

checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"), save_weights_only = True, save_best_only= True, verbose = 1)
tensorboard = TensorBoard(log_dir = os.path.join("logs", model_name))
history = model.fit(data["X_train"], data["y_train"], batch_size= BATCH_SIZE, epochs = EPOCHS,
                   validation_data=(data["X_test"], data["y_test"]), callbacks=[checkpoint, tensorboard], verbose = 1)
model.save(os.path.join("results", model_name) + ".h5")

NameError: name 'results' is not defined