In [1]:
import neptune.new as neptune
import os
from getpass import getpass
import pandas as pd
import numpy as np
from datetime import date
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate
import tensorflow as tf
import datetime as dt
import urllib.request, json

In [2]:
tf.random.set_seed(42)
np.random.seed(42)
os.chdir('D:/python/time series LSTM')

In [3]:
api_token = getpass('Enter your neptune api token: ')
project_name = 'your-neptune-account/Stock-Predictions'
project = neptune.init(api_token=api_token, project=project_name)

https://app.neptune.ai/giftabumere247/Stock-Predictions/e/STOC-9


Info (NVML): NVML Shared Library Not Found. GPU usage metrics may not be reported. For more information, see https://docs.neptune.ai/you-should-know/what-can-you-log-and-display#hardware-consumption


Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [6]:
datasource = 'alphavantage'
if datasource == 'alphavantage':
    api_key = '6T7Z3O9GV7VYRUUB'
    ticker = 'AAPL'     #apple

    # get data (json format)
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s"%(ticker, api_key)

    #save data
    file_name = 'stock_market_data_%s.csv'%ticker

    if not os.path.exists(file_name):
        with urllib.request.urlopen(url_string) as url:
            data = json.loads(url.read().decode())
            data = data['Time Series (Daily)']
            df = pd.DataFrame(columns=['Date','Low', 'High', 'Close', 'Open'])
            for k, v in data.items():
                date = dt.datetime.strptime(k, '%Y-%m-%d')
                data_row = [date.date(), float(v['3. low']), float(v['2. high']), float(v['4. close']), float(v['1. open'])]
                df.loc[-1, :] = data_row
                df.index = df.index + 1
        df.to_csv(file_name)
    else:
        print('loading data from local csv file')
        df = pd.read_csv(file_name)
    
stock_prices = df.sort_values('Date')


In [7]:
df.head()

Unnamed: 0,Date,Low,High,Close,Open
5651,2022-04-18,163.57,166.5984,165.07,163.92
5650,2022-04-14,165.04,171.27,165.29,170.62
5649,2022-04-13,166.77,171.04,170.4,167.39
5648,2022-04-12,166.64,169.87,167.66,168.02
5647,2022-04-11,165.5,169.03,165.75,168.71


In [8]:
#helper funtions to calculate RMSE and MAPE
def calc_rmse(y_true, y_pred):
    return np.sqrt(np.mean(np.power((y_true - y_pred), 2)))

def calc_mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [10]:
#split the time series data
def extract_seqX_outcomeY(data, N, offset):
    """
    Split time-series into training sequence X and outcome value Y
    Args:
        data - dataset 
        N - window size, e.g., 60 for 60 days 
        offset - position to start the split
    """
    dataX, dataY = [], []
    for i in range(offset, len(data)):
        dataX.append(data[i-N:i])
        dataY.append(data[i])
        
    return np.array(dataX), np.array(dataY)

In [16]:
test_ratio = 0.2
train_ratio = 0.8

train_size = int(train_ratio * len(stock_prices))
test_size = int(test_ratio * len(stock_prices))

print('test size: ', test_size)
print('train_size: ', train_size)

train = stock_prices[:train_size][['Date', 'Close']]
test = stock_prices[test_size:][['Date', 'Close']]

test size:  1130
train_size:  4521


In [17]:
#=== simple MA (avg proce in a window)
stock_prices = stock_prices.set_index('Date')

#for medium term training
def plot_stock_trend(var, cur_title, stock_prices= stock_prices, logNeptune=True, logmodelName='Simple MA'):
    ax = stock_prices[['Close', var, '200day']].plot(figsize=(20, 10))
    plt.grid(False)
    plt.title(cur_title)
    plt.axis('tigth')
    plt.ylabel('Stock Price ($)')

    if logNeptune:
        neptune.log_image(f'Plot of Stock Predictions with {logmodelName}', ax.get_figure())

def calculate_perf_metrics(var, logNeptune= True, logmodelName= 'Simple MA'):
    #RMSE
    rmse = calc_rmse(np.array(stock_prices[train_size:]['Close']), np.array(stock_prices[train_size:][var]))
    #MAPE
    mape = calc_mape(np.array(stock_prices[train_size:]['Close']), np.array(stock_prices[train_size:][var]))

    if logNeptune:
        neptune.send_metric('RMSE', rmse)
        neptune.log_metric('RMSE', rmse)    

        neptune.send_metric('MAPE', mape)
        neptune.log_metric('MAPE', mape)
    
    return rmse, mape


In [18]:
window_size = 50
CURRENT_MODEL = 'LSTM'

if CURRENT_MODEL == 'SMA':
    #create an experiment and log the model
    npt_exp = project.create_experiment(name= 'SMA',
                              description= 'stock-prediction-ML',
                              tags = ['stockprediction', 'MA_Simple', 'neptune'])
    window_var = str(window_size) + 'day'

    stock_prices[window_var] = stock_prices['Close'].rolling(window_size).mean()
    
    #include 200 day price for reference
    stock_prices['200day'] = stock_prices['Close'].rolling(200).mean()

    #plot and performance metrics for SMA model
    plot_stock_trend(var= window_var, cur_title='Simple Moving Averages', logmodelName='Simple MA')
    rmse_sma, mape_sma = calculate_perf_metrics(var= window_var, logmodeName= 'Simple MA')

elif CURRENT_MODEL == 'EMA':
    npt_exp = project.create_experiment(name='EMA',
                              description= 'stock-prediction-ML',
                              tags=['stockprediction', 'MA_Exponential', 'neptune'])
    
    #Exponential MA
    window_ema_var = window_size + '_EMA'

    #calculate the N-Day exponential weighted moving average
    stock_prices[window_ema_var] = stock_prices['Close'].ewm(span= window_size, adjust=False).mean()
    stock_prices['200day'] = stock_prices['Close'].rolling(200).mean()

    plot_stock_trend(var= window_ema_var, cur_title='Exponential Moving Averages', logmodelName='Exp MA')
    rmse_ema, mape_ema = calculate_perf_metrics(var= window_ema_var, logmodeName= 'Exp MA')

elif CURRENT_MODEL == 'LTSM':
    layer_units, optimizer = 50, 'adam'
    cur_epochs = 15
    cur_batch_size = 20

    cur_LSTM_pars = {'units': layer_units, 
                     'optimizers': optimizer,
                     'batch_size': cur_batch_size,
                     'epochs': cur_epochs
                     }

    npt_exp = project.create_experiment(name='LSTM',
                              params= cur_LSTM_pars,
                              description= 'stock-predictions-ML',
                              tags= ['stockprediction', 'LSTM', 'neptune'])
    
    #I'll use the past N stock prices for training to predict the N+1th closing price

    #scale
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(stock_prices[['Close']])
    scaled_data_train = scaled_data[:train.shape[0]]

    X_train, y_train = extract_seqX_outcomeY(scaled_data_train, window_size, window_size)

    #Building the LSTM model and log model summary to nuptune
    def Run_LSTM(X_train, layer_units=50, logNeptune=True, NeptuneProject=None):
        inp = Input(shape=(X_train.shape[1], 1))

        x = LSTM(units= layer_units, return_sequences= True)(inp)
        x = LSTM(units= layer_units)(x)
        out = Dense(1, activation= 'linear')(x)
        model = Model(inp, out)

        #compile the LSTM neural net
        model.compile(loss= 'mean_squared_error', optimizer= 'adam')

        #logging to neptune
        if logNeptune:
            model.summary(print_fn= lambda x: NeptuneProject.log_text('Model Summary ', x))
        
        return model

    model = Run_LSTM(X_train, layer_units=layer_units, logNeptune=True, NeptuneProject=npt_exp)

    history = model.fit(X_train, y_train, epochs= cur_epochs, batch_size= cur_batch_size, verbose= 1, validation_split= 0.1, shuffle= True)

    def preprocess_testdata(data=stock_prices, scaler=scaler, window_size=window_size, test=test):
        raw = data['Close'][len(data) - len(test) - window_size:].values
        raw = raw.reshape(-1, 1)
        raw = scaler.transform(raw)

        X_test = []
        for i in range(window_size, raw.shape[0]):
            X_test.append(raw[i - window_size:i, 0])
        
        X_test = np.array(X_test)
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        return X_test

    X_test = preprocess_testdata()

    predicted_price_ = model.predict(X_test)
    predicted_price = scaler.inverse_transform(predicted_price_)

    #plot predicted price vs actual price
    test['Predictions_lstm'] = predicted_price

    # Evaluate performance
    rmse_lstm = calc_rmse(np.array(test['Close']), np.array(test['Predictions_lstm']))
    mape_lstm = calc_mape(np.array(test['Close']), np.array(test['Predictions_lstm']))
    
    npt_exp.send_metric('RMSE', rmse_lstm)
    npt_exp.log_metric('RMSE', rmse_lstm)

    npt_exp.send_metric('MAPE (%)', mape_lstm)
    npt_exp.log_metric('MAPE (%)', mape_lstm)

    #plotting of predictions and true trends to log to neptune
    def plot_sock_trend_lstm(train, test, logNeptune=True):
        fig = plt.figure(figsize = (20, 10))
        plt.plot(train['Date'], train['Close'], label= 'Train Closing Price')
        plt.plot(test['Date'], test['Close'], label= 'Test Closing Price')
        plt.plot(test['Date'], test['Predictions_lstm'], label= 'Predicted Closing Price')
        plt.title('LSTM Model')
        plt.xlabel('Date')
        plt.ylabel('Stock Price ($)')
        plt.legend(loc= 'upper left')
        plt.show()
        if logNeptune:
            npt_exp.log_image('Plot of Stock Predictions with LSTM', fig)

    plot_stock_trend(train, test)
