In [1]:
# !pip install pandas_datareader keras seaborn
# !conda install -y -c conda-forge fbprophet
# !pip install pydot graphviz
import boto3
import base64
from botocore.exceptions import ClientError
from IPython.display import display
import pandas_datareader
import pandas as pd
import numpy as np
from keras import Sequential
from keras.layers import Dense, LSTM, InputLayer, Attention
import seaborn as sns
import matplotlib.pyplot as plt
from keras.utils import plot_model
from keras.callbacks import EarlyStopping

In [3]:
# Ticker symbols passed to the Alpha Vantage reader.
tickers = ['AAPL']
# Name of the price column the derived features are computed from.
metric = 'low'
# Column name that add_percent_change() writes for `metric`.
pc_metric = f'{metric}_percent_change'
# Column name that add_norm() writes for `pc_metric`.
norm_metric = f'{pc_metric}_norm'
# Number of consecutive values per input window built by to_ts_df().
lookback=100
def get_secret(secret_name="alpha_vantage", region_name="us-east-2"):
    """Fetch a secret value from AWS Secrets Manager.

    Args:
        secret_name: Identifier of the secret to read (default "alpha_vantage").
        region_name: AWS region the Secrets Manager client is created for
            (default "us-east-2").

    Returns:
        The response's ``SecretString`` field when it is present, otherwise
        the base64-decoded ``SecretBinary`` field.

    Raises:
        ClientError: if the Secrets Manager call fails. The error is displayed
            first and then re-raised.
    """
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )
    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except ClientError as e:
        # Show the error, then propagate it. Falling through to the return
        # statement would hit an unbound local and hide the real failure
        # behind an UnboundLocalError.
        display(e)
        raise
    # Depending on whether the secret is a string or binary, one of these
    # fields will be populated.
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    return base64.b64decode(get_secret_value_response['SecretBinary'])
def format_dates(daily_stocks_data):
    """Return a copy of the frame with its index exposed as a ``date`` column.

    The input frame is not modified. In the result the former index values
    live in the ``date`` column and the index itself is replaced by a fresh
    0..n-1 integer index.
    """
    with_dates = daily_stocks_data.assign(date=daily_stocks_data.index)
    return with_dates.reset_index(drop=True)
def add_percent_change(daily_stocks_data, metric):
    """Return a copy of the frame with a ``{metric}_percent_change`` column.

    For every row the new column holds ``(next - current) / next``, where
    ``current`` is the row's ``metric`` value and ``next`` is the value in the
    following row (by position). The last row has no successor, so its value
    is NaN.

    NOTE(review): the divisor is the *next* value rather than the current one,
    which differs from the conventional (new - old) / old definition. It is
    kept unchanged so existing values stay the same -- confirm the intent.

    Args:
        daily_stocks_data: DataFrame containing the ``metric`` column.
        metric: Name of the numeric column to derive the change from.

    Returns:
        A new DataFrame; the input is left untouched.
    """
    current = daily_stocks_data[metric]
    # Positional look-ahead: works for any index, not only a 0..n-1 RangeIndex,
    # and avoids treating index labels as row positions.
    following = current.shift(-1)
    cp_df = daily_stocks_data.copy()
    # to_numpy() keeps the assignment purely positional (no index alignment).
    cp_df[f'{metric}_percent_change'] = ((following - current) / following).to_numpy()
    return cp_df
def add_norm(df, label, scale=1000):
    """Return a copy of ``df`` with a ``{label}_norm`` column.

    Despite the name, no normalisation is applied: the new column is simply
    the ``label`` column multiplied by ``scale``.

    Args:
        df: Source DataFrame containing the ``label`` column.
        label: Name of the numeric column to scale.
        scale: Multiplicative factor applied to every value (default 1000).

    Returns:
        A new DataFrame; the input is left untouched.
    """
    # Vectorised multiply instead of a per-element Python loop.
    # (An L1 `normalize(arr, norm='l1')` step was sketched here originally but
    # was commented out; it remains disabled.)
    scaled = df[label].to_numpy() * scale
    new_df = df.copy()
    new_df[f'{label}_norm'] = scaled
    return new_df
def to_ts_df(daily_stocks_data, lookback, metric):
    """Build a sliding-window frame from one column of the daily data.

    Row ``i`` of the result holds ``lookback`` consecutive ``metric`` values
    starting at position ``i`` (columns ``{metric}_0`` .. ``{metric}_{lookback-1}``),
    the value that follows them (``{metric}_target``), and, as first column,
    the ``date`` of position ``i``.

    Args:
        daily_stocks_data: DataFrame with a ``date`` column and the ``metric`` column.
        lookback: Number of consecutive values per window; must be >= 0.
        metric: Name of the column to window.

    Returns:
        DataFrame with ``max(len(daily_stocks_data) - lookback, 0)`` rows. When
        the input is too short for a single window the frame is empty but still
        has all expected columns.

    Raises:
        ValueError: if ``lookback`` is negative.
    """
    if lookback < 0:
        raise ValueError(f'lookback must be non-negative, got {lookback}')
    ## column names
    columns = [f'{metric}_{i}' for i in range(lookback)]
    columns.append(f'{metric}_target')
    ## columns
    series = daily_stocks_data[metric].to_numpy()
    # Explicit window count: avoids negative slice stops on short inputs and
    # the empty `[:-0]` slice when lookback == 0.
    n_windows = max(len(series) - lookback, 0)
    df = pd.DataFrame(
        {col: series[offset:offset + n_windows] for offset, col in enumerate(columns)},
        columns=columns,
    )
    ## dates: each window is labelled with the date of its first value
    dates = daily_stocks_data['date'].to_numpy()[:n_windows]
    df.insert(0, 'date', dates)
    return df
def to_ts(ts_df):
    """Split a windowed frame into model inputs and targets.

    The first column of ``ts_df`` is skipped. Of the remaining columns, the
    last one is the target and all others are the window values.

    Args:
        ts_df: DataFrame laid out as ``[<skipped>, v_0, ..., v_{k-1}, target]``.

    Returns:
        Tuple ``(data, targets)`` where ``data`` has shape ``(rows, k, 1)``
        (each window value wrapped in its own length-1 axis) and ``targets``
        has shape ``(rows,)``. Empty input yields arrays with zero rows but
        the same trailing dimensions.
    """
    values = ts_df.iloc[:, 1:].to_numpy()
    # Slice once instead of looping over rows; copy so the returned arrays do
    # not share memory with the frame.
    data = values[:, :-1, np.newaxis].copy()
    targets = values[:, -1].copy()
    return data, targets

In [4]:
# Alpha Vantage API key, read through get_secret() (AWS Secrets Manager lookup)
# so the key itself never appears in the notebook source.
ALPHA_API_KEY = get_secret()

In [5]:
# Download the daily series for `tickers` from Alpha Vantage.
daily_stocks_data_raw = pandas_datareader.av.time_series.AVTimeSeriesReader(symbols=tickers, api_key=ALPHA_API_KEY, function='TIME_SERIES_DAILY').read()
# Move the index into a `date` column and derive the percent-change feature.
daily_stocks_data = format_dates(daily_stocks_data_raw)
daily_stocks_data = add_percent_change(daily_stocks_data, metric)
# The last row has no successor, so its percent change is NaN; replace it with 0.
# Only the percent-change column is filled: a boolean-mask assignment on the
# whole frame (`df[mask] = 0`) would overwrite every column of that row,
# including its date and prices.
daily_stocks_data[pc_metric] = daily_stocks_data[pc_metric].fillna(0)
daily_stocks_data = add_norm(daily_stocks_data, pc_metric)
# NOTE(review): the windows below are built from `pc_metric`; the scaled
# `norm_metric` column produced by add_norm() is not used -- confirm intent.
ts_df = to_ts_df(daily_stocks_data, lookback, pc_metric)
data, targets = to_ts(ts_df)
display(daily_stocks_data)
display(ts_df)

Unnamed: 0,open,high,low,close,volume,date,low_percent_change,low_percent_change_norm
0,23.13,23.7500,21.130,21.78,13115200,2001-03-26,0.035160,35.159817
1,21.94,23.0500,21.900,22.87,9711100,2001-03-27,-0.018605,-18.604651
2,22.08,22.5000,21.500,22.17,10440400,2001-03-28,0.000000,0.000000
3,21.77,23.4500,21.500,22.53,10947600,2001-03-29,-0.007498,-7.497657
4,22.55,22.7200,21.340,22.07,7149100,2001-03-30,0.002804,2.803738
...,...,...,...,...,...,...,...,...
5022,120.40,121.1700,119.160,121.03,88105050,2021-03-12,0.010463,10.463378
5023,121.41,124.0000,120.420,123.99,92590555,2021-03-15,0.034439,34.438520
5024,125.70,127.2200,124.715,125.57,115227936,2021-03-16,-0.019446,-19.446443
5025,124.05,125.8599,122.336,124.76,111932636,2021-03-17,-0.016755,-16.755319


Unnamed: 0,date,low_percent_change_0,low_percent_change_1,low_percent_change_2,low_percent_change_3,low_percent_change_4,low_percent_change_5,low_percent_change_6,low_percent_change_7,low_percent_change_8,...,low_percent_change_91,low_percent_change_92,low_percent_change_93,low_percent_change_94,low_percent_change_95,low_percent_change_96,low_percent_change_97,low_percent_change_98,low_percent_change_99,low_percent_change_target
0,2001-03-26,0.035160,-0.018605,0.000000,-0.007498,0.002804,-0.063090,-0.073600,0.062500,-0.005025,...,0.000000,-0.001054,-0.023732,0.009615,-0.006993,0.009062,-0.004821,-0.025824,-0.012799,0.001112
1,2001-03-27,-0.018605,0.000000,-0.007498,0.002804,-0.063090,-0.073600,0.062500,-0.005025,0.007976,...,-0.001054,-0.023732,0.009615,-0.006993,0.009062,-0.004821,-0.025824,-0.012799,0.001112,-0.010107
2,2001-03-28,0.000000,-0.007498,0.002804,-0.063090,-0.073600,0.062500,-0.005025,0.007976,0.034649,...,-0.023732,0.009615,-0.006993,0.009062,-0.004821,-0.025824,-0.012799,0.001112,-0.010107,-0.006215
3,2001-03-29,-0.007498,0.002804,-0.063090,-0.073600,0.062500,-0.005025,0.007976,0.034649,0.023496,...,0.009615,-0.006993,0.009062,-0.004821,-0.025824,-0.012799,0.001112,-0.010107,-0.006215,-0.005111
4,2001-03-30,0.002804,-0.063090,-0.073600,0.062500,-0.005025,0.007976,0.034649,0.023496,-0.006147,...,-0.006993,0.009062,-0.004821,-0.025824,-0.012799,0.001112,-0.010107,-0.006215,-0.005111,-0.001706
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4922,2020-10-16,-0.027235,-0.000259,0.007042,-0.016232,-0.002713,-0.012403,0.014492,-0.030962,0.009804,...,0.017759,-0.026018,-0.027146,-0.008931,-0.011703,0.021719,0.005525,0.014927,-0.017623,0.010463
4923,2020-10-19,-0.000259,0.007042,-0.016232,-0.002713,-0.012403,0.014492,-0.030962,0.009804,-0.041589,...,-0.026018,-0.027146,-0.008931,-0.011703,0.021719,0.005525,0.014927,-0.017623,0.010463,0.034439
4924,2020-10-20,0.007042,-0.016232,-0.002713,-0.012403,0.014492,-0.030962,0.009804,-0.041589,-0.003727,...,-0.027146,-0.008931,-0.011703,0.021719,0.005525,0.014927,-0.017623,0.010463,0.034439,-0.019446
4925,2020-10-21,-0.016232,-0.002713,-0.012403,0.014492,-0.030962,0.009804,-0.041589,-0.003727,0.012968,...,-0.008931,-0.011703,0.021719,0.005525,0.014927,-0.017623,0.010463,0.034439,-0.019446,-0.016755


In [9]:
## Currently testing the setup of MLflow and training jobs.
def deep_lstm():
    """Build and compile the sequential model used by the training cell.

    At present the network is just an input of shape ``(None, 1)`` followed by
    a single ``Dense(1)`` layer; the recurrent layers are disabled (see the
    comment in the body). It is compiled with MAE loss and reports MSE and
    MAPE as metrics. No optimizer is passed, so the library default applies.

    NOTE(review): with only ``Dense(1)`` the output keeps the time axis
    (``(None, None, 1)`` in ``model.summary()``), while ``to_ts()`` produces
    one scalar target per sample -- presumably acceptable only while this is
    a stand-in; confirm before relying on the reported losses.

    Returns:
        The compiled model.
    """
    # Disabled recurrent stack, kept for reference:
    #   LSTM(12, return_sequences=True) x2
    #   LSTM(6, return_sequences=True) x2
    #   LSTM(2, return_sequences=True)
    #   LSTM(1)
    network = Sequential([
        InputLayer(input_shape=(None, 1)),
        Dense(1),
    ])
    network.compile(loss='mae', metrics=['mse', 'mape'])
    return network

In [10]:
# Build the model and print its layer-by-layer summary (output shapes and
# parameter counts).
model = deep_lstm()
model.summary()
# Optional architecture diagram, currently disabled:
# plot_model(model)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, None, 1)           2         
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Early-stopping callback with a patience of 2 that restores the best weights.
# NOTE(review): with epochs=1 below, a patience of 2 can never take effect --
# presumably a smoke-test setting; confirm before a real training run.
early = EarlyStopping(patience=2, restore_best_weights=True)
# Train on the windowed data, holding out 20% of the samples for validation.
model.fit(x=data, y=targets, batch_size=36, validation_split=0.2, epochs=1, callbacks=[early])



<tensorflow.python.keras.callbacks.History at 0x7ff4099dd3d0>