In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# import helper functions
from helper import create_df_per_stock
from helper import run_for_stocks
from helper import get_best_configuration
from helper import final_df_cleaning
from helper import create_return_arrays

# Load Dataset

In [2]:
# Read the previously generated normalized dataset and hand it straight to the
# cleaning helper (drops unnecessary columns, arranges rows by ticker and date).
data = final_df_cleaning(pd.read_csv("../data/normalized_data.csv"))
# Unique stock tickers found in the cleaned data
tickers = data['tic'].unique()
data.head()

Unnamed: 0,index,datadate,tic,cshtrm,prccm,prchm,prclm,trt1m,CAPEI,evm,...,high_low_ratio,RSI_14,MA_3,price_to_MA_3,return_momentum_6m,MA_6,return_momentum_9m,MA_9,return_momentum_12m,MA_12
0,22,2011-03-31,AMZN,0.057669,0.050441,0.047726,0.04827,0.449252,0.657229,0.104348,...,0.02095,0.599129,0.050675,0.673276,0.278596,0.050655,0.346955,0.046881,0.20368,0.044979
1,64,2011-04-30,AMZN,0.055855,0.054863,0.05203,0.052741,0.479247,0.657454,0.104348,...,0.020388,0.647066,0.053232,0.698572,0.291211,0.052154,0.350194,0.049443,0.222852,0.046454
2,106,2011-05-31,AMZN,0.051408,0.055111,0.054307,0.057432,0.4272,0.657571,0.104227,...,0.010842,0.669937,0.055518,0.671418,0.27017,0.053198,0.327768,0.051806,0.249207,0.048244
3,148,2011-06-30,AMZN,0.046094,0.05731,0.05427,0.054622,0.449368,0.6577,0.104227,...,0.022005,0.66899,0.057899,0.669438,0.275018,0.054399,0.255807,0.053366,0.3066,0.050637
4,190,2011-07-31,AMZN,0.044648,0.062395,0.059825,0.061283,0.479955,0.657969,0.104227,...,0.017924,0.721217,0.060509,0.699107,0.333047,0.056992,0.267572,0.05525,0.309609,0.053266


In [3]:
# Dictionary to hold dataframe for each stock
# NOTE(review): the captured output of this cell shows a pandas SettingWithCopyWarning
# raised inside the helper while it shifts 'trt1m' up by one row, so the helper
# presumably works on a slice of `data` — confirm in helper.py and make it operate
# on an explicit copy.
df_per_stock = create_df_per_stock(tickers=tickers, dataframe=data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['trt1m'] = df['trt1m'].shift(-1) # Shift Target Return up


# Run for All Stocks

In [4]:
# Display the ticker array that is passed to run_for_stocks below
tickers

array(['AMZN', 'AVY', 'AXON', 'BBWI', 'BKNG', 'BLDR', 'CBRE', 'CDNS',
       'CE', 'CF', 'CHD', 'CMCSA', 'CMS', 'CNC', 'COST', 'DECK', 'DLTR',
       'EA', 'EQIX', 'FI', 'FICO', 'GOOGL', 'INCY', 'LULU', 'MA', 'MOH',
       'NDAQ', 'NI', 'NVDA', 'ODFL', 'OKE', 'PKG', 'SBAC', 'STLD', 'TDG',
       'TGT', 'TYL', 'UNH', 'URI', 'V', 'VLO', 'WST'], dtype=object)

In [5]:
# Small hyperparameter grid for a quick test run.
# Every key holds the list of candidate values for that setting.
param_grid = dict(
    lstm_units=[100, 150],
    dense_units1=[50, 100, 150],
    dense_units2=[50, 75],
    batch_size=[32, 64],
    optimizer=['adam'],
)

In [6]:
# Run the hyperparameter search for every ticker; the result is indexed by ticker
# later in the notebook (models['AMZN']).
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt, so `models`
# was not assigned in that session — let the cell run to completion before using it.
models = run_for_stocks(tickers, df_per_stock, param_grid)

Processing stock: AMZN
Running hyperparameter tuning for AMZN.
Current Parameters: {'lstm_units': 100, 'dense_units1': 50, 'dense_units2': 50, 'batch_size': 32, 'optimizer': 'adam'}


KeyboardInterrupt: 

In [None]:
# Get Best Configuration for each stock via the CSV files
best_configurations = get_best_configuration(tickers=tickers)
# Transpose so that each row is one entry of `best_configurations`, then turn the
# index into a regular column.
# NOTE(review): presumably the helper returns a mapping keyed by ticker with six
# values per ticker, matching the column names below — confirm in helper.py.
best_configurations_df = pd.DataFrame(best_configurations).T.reset_index()
best_configurations_df.columns = ['ticker', 'lstm_units', 'dense_units1', 'dense_units2', 'batch_size', 'optimizer', 'avg_val_mse']
# Persist first, then end the cell with the frame so it is actually displayed
# (only the last expression of a cell is rendered).
best_configurations_df.to_csv("../results/best_configs.csv")
best_configurations_df

In [None]:
# Create a Numpy Array of Returns
# NOTE(review): presumably built from per-ticker result files in the given folder —
# confirm the return shape and file layout in helper.py.
normalized_12_m_default_lstm_X = create_return_arrays(tickers=tickers, folder="../normalized_returns_default_lstm")

# Data and Plots Below Are Used for the Presentation Slides

In [None]:
# Let's first try to get the model to run for 1 stock
# Take an explicit copy of the AMZN rows (skipping the first column) so that nothing
# below operates on a slice of `data`; this avoids SettingWithCopyWarning.
amzn = data[data['tic'] == 'AMZN'].iloc[:, 1:].copy()
# Target: the 'trt1m' column
y = amzn['trt1m'].values
# Remove the target from the feature frame without mutating in place
amzn = amzn.drop(columns=['trt1m'])
# Skip the two leading columns of the remaining frame
# (presumably 'datadate' and 'tic', judging by the data.head() output — TODO confirm)
X = amzn.iloc[:, 2:].values
print(f"The Shape of X Features is: {X.shape}")
print(f"The Shape of y Features is: {y.shape}")

In [None]:
sequence_length = 12  # Feed past 12 month returns into sequence for LSTM
# Turn the 2D feature matrix into overlapping windows (3D) so the LSTM sees a time
# axis: window k covers rows k .. k+sequence_length-1 and is paired with the target
# of the row immediately after the window.
window_starts = range(X.shape[0] - sequence_length)
X_features = np.array([X[start:start + sequence_length] for start in window_starts])
y_target = np.array([y[start + sequence_length] for start in window_starts])

print(f"The Shape of X Features is: {X_features.shape}")
print(f"The Shape of y Features is: {y_target.shape}")

In [None]:
# Chronological 80/20 split of the windowed samples.
# The split size is derived from the number of windows (len(X_features)), not from
# len(amzn): windowing yields sequence_length fewer samples than there are rows, so
# using the row count would skew the ratio and misreport the test-set size.
n_samples = len(X_features)
train_size = int(n_samples * 0.8)
print(f"The size of my training set will be : {train_size} and the test set will be : {n_samples - train_size}")
X_train, y_train = X_features[:train_size], y_target[:train_size]
# reshape returns a new array, so the result has to be assigned to take effect
y_train = y_train.reshape(-1, 1)
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of y_train: {y_train.shape}")
X_test, y_test = X_features[train_size:], y_target[train_size:]
# Reshape the test targets (the original reshaped y_target and discarded the result)
y_test = y_test.reshape(-1, 1)
print(f"Shape of X_test: {X_test.shape}")
print(f"Shape of y_test: {y_test.shape}")

In [None]:
# Plot the full AMZN target-return series and overlay its last 19 observations.
# NOTE(review): 19 is presumably the size of the hold-out period — confirm and
# replace the literal with a named constant.
fig, ax = plt.subplots(figsize=(12, 6))
amzn_returns = df_per_stock['AMZN']['trt1m']
ax.plot(amzn_returns, label='AMZN Standardized Returns')
ax.plot(amzn_returns[-19:], label='Returns I want to predict')
ax.set_title('Amazon Standardized Returns')
# Observations are monthly (month-end dates in the dataset), so the axis is not in days
ax.set_xlabel('Months')
ax.set_ylabel('Returns')
ax.legend()
plt.show()

In [None]:
# Show the performance plot of the AMZN entry of `models`.
# NOTE(review): depends on `models` from the run_for_stocks cell, whose saved output
# shows a KeyboardInterrupt — on a fresh kernel this fails unless that cell completes.
models['AMZN'].plot_performance()