In [63]:
# General packages
import pandas as pd
import numpy as np
import datetime as dt

# Packages related to API
from pandas_datareader import data as pdr
import yfinance as yf
import scipy as sc
yf.pdr_override()

# Packages related to machine learning and neural networks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras.backend as K
from keras.models import load_model
from sklearn.preprocessing import StandardScaler

# Fix the random seeds so a re-run reproduces the same results despite the
# stochastic starting point of neural-network training.
SEED = 42

# K is tensorflow.keras.backend (see the imports), so a single call is enough
# to reset the Keras global state; the original called the same function twice.
K.clear_session()
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [53]:
# Tickers to download
stocks = ['TSLA', 'XOM']

# Look-back window: the 20*365 days ending today, as 'YYYY-MM-DD' strings
end1 = dt.date.today()
start1 = end1 - dt.timedelta(days=20*365)
start, end = (day.strftime('%Y-%m-%d') for day in (start1, end1))

# Fetch price data from Yahoo and keep only the adjusted-close columns
stockdata = pdr.get_data_yahoo(stocks, start=start, end=end)['Adj Close']

# Percentage returns: first drop rows where every ticker is NaN, then drop
# every ticker column that still contains at least one NaN.
# NOTE(review): the column-wise dropna removes any ticker whose price history
# starts later than the window start -- confirm that is intended.
returns = stockdata.pct_change()
returns = returns.dropna(how='all')
returns = returns.dropna(axis=1)

[*********************100%***********************]  2 of 2 completed


In [90]:
def _daily_feature_table(price):
    """Build the daily feature frame (price, return, SMA and Bollinger ratios) for one price series."""
    table = pd.DataFrame({'price': price})
    table['return'] = table['price'].pct_change()

    # NOTE(review): grouping by the calendar month *number* pools e.g. every
    # January across all years and uses the whole sample (including later
    # data). Kept unchanged to preserve the feature definition -- confirm
    # this is the intended "monthly" volatility.
    table['monthly_std'] = table.groupby(table.index.month)['return'].transform('std')

    close = table['price']
    table['4sma_pct_price'] = (close.rolling(window=4).mean() / close) - 1
    table['100sma_pct_price'] = (close.rolling(window=100).mean() / close) - 1

    # Bollinger bands: 22-day mean +/- 2 standard deviations, relative to price.
    # The rolling statistics are computed once and reused for both bands.
    sma_22 = close.rolling(window=22).mean()
    band_width = close.rolling(window=22).std() * 2
    table['bolling_top_pct'] = ((sma_22 + band_width) / close) - 1
    table['bolling_bot_pct'] = ((sma_22 - band_width) / close) - 1
    return table


def _build_regressor(n_features):
    """Create and compile the dense regression network for `n_features` inputs."""
    nn = Sequential()
    # Input layer sized from the data instead of a hard-coded 7
    nn.add(Dense(units=100, input_dim=n_features, activation="relu"))
    # Hidden layers
    nn.add(Dense(units=150, activation="relu"))
    nn.add(Dense(units=5, activation="relu"))
    # Output layer (single regression target)
    nn.add(Dense(units=1, activation="linear"))
    nn.compile(loss="mean_squared_error", optimizer='adam', metrics=['mean_squared_error'])
    return nn


def _fit_until_acceptable(X_train_scaled, y_train, max_attempts=20, seed=42,
                          max_loss=.04, max_val_loss=1.39,
                          checkpoint_path='best_nn_model.h5'):
    """Train the network until the final-epoch losses meet the thresholds.

    Each attempt rebuilds the network and fits it for up to 300 epochs with
    early stopping on the training loss. An attempt is accepted when the last
    epoch's training loss is below `max_loss` and its validation loss is below
    `max_val_loss`.

    Parameters
    ----------
    X_train_scaled : array-like of shape (n_samples, n_features)
    y_train : array-like of shape (n_samples,)
    max_attempts : int
        Upper bound on the number of training attempts.
    seed : int
        Seed for the first attempt; attempt k uses ``seed + k`` so that a
        retry does not simply repeat the previous, failed run.
    max_loss, max_val_loss : float
        Acceptance thresholds for the final-epoch loss / val_loss.
    checkpoint_path : str
        Where ModelCheckpoint writes the best model.

    Returns
    -------
    (saved_model, history)
        The model loaded back from `checkpoint_path` and the object returned
        by the accepted ``fit`` call.

    Raises
    ------
    RuntimeError
        If no attempt meets the thresholds within `max_attempts`.
    """
    # The callbacks are created once, so ModelCheckpoint keeps tracking the
    # best training loss across attempts.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='loss',
                                         mode='min',
                                         patience=35,
                                         verbose=1),
        tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                           monitor='loss',
                                           mode='min',
                                           save_best_only=True,
                                           # was misspelt `initil_value_threshold`,
                                           # so the threshold was never applied
                                           initial_value_threshold=max_loss),
    ]

    def fit_once(model):
        return model.fit(X_train_scaled, y_train, validation_split=0.2, epochs=300,
                         batch_size=12, callbacks=callbacks, verbose=1)

    n_features = X_train_scaled.shape[1]
    for attempt in range(max_attempts):
        # Reset Keras state and seed this attempt (attempt 0 uses `seed` itself)
        K.clear_session()
        attempt_seed = seed + attempt
        np.random.seed(attempt_seed)
        tf.random.set_seed(attempt_seed)

        nn = _build_regressor(n_features)
        try:
            history = fit_once(nn)
        except Exception as err:
            # Keep the original single retry, but no longer swallow
            # KeyboardInterrupt/SystemExit the way a bare `except:` does.
            print(f"fit failed on attempt {attempt + 1} ({err!r}); retrying once")
            history = fit_once(nn)

        final_loss = history.history['loss'][-1]
        final_val_loss = history.history['val_loss'][-1]
        # Written as `<` so that NaN losses count as a failed attempt
        if final_loss < max_loss and final_val_loss < max_val_loss:
            return load_model(checkpoint_path), history

    raise RuntimeError(
        f"No training attempt out of {max_attempts} reached loss < {max_loss} "
        f"and val_loss < {max_val_loss}"
    )


def prep_and_train(stockdata, ticker='TSLA', max_attempts=20):
    """Engineer month-end features for one ticker and train a return regressor.

    Steps: build daily features from the ticker's prices, keep only the
    month-end trading dates listed in ``csv_files/trade_dates_<today>.csv``
    (columns ``dates`` and ``end_of_month``), use the next month-end-to-
    month-end return as the target ``y``, split train/test 30 days before
    today, standardise the features on the training rows, and fit the network.

    Parameters
    ----------
    stockdata : pandas.DataFrame
        Prices indexed by timestamp with one column per ticker.
    ticker : str, default 'TSLA'
        Column of `stockdata` to model.
    max_attempts : int, default 20
        Maximum number of training attempts before giving up.

    Returns
    -------
    (saved_nn_model, nn_model, X_test_scaled)
        The checkpointed model loaded from disk, the object returned by the
        accepted ``fit`` call, and the scaled test feature matrix.

    Raises
    ------
    ValueError
        If no usable training rows remain after filtering.
    RuntimeError
        If training never meets the loss thresholds within `max_attempts`.
    """
    daily = _daily_feature_table(stockdata[ticker])

    # Convert index timestamps to plain (midnight, tz-naive) dates
    daily.index = pd.DatetimeIndex([pd.Timestamp(ts.date()) for ts in daily.index])

    # Keep only end-of-month trading dates
    today = dt.date.today()
    trade_dates = pd.read_csv(f"csv_files/trade_dates_{today}.csv")
    month_end_dates = trade_dates.loc[trade_dates['end_of_month'] == True, 'dates']
    # .copy() so the column assignments below act on an independent frame
    monthly = daily[daily.index.isin(month_end_dates)].copy()

    # Checker: show the first filtered rows in the notebook
    display(monthly.head())

    # Target: the following period's month-end-to-month-end return
    monthly['monthly_return'] = monthly['price'].pct_change()
    monthly = monthly.dropna()
    monthly['y'] = monthly['monthly_return'].shift(-1)

    # Train on the 20*365 days before the split date; test on the split date onwards.
    # Train is exclusive of the split date so no row lands in both sets.
    split_ts = pd.Timestamp(dt.date.today() - dt.timedelta(days=30))
    start_ts = split_ts - pd.Timedelta(days=20*365)
    in_train = (monthly.index >= start_ts) & (monthly.index < split_ts)
    # The last row has no next-month return yet; never train on a NaN target
    train = monthly[in_train].dropna(subset=['y'])
    test = monthly[monthly.index >= split_ts]
    if train.empty:
        raise ValueError("No training rows available before the train/test split date")

    # Feature matrices (the price level itself and the target are excluded)
    X_train = train.drop(columns=['y', 'price'])
    y_train = train['y']
    X_test = test.drop(columns=['y', 'price'])

    # Standardise using statistics from the training rows only
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    saved_nn_model, nn_model = _fit_until_acceptable(
        X_train_scaled, y_train, max_attempts=max_attempts
    )

    return saved_nn_model, nn_model, X_test_scaled
    
     # prediction1 = saved_nn_model.predict(X_test_scaled)

In [91]:
# Run the full pipeline: returns the model loaded from the checkpoint file,
# the value returned by the final fit call, and the scaled test features.
saved_nn_model, nn_model, X_test_scaled = prep_and_train(stockdata)

# Predict on the scaled test features and print the raw predictions.
prediction1 = saved_nn_model.predict(X_test_scaled)
print(prediction1)

Unnamed: 0,price,return,monthly_std,4sma_pct_price,100sma_pct_price,bolling_top_pct,bolling_bot_pct
2012-05-31,1.966667,-0.029924,0.038517,0.028898,0.090657,0.159281,-0.072933
2012-06-29,2.086,-0.003821,0.030595,0.008869,0.043838,0.093237,-0.139112
2012-07-31,1.828,0.00256,0.036543,0.02489,0.172746,0.283935,-0.018933
2012-08-31,1.901333,0.003872,0.033849,-0.000438,0.075438,0.103201,-0.057491
2012-09-28,1.952,0.027729,0.034395,-0.035434,0.025318,0.094578,-0.100912


Unnamed: 0,price,return,monthly_std,4sma_pct_price,100sma_pct_price,bolling_top_pct,bolling_bot_pct,monthly_return,y
2022-11-30,194.699997,0.076702,0.037256,-0.048138,0.292646,0.161251,-0.179779,-0.144326,-0.367334
2022-12-30,123.18,0.011164,0.033776,-0.052586,0.842685,0.707537,-0.19305,-0.367334,0.406235
2023-01-31,173.220001,0.039362,0.039014,-0.021403,0.129803,0.003971,-0.466819,0.406235,0.187565
2023-02-28,205.710007,-0.009247,0.039403,-0.012821,-0.129046,0.072509,-0.166246,0.187565,0.008507
2023-03-31,207.460007,0.062372,0.039992,-0.053058,-0.171954,-0.010719,-0.178088,0.008507,-0.207992


              return  monthly_std  4sma_pct_price  100sma_pct_price  \
2012-06-29 -0.003821     0.030595        0.008869          0.043838   
2012-07-31  0.002560     0.036543        0.024890          0.172746   
2012-08-31  0.003872     0.033849       -0.000438          0.075438   
2012-09-28  0.027729     0.034395       -0.035434          0.025318   
2012-10-31  0.027392     0.033222       -0.018397          0.056843   
...              ...          ...             ...               ...   
2022-11-30  0.076702     0.037256       -0.048138          0.292646   
2022-12-30  0.011164     0.033776       -0.052586          0.842685   
2023-01-31  0.039362     0.039014       -0.021403          0.129803   
2023-02-28 -0.009247     0.039403       -0.012821         -0.129046   
2023-03-31  0.062372     0.039992       -0.053058         -0.171954   

            bolling_top_pct  bolling_bot_pct  monthly_return  
2012-06-29         0.093237        -0.139112        0.060678  
2012-07-31         0.

In [74]:
# Display the X_test_scaled array as this cell's output.
X_test_scaled

array([[ 1.11492434e-01, -1.23056089e+00,  1.68406059e+00,
        -2.02650363e-02, -2.82574305e-01, -3.06611697e-01,
         2.77822645e-01],
       [-3.88576826e-01, -1.24560730e+00,  1.68406059e+00,
         2.66537580e-01, -2.39040796e-01, -2.35150214e-01,
         3.68438553e-01],
       [ 5.26491604e-01, -1.21868215e+00,  1.68406059e+00,
        -3.02262684e-01, -3.35587948e-01, -4.72936002e-01,
         2.47426018e-01],
       [ 1.08360598e-01, -1.21060461e+00,  1.68406059e+00,
        -3.23716853e-01, -3.66447253e-01, -5.80433795e-01,
         2.37886111e-01],
       [ 1.42092315e+00, -1.14044084e+00,  1.68406059e+00,
        -1.32054719e+00, -5.82254268e-01, -9.23749234e-01,
        -1.77376680e-01],
       [-1.51926471e+00,  1.27585359e+00,  4.16184390e-01,
         4.38243168e-01, -3.57320167e-01, -5.15982693e-01,
         2.26765030e-01],
       [-3.31094591e-01,  1.23514910e+00,  4.16184390e-01,
         6.90187603e-01, -3.15138125e-01, -4.63041676e-01,
         3.0908815