In [2]:
# installing needed libraries
!pip install alpha_vantage --quiet
!pip install pmdarima --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sphinx 4.0.1 requires Jinja2<3.0,>=2.3, but you have jinja2 3.1.2 which is incompatible.
sphinx 4.0.1 requires MarkupSafe<2.0, but you have markupsafe 2.1.2 which is incompatible.[0m


In [14]:
!pip install yfinance --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
conda-repo-cli 1.0.4 requires pathlib, which is not installed.
anaconda-project 0.9.1 requires ruamel-yaml, which is not installed.
sphinx 4.0.1 requires Jinja2<3.0,>=2.3, but you have jinja2 3.1.2 which is incompatible.
sphinx 4.0.1 requires MarkupSafe<2.0, but you have markupsafe 2.1.2 which is incompatible.[0m


In [26]:
!pip install keras --quiet

In [28]:
!pip install tensorflow --quiet

In [15]:
# importing dependencies for data collection
import time 
import pandas as pd

# using yfinance for stock data
import yfinance as yf

# importing plotly packages for graphs
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots


Collecting Data - Visualizing Data

In [18]:
# collecting data for the Alphabet Company/Google
stock_name = 'GOOGL'
# auto_adjust=False is passed explicitly so the result keeps a separate
# 'Adj Close' column (used by the plot and the LSTM cell below) regardless of
# which default the installed yfinance release uses.
goog = yf.download(stock_name, start = "2016-01-01", end = "2023-06-30",
                   auto_adjust = False)

[*********************100%***********************]  1 of 1 completed


In [19]:
# preview the downloaded price table (rich display of the DataFrame)
goog

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,38.110001,38.110001,37.376999,37.972000,37.972000,67382000
2016-01-05,38.205002,38.459999,37.782501,38.076500,38.076500,45216000
2016-01-06,37.518501,38.286499,37.400002,37.966499,37.966499,48206000
2016-01-07,37.324501,37.765499,36.764000,37.049999,37.049999,63132000
2016-01-08,37.389999,37.506001,36.445999,36.545502,36.545502,47506000
...,...,...,...,...,...,...
2023-06-23,121.379997,122.790001,121.059998,122.339996,122.339996,34885300
2023-06-26,120.760002,122.000000,118.269997,118.339996,118.339996,33969900
2023-06-27,117.080002,119.080002,116.099998,118.330002,118.330002,39535900
2023-06-28,117.089996,120.389999,116.849998,120.180000,120.180000,27091400


In [22]:
# line chart of the adjusted closing price; the DataFrame index is used as
# the x axis because no `x` column is given
fig = px.line(
    data_frame=goog,
    y="Adj Close",
    title="Adjusted Close for GOOGL 2016-2023",
)
fig.show()

LSTM Model

In [29]:
# importing dependencies for LSTM Model
from sklearn.preprocessing import MinMaxScaler
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from keras.callbacks import EarlyStopping

In [None]:
# helper: turn a 1-D series into sliding-window samples for the LSTM
def _make_windows(series, window):
    """Build (inputs, targets) from a 1-D sequence.

    Each input is `window` consecutive values and its target is the value
    that immediately follows. Inputs are returned with shape
    (samples, window, 1); targets with shape (samples,).
    """
    inputs = np.array([series[i - window:i] for i in range(window, len(series))])
    targets = np.array(series[window:])
    return inputs.reshape(-1, window, 1), targets


# creating function to create a LSTM model for a given stock_name and data
def lstm(stock_name, data, window=60, train_frac=.80, epochs=21, batch_size=1):
    """Train an LSTM on the 'Adj Close' column and plot its test predictions.

    Parameters
    ----------
    stock_name : str
        Ticker label; used only in the figure title.
    data : pandas.DataFrame
        Price table that contains an 'Adj Close' column.
    window : int, default 60
        Number of past observations used to predict the next one.
    train_frac : float, default 0.80
        Leading fraction of the rows used for training; the rest is the
        test/validation period.
    epochs : int, default 21
        Number of training epochs passed to `model.fit`.
    batch_size : int, default 1
        Batch size passed to `model.fit`.

    Raises
    ------
    ValueError
        If 'Adj Close' is missing, or the split leaves no complete training
        window or no test rows.

    Side effects: prints the test RMSE, tries to write the model diagram to
    'multi_input_and_output_model.png', and shows a plotly figure with the
    train, validation and predicted series.
    """
    if 'Adj Close' not in data.columns:
        raise ValueError("data must contain an 'Adj Close' column")

    data = data.filter(['Adj Close'])
    dataset = data.values

    # splitting training and testing (80%/20% by default)
    training_data_len = int(np.ceil(len(dataset) * train_frac))
    if training_data_len <= window or training_data_len >= len(dataset):
        raise ValueError(
            f'need more than {window} training rows and at least one test row; '
            f'got {training_data_len} training rows out of {len(dataset)}'
        )

    # scaling data: fit on the training rows only so the test period's
    # min/max do not leak into training, then transform the whole series
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(dataset[:training_data_len])
    scaled_data = scaler.transform(dataset)

    # creating training dataset: past `window` days -> next day adj. closing,
    # as numpy arrays shaped (samples, window, 1) / (samples,)
    x_train, y_train = _make_windows(scaled_data[:training_data_len, 0], window)

    # LSTM Model
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(x_train.shape[1], 1)))
    model.add(Dropout(.35))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(25, activation='relu'))
    model.add(Dense(1))

    # compile model (no 'accuracy' metric: this is a regression problem)
    model.compile(optimizer='adam', loss='mean_squared_error')

    # train model
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)

    # structure of model; the diagram is optional, so a missing plotting
    # backend must not abort the run
    try:
        keras.utils.plot_model(model, 'multi_input_and_output_model.png', show_shapes=True)
    except ImportError as err:
        print(f'Skipping model diagram: {err}')

    # creating testing dataset: prepend the last `window` training rows so the
    # first test day has a full history
    x_test, _ = _make_windows(scaled_data[training_data_len - window:, 0], window)
    y_test = dataset[training_data_len:, :]

    # predicting the test set, then mapping back to price units
    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)

    # getting RMSE
    rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
    print(f'RMSE LSTM: {rmse}')

    # creating graphs; sizes come from the data itself
    train = data[:training_data_len]
    valid = data[training_data_len:]
    preds_gr = predictions.ravel()

    # local figure: traces must not be added to the notebook-level `fig`
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train.index, y=train['Adj Close'], mode='lines+markers',
                             marker=dict(size=4, color='#39304A'), name='train'))
    fig.add_trace(go.Scatter(x=valid.index, y=valid['Adj Close'], mode='lines+markers',
                             marker=dict(size=4, color='#A98D75'), name='valid'))
    fig.add_trace(go.Scatter(x=valid.index, y=preds_gr, mode='lines+markers',
                             marker=dict(size=4, color='#FFAA00'), name='predictions'))
    fig.update_layout(title=f'LSTM Adjusted Close predictions for {stock_name}')
    fig.show()
