### **Importing Libraries**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM

### **Gathering the Data**
We prepare 10 years of data for the model to train on

In [2]:
# company ticker name
# example ticker names for testing
# AAPL = Apple, NVDA = NVidia, F = Ford, SPY = SPDR S&P 500, MSFT = Microsoft
# simply change the company ticker name below to test that stock
company = 'AAPL'

# specify training data timeline: 10 years
# from 2010 to 2020
start = dt.datetime(2010,1,1)
end = dt.datetime(2020,1,1)

data = web.DataReader(company, 'yahoo', start, end)

TypeError: string indices must be integers

### **Preprocessing the Data**
The time frame is a day

The model is looking back at the last 90 days

The model is using only the 'Close' price as a univariate input

In [None]:
# scaler will process the data to fit in a value between 0 and 1
scaler = MinMaxScaler(feature_range=(0,1))

# we are only using closing price data to make the prediction
# so we will only be scaling the closing price
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1, 1))

# here we are specifying how many days in the past do we want to look back into
# in order to make the prediction in this case we are looking back 90 days
prediction_days = 90

# empty lists for training data
x_train = []
y_train = []

# we start at 90 days until the length of the scaled data which is 10 years
for x in range(prediction_days, len(scaled_data)):
    x_train.append(scaled_data[x - prediction_days: x, 0])   #  adding value to x_train 90 days of labeled data with values
    y_train.append(scaled_data[x, 0])   # adding the 91st value

# converting to numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)

# we reshape for the LSTM to work with
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

### **Building the LSTM Model**
The model will be predicting the price

In [None]:
model = Sequential()

# here we specify the layers
# LSTM layers should always be accompanied by a dropout layer
# Dropout layer randomly sets input units to 0 with a frequency rate of 0.2 to prevent overfitting during training
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1))) # units = dimesionality/layers
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))   # the Output prediction of the  price

# we then compile the model
model.compile(optimizer='Adam', loss='mean_squared_error')

# we now try for fit the model to the training data
# epoch is the number of times you go through the training set
# batch_size slices the data into batches of 32
history = model.fit(x_train, y_train, epochs=25, batch_size=32)

### **Plotting the loss function during training**

In [None]:
plt.plot(history.history['loss'], label='train')
plt.legend()
plt.show()

### **Testing the Model on Existing Data**
We test the model on data that the model has not seen before

In [None]:
# this needs to be data that the model has not seen before
# we select a start and end date that is much later that our training data timeline
# here we are using a test set of 3 months
test_start = dt.datetime(2020,1,1)
test_end = dt.datetime(2020,5,1)

test_data = web.DataReader(company, 'yahoo', test_start, test_end)

# we need to concatenate a full data set on the data that we need to predict on
actual_prices = test_data['Close'].values

# concatenating data and test data
total_dataset = pd.concat((data['Close'], test_data['Close']))

# what our model will see as an input so it can predict the next price.
model_inputs = total_dataset[len(total_dataset) - len(test_data) - prediction_days:].values

# reshaping and scaling the model inputs
model_inputs = model_inputs.reshape(-1, 1)
model_inputs = scaler.transform(model_inputs)

In [None]:
print(type(actual_prices))


for i in actual_prices:
  print(i)

print(type(total_dataset))
print(type(model_inputs))
print(model_inputs)

### **Evaluating How Accurate Our Model Performs**

In [None]:
# make  predictions on test data
# empty list for our test
x_test = []

for x in range(prediction_days, len(model_inputs)):
    x_test.append(model_inputs[x - prediction_days: x, 0])

# transforming into a numpy array
x_test = np.array(x_test)

# we reshape for the LSTM
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# predict based on  the x_test data
prediction_prices = model.predict(x_test)

# since the predicted prices are scaled we need to inverse transform them back to the actual predicted prices
prediction_prices = scaler.inverse_transform(prediction_prices)


In [None]:
print(model_inputs)

### **Plotting the Test Predictions**

In [None]:
# black for actual
# green for prediction
plt.plot(actual_prices, color='black', label=f"Actual {company} price")
plt.plot(prediction_prices, color='red', label=f"Predicted {company} price")
plt.title(f"{company} Share Price")
plt.xlabel('time')
plt.ylabel(f"{company} Share Price")
plt.legend()
plt.show()