In [22]:
import math
import json
from datetime import date, datetime, timedelta
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import plotly.express as px
plt.style.use('fivethirtyeight')
import pymongo
from pymongo import MongoClient


In [23]:
# Create connection to mongoDB
client = MongoClient('mongodb://localhost:27017')
# Connect to stock_df database in mongoDB
db = client.stock_db ###################

In [24]:
# Create variable for new_input
new_input = "ZM"

In [25]:
# Retrive data
one_stock = db.dummy_test.find_one({'symbol': new_input}) ########################3

# Isolate symbol and historical data
symbol = one_stock['symbol']
historical_data = one_stock['historical']

stock_date = []
close = []

for h in historical_data:
    
    collect_dates = h['date']
    stock_date.append(collect_dates)
    
    collect_close = h['close']
    close.append(collect_close)

TypeError: 'NoneType' object is not subscriptable

In [None]:
df = pd.DataFrame({'Date': stock_date,
                  'close': close})
df.head()

In [None]:
df['Date'] = pd.to_datetime(df['Date'])
df.dtypes

In [None]:
new_df = df.set_index('Date')
new_df.head()

In [None]:
new_df.shape

In [None]:
plt.figure(figsize =(16, 8))
plt.title('Closing Price History')
plt.plot(new_df['close'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Prize USD ($)', fontsize=18)
plt.show()

In [None]:
# Create new df with only the 'Close' column
data = new_df.filter(['close'])

# Convert df to a numpy array
dataset = data.values

# Get the number of rows to train the model on
training_data_len = math.ceil(len(dataset) * .8)

training_data_len

In [None]:
# Scale the data to apply preprocessing scaling before presenting to nueral network
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)

# Show scaled data representing values between 0-1
scaled_data

In [None]:
# Create the training dataset 
# Create the scaled training dataset
train_data = scaled_data[0:training_data_len , :]

# Split the data into x_train and y_train data sets
# x_train will be the independent training variables
# y_train will be the dependent variables
x_train = []
y_train = []

for i in range(60, len(train_data)):
  # Append past 60 values to x_train
  # contains 60 vals index from position 0 to position 59
  x_train.append(train_data[i-60:i, 0])

  #y_train will contain the 61st value 
  y_train.append(train_data[i,0])

  # Run below to visualize the x & y trains. x should be an array of 60 values and y should be 1 value being the 61st
  # Changing to if i<=61 will provide a 2nd pass through
  if i<=60:
    print(x_train)
    print(y_train)
    print()

In [None]:
# Convert x_train & y_train to numpy arrays  so we can use them for training the LSTM model
x_train, y_train = np.array(x_train), np.array(y_train)

In [None]:
# Reshape the data because LSTM network expects input to be 3 dimensional and as of now our x_train is 2D
# number of sample(rows), timesteps(columns), and features(closing price)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_train.shape

In [None]:
# Build LSTM model
model = Sequential()
# add LSTM with 50 neurons 
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the model
model.fit(x_train, y_train, batch_size=1, epochs=1)

In [None]:
# Create testing dataset
# Create new array containing scaled values from index 2057 to 2646
test_data = scaled_data[training_data_len - 60: , :]

# Create the data sets x_test and y_test
x_test = []
# y_test contains actual 61st values (not scaled)
y_test = dataset[training_data_len: , :]

for i in range(60, len(test_data)):
  x_test.append(test_data[i-60:i, 0])

In [None]:
# Convert data to numpy array to use is LSTM model
x_test = np.array(x_test)

In [None]:
# Reshape the data because data is 2D and we need 3D for LSTM
# number of samples(rows), timesteps(col), features(closing price)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [None]:
# Get the models predicted price values for x_test dataset
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

In [None]:
# Get the root mean squared error. Closer to 0 the better
rmse = np.sqrt(np.mean(predictions - y_test) **2)
rmse

In [None]:
# Plot the data
train = data[:training_data_len]
valid = data[training_data_len:]
valid['Predictions'] = predictions

# Visualize the model
plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['close'])
plt.plot(valid[['close', 'Predictions']])
plt.legend(['Train', 'Validation', 'Predictions'], loc='lower right')
plt.show()

# Blue will indicate what the model was trained on
# Red is actual closing values
# Yellow is the prediction

In [None]:
index_valid = valid.reset_index()
index_valid_df = pd.DataFrame(index_valid)
index_valid_df.head()

In [None]:
stock_date = index_valid_df['Date']
stock_date_list = []

for stock in stock_date:
    collect_dates = stock
    clean_dates = datetime.strftime(collect_dates, '%Y-%m-%d')
    stock_date_list.append(clean_dates)
    
#print(stock_date_list)

In [None]:
close_data = index_valid_df['close']
close_data_list = []

for close in close_data:
    collect_close = close
    close_data_list.append(collect_close)
    
#close_data_list

In [None]:
predictions_data = index_valid_df['Predictions']
predicted_data_list = []

for predict in predictions_data:
    collect_predict = predict
    predicted_data_list.append(collect_predict)
    
#predicted_data_list

In [None]:
prediction_data = {
    'Date': stock_date_list,
    'Actual Close': close_data_list,
    'Predictions': predicted_data_list
}
#prediction_data

In [None]:
current_date = date.today().strftime('%Y-%m-%d')
print(current_date)

In [None]:
db.dummy_test.update_one({'symbol': new_input}, {'$push': {'prediction': {'date': current_date, 'prediction_data': prediction_data}}})