<a href="https://colab.research.google.com/github/Aneesh-CQ/Aneesh-CQ.github.io/blob/main/Stock_Market_Pred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Stock Market Predictions using LSTM

In [None]:
# Install yfinance into a custom directory rather than the kernel's own site-packages.
# NOTE(review): the target path is Kaggle-specific while the badge at the top of the
# notebook opens it in Colab — confirm this path is intended there. `%pip install`
# would target the running kernel's environment directly.
!pip install yfinance --target=/kaggle/working/mysitepackages

# Make the custom install directory importable for the rest of the notebook
import sys
sys.path.append('/kaggle/working/mysitepackages')

In [None]:
# Import and setup
import pandas as pd
import numpy as np

# Plotting libraries and global plot styling
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")  # applied after the seaborn style set on the previous line
%matplotlib inline

# Yahoo finance reading setup
from pandas_datareader.data import DataReader  # NOTE(review): not referenced in the visible cells
import yfinance as yf
from pandas_datareader import data as pdr

# Presumably routes pdr.get_data_yahoo through yfinance.
# NOTE(review): pdr_override() is reported deprecated/removed in newer yfinance
# releases — confirm the installed version still provides it.
yf.pdr_override()

# Time stamps setup
from datetime import datetime

# Tickers to analyse, paired position-by-position with the display name stored on each frame.
tech_list = ["AAPL", "GOOG", "MSFT", "AMZN"]
company_name = ["APPLE", "GOOGLE", "MICROSOFT", "AMAZON"]

# Download window: from midnight one calendar year ago up to now.
# DateOffset(years=1) clamps Feb 29 to Feb 28, whereas building
# datetime(end.year - 1, end.month, end.day) raises ValueError when run on a leap day.
end = datetime.now()
start = (pd.Timestamp(end).normalize() - pd.DateOffset(years=1)).to_pydatetime()

# Download each ticker once into an explicit mapping instead of writing into globals().
stock_frames = {ticker: yf.download(ticker, start, end) for ticker in tech_list}

# Later cells refer to the frames by ticker name, so bind those names explicitly.
AAPL, GOOG, MSFT, AMZN = (stock_frames[ticker] for ticker in tech_list)
company_list = [AAPL, GOOG, MSFT, AMZN]

# Tag every frame with its company so rows stay identifiable after concatenation.
for company, com_name in zip(company_list, company_name):
    company["company_name"] = com_name

# Stack the four frames vertically and preview the last rows.
df = pd.concat(company_list, axis = 0)
df.tail(10)


In [None]:
# Summary statistics for the numeric columns of the Amazon frame
AMZN.describe()

In [None]:
# Column dtypes, non-null counts and index range of the Amazon frame
AMZN.info()

In [None]:
# One panel per company showing the adjusted closing price.
# An explicit 2x2 axes grid replaces the implicit pyplot state machine. The former
# plt.subplots_adjust(top=1.25, bottom=1.2) call is dropped: it ran before any axes
# existed, used fractions outside the figure, and was overridden by tight_layout().
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (15, 10))

# Pair each panel with its ticker and frame directly instead of indexing tech_list by i - 1.
for ax, ticker, company in zip(axes.flat, tech_list, company_list):
    company['Adj Close'].plot(ax = ax)
    ax.set_ylabel('Adj Close')
    ax.set_xlabel("")  # hide the x-axis label pandas derives from the index name
    ax.set_title(f"Closing Price of {ticker}")

fig.tight_layout()

In [None]:
# One panel per company showing the traded volume.
# An explicit 2x2 axes grid replaces the implicit pyplot state machine. The former
# plt.subplots_adjust(top=1.25, bottom=1.2) call is dropped: it ran before any axes
# existed, used fractions outside the figure, and was overridden by tight_layout().
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (15, 10))

# Pair each panel with its ticker and frame directly instead of indexing tech_list by i - 1.
for ax, ticker, company in zip(axes.flat, tech_list, company_list):
    company['Volume'].plot(ax = ax)
    ax.set_ylabel('Volume')
    ax.set_xlabel("")  # hide the x-axis label pandas derives from the index name
    ax.set_title(f"Sales Volume for {ticker}")

fig.tight_layout()

In [None]:
# Rolling-window lengths (in rows) used for the moving averages
ma_day = [10, 20, 50]

# Add one rolling mean of the adjusted close per window length to every company frame
for company in company_list:
    for ma in ma_day:
        company[f"MA for {ma} days"] = company["Adj Close"].rolling(ma).mean()


# Draw the price together with its three moving averages, one panel per company
fig, axes = plt.subplots(nrows = 2, ncols = 2)
fig.set_size_inches(15, 10)  # width 15, height 10

plot_columns = ['Adj Close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']
panels = [
    (AAPL, axes[0,0], 'APPLE'),
    (GOOG, axes[0,1], 'GOOGLE'),
    (MSFT, axes[1,0], 'MICROSOFT'),
    (AMZN, axes[1,1], 'AMAZON'),
]
for frame, panel_ax, panel_title in panels:
    frame[plot_columns].plot(ax=panel_ax)
    panel_ax.set_title(panel_title)

fig.tight_layout()

In [None]:
# Daily return = fractional change of the adjusted close from one row to the next
for company in company_list:
    company['Daily Return'] = company['Adj Close'].pct_change()

# Plot the daily return of each company in its own panel
fig, axes = plt.subplots(nrows = 2, ncols = 2)
fig.set_figheight(10)
fig.set_figwidth(15)

# Title every panel on its OWN axes: setting all four titles on axes[0, 0] left three
# panels untitled and labelled the Apple panel "AMAZON".
for ax, company, com_name in zip(axes.flat, company_list, company_name):
    company['Daily Return'].plot(ax = ax, legend = True, linestyle = '--', marker = 'o')
    ax.set_title(com_name)

fig.tight_layout()

In [None]:
# Fetch the price history of every ticker in tech_list with a single call
all_prices = pdr.get_data_yahoo(tech_list, start = start, end = end)

# Keep only the adjusted closing prices of the tech stocks
closing_df = all_prices['Adj Close']

# Row-over-row fractional change of each column; preview the first rows
tech_rets = closing_df.pct_change()
tech_rets.head()

In [None]:
# Sanity check: plotting Google's returns against themselves should give a perfectly linear relationship
sns.jointplot(x = "GOOG", y = "GOOG", data = tech_rets, kind = "scatter", color = "seagreen")

In [None]:
# Joint scatter plot of the GOOG and MSFT columns of tech_rets
sns.jointplot(x = "GOOG", y = "MSFT", data = tech_rets, kind = "scatter")

In [None]:
# Pairwise comparison of every column of tech_rets, using regression-style panels (kind='reg')
sns.pairplot(tech_rets, kind = 'reg')

In [None]:
# Build a PairGrid named return_fig on the daily returns, with rows containing NaN dropped
return_fig = sns.PairGrid(tech_rets.dropna())

# Use map_upper to choose what the upper triangle shows: scatter plots
return_fig.map_upper(plt.scatter, color = "purple")

# Use map_lower to choose what the lower triangle shows: KDE plots
return_fig.map_lower(sns.kdeplot, cmap = 'cool_d')

# Finally, draw histograms on the diagonal
return_fig.map_diag(plt.hist, bins = 30)

In [None]:
# Build a second PairGrid, this time on the closing prices rather than the daily returns.
# Every map_* call below must target returns_fig; calling them on return_fig (the grid
# built on the daily returns in the previous cell) would leave this grid empty.
returns_fig = sns.PairGrid(closing_df)

# Upper triangle: scatter plots
returns_fig.map_upper(plt.scatter, color = "purple")

# Lower triangle: KDE plots
returns_fig.map_lower(sns.kdeplot, cmap = 'cool_d')

# Diagonal: histograms
returns_fig.map_diag(plt.hist, bins = 30)

In [None]:
# Two annotated correlation heatmaps side by side in the top row of a 2x2 layout
corr_fig = plt.figure(figsize = (12, 10))

# Left: correlation between the daily returns of the tickers
returns_ax = corr_fig.add_subplot(2, 2, 1)
sns.heatmap(tech_rets.corr(), annot = True, cmap = "summer", ax = returns_ax)
returns_ax.set_title("Correlation of stock return")

# Right: correlation between the closing prices of the tickers
prices_ax = corr_fig.add_subplot(2, 2, 2)
sns.heatmap(closing_df.corr(), annot = True, cmap = "summer", ax = prices_ax)
prices_ax.set_title("Correlation of stock closing price")

In [None]:
# Fetch AMZN data from 2013-01-01 up to the moment this cell runs.
# NOTE(review): this rebinds `df`, which an earlier cell used for the concatenated one-year frames.
df = pdr.get_data_yahoo('AMZN', start = '2013-01-01', end = datetime.now()) # The window grows with the run date; it is not a fixed 10 years

# Let's show the data
df

# Note: as seen in the analysis above, our data sets are pretty clean and need little cleaning.

In [None]:
# Closing price of the long AMZN history on a single wide panel
fig, ax = plt.subplots(figsize = (16, 6))
ax.plot(df["Close"])
ax.set_title("Close Price History")
ax.set_xlabel("Date", fontsize = 18)
ax.set_ylabel("Close Price USD ($)", fontsize = 18)
plt.show()

In [None]:
# Keep only the closing price, as a single-column DataFrame
data = df.filter(items = ['Close'])

# Convert to a numpy array (one row per index entry)
dataset = data.to_numpy()

# Number of leading rows used to train the model: 95% of the data, rounded up
train_fraction = .95
training_data_len = int(np.ceil(train_fraction * len(dataset)))

print(training_data_len)

In [None]:
# Scale data
from sklearn.preprocessing import MinMaxScaler

# Learn the min/max from the training rows only. Fitting on the whole series would leak
# the price range of the held-out rows into training (look-ahead bias). As a result,
# scaled values in the test range may fall outside [0, 1].
scaler = MinMaxScaler(feature_range = (0, 1))
scaler.fit(dataset[:training_data_len])

# Apply the training-derived scaling to the full series
scaled_data = scaler.transform(dataset)

print(scaled_data)

In [None]:
# Training slice: the first training_data_len rows of the scaled series
train_data = scaled_data[:int(training_data_len), :]

# Each sample pairs 60 consecutive scaled values (input) with the value right after them (target)
x_train, y_train = [], []

for target_idx in range(60, len(train_data)):
    window = train_data[target_idx - 60:target_idx, 0]
    x_train.append(window)
    y_train.append(train_data[target_idx, 0])
    # Only print while the lists hold the first two samples, to keep the output short
    if target_idx <= 61:
        print(x_train)
        print(y_train)

# Lists -> numpy arrays so they can be reshaped and fed to the model
x_train, y_train = np.array(x_train), np.array(y_train)

# Add a trailing axis of size 1: (samples, 60) -> (samples, 60, 1)
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)

In [None]:
# Start 60 rows before the split so the first test window has a full history
test_data = scaled_data[training_data_len - 60:, :]

# Targets are the unscaled values after the split
y_test = dataset[training_data_len:, :]

# One 60-value input window per remaining row
x_test = np.array([test_data[stop - 60:stop, 0] for stop in range(60, len(test_data))])

# Add a trailing axis of size 1: (samples, 60) -> (samples, 60, 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Discard a model left over from an earlier run of this cell before building a new one
if 'model' in globals():
    del model

# Two stacked LSTM layers followed by two dense layers; the last one emits a single value
model = Sequential([
    LSTM(128, return_sequences = True, input_shape = (x_train.shape[1], 1)),
    LSTM(64, return_sequences = False),
    Dense(25),
    Dense(1),
])

model.compile(optimizer = "adam", loss = "mean_squared_error")

# Single pass over the training windows, one sample per batch
model.fit(x_train, y_train, batch_size = 1, epochs = 1)


In [None]:
# Run the trained model on the test windows built earlier
scaled_predictions = model.predict(x_test)

# Undo the scaler transformation so predictions are comparable with y_test and can be plotted
predictions = scaler.inverse_transform(scaled_predictions)

# Root mean squared error between predictions and the actual values
squared_errors = np.square(predictions - y_test)
rmse = np.sqrt(squared_errors.mean())

print(rmse)

In [None]:
# Split the closing-price frame at the same boundary used for the model inputs
train = data[:training_data_len]

# Copy the validation slice before adding a column: assigning into a slice of `data`
# triggers pandas' SettingWithCopyWarning and may not do what is intended.
valid = data[training_data_len:].copy()

# predictions has shape (n, 1); flatten it so it lines up as a single column
valid['Predictions'] = np.ravel(predictions)

# visualize data
plt.figure(figsize = (16, 6))
plt.title("Model")
plt.xlabel("Date", fontsize = 18)
plt.ylabel('Close Price USD ($)', fontsize = 18)

# Three lines in legend order: training closes, actual validation closes, predicted closes
plt.plot(train['Close'])
plt.plot(valid[["Close", "Predictions"]])
plt.legend(["Train", "Val", "Predictions"], loc = "lower right")
plt.show()