### Stock Prediction Using Different Models, i.e. LSTM and Ridge

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import plotly.express as px
from copy import copy
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
import plotly.figure_factory as ff
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [None]:
stock_price_df = pd.read_csv('/stock.csv')
stock_price_df

In [None]:
stock_vol_df = pd.read_csv('/content/drive/stock_volume.csv')
stock_vol_df

In [None]:
stock_price_df = stock_price_df.sort_values(by = ['Date'])

In [None]:
stock_vol_df = stock_vol_df.sort_values(by = ['Date'])

In [None]:
stock_price_df.isnull().sum()

In [None]:
stock_vol_df.isnull().sum()

In [None]:
stock_price_df.info()

In [None]:
stock_vol_df.info()

In [None]:
##Average Trading Volume for AAPL
stock_vol_df['AAPL'].mean()

In [None]:
##Max Trading Volume for sp500
stock_vol_df['sp500'].max()

In [None]:
## Most Traded Security
# Total the volume of every ticker over the whole period (the 'Date' column is
# excluded) and report the ticker with the largest total. The previous loop
# overwrote its accumulator on each pass and only printed the last column's sum.
total_volume = stock_vol_df.drop(columns=['Date']).sum()
print(total_volume.idxmax(), total_volume.max())

In [None]:
##Average Price of sp500
stock_price_df['sp500'].mean()

In [None]:
##Max Price of TSLA
stock_price_df['TSLA'].max()

In [None]:
stock_price_df.describe()

In [None]:
stock_vol_df.describe()

### Exploratory Data Analysis and Visualization

In [None]:
def normalize(df):
  """Return a copy of `df` with every column after the first rebased to 1.

  Each column except the first (the first is left untouched; in this notebook
  it holds the dates) is divided by its own value in the first row, so all
  series start at 1 and can be compared on one chart. The input frame is not
  modified.
  """
  x = df.copy()
  for col in x.columns[1:]:
    # Positional access on purpose: after sorting or filtering, the index
    # label 0 may no longer be the first row (or may not exist at all).
    x[col] = x[col] / x[col].iloc[0]
  return x

In [None]:
def interactive_plot(df, title):
  """Show an interactive Plotly chart with one trace per column after the first.

  Every trace uses df['Date'] as its x-axis and is named after its column.
  """
  figure = px.line(title=title)
  series_names = df.columns[1:]
  for series_name in series_names:
    figure.add_scatter(
      x=df['Date'],
      y=df[series_name],
      name=series_name,
    )
  figure.show()

In [None]:
interactive_plot(stock_price_df, 'Stock Prices')

In [None]:
interactive_plot(stock_vol_df, 'Stock Volume')

In [None]:
interactive_plot(normalize(stock_price_df), 'Normalized Stock Price')

In [None]:
interactive_plot(normalize(stock_vol_df), 'Normalized Stock Volume')

### Prepare the Data Before Training the Model

In [None]:
#Concatenate date, price, volume in one df
def individual_stock(price_df, vol_df, name):
  """Build a Date / Close / Volume frame for the single ticker `name`.

  'Date' and 'Close' come from `price_df`, 'Volume' from `vol_df`.
  """
  columns = {
    'Date': price_df['Date'],
    'Close': price_df[name],
    'Volume': vol_df[name],
  }
  return pd.DataFrame(columns)

In [None]:
#Shift prices by n (1 day)
def trading_window(data, n=1):
  """Add a 'Target' column holding the 'Close' value `n` rows ahead.

  Args:
    data: DataFrame with a 'Close' column. It is modified in place (the
      'Target' column is added or overwritten) and also returned.
    n: How many rows ahead the target lies. Defaults to 1 (the next row).

  Returns:
    The same DataFrame object. The last `n` rows have NaN in 'Target'
    because no later value exists for them.
  """
  data['Target'] = data['Close'].shift(-n)
  return data

In [None]:
price_volume_df = individual_stock(stock_price_df, stock_vol_df, 'AAPL')
price_volume_df

In [None]:
price_volume_target_df = trading_window(price_volume_df)
price_volume_target_df

In [None]:
price_volume_target_df = price_volume_target_df[:-1]
price_volume_target_df

In [None]:
# Scale the Data
# Every column except 'Date' (Close, Volume, Target) is rescaled to [0, 1].
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed further down, so the test range influences the scaling.
# Confirm whether fitting on the training rows only is wanted.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
price_volume_target_scaled_df = sc.fit_transform(
  price_volume_target_df.drop('Date', axis='columns')
)
price_volume_target_scaled_df

In [None]:
price_volume_target_scaled_df.shape

In [None]:
# Create Feature and Target
X = price_volume_target_scaled_df[:, :2]
y = price_volume_target_scaled_df[:, 2:]

In [None]:
X

In [None]:
y

In [None]:
X.shape

In [None]:
y.shape

In [None]:
# Split the Data for Training/Testing
split = int(0.65 * len(X))
split

In [None]:
X_train = X[:split]
X_train.shape

In [None]:
y_train = y[:split]
y_train.shape

In [None]:
X_test = X[split:]
X_test.shape

In [None]:
y_test = y[split:]
y_test.shape

In [None]:
# Data Plotting Function
def show_plot(data, title):
  """Draw `data` as thick lines on a new 13x5 figure with a title and a grid."""
  figure = plt.figure(figsize=(13, 5))
  axes = figure.gca()
  axes.plot(data, linewidth=3)
  axes.set_title(title)
  axes.grid()


In [None]:
show_plot(X_train, 'Training Data')
show_plot(X_test, 'Testing Data')

### Build and Train Ridge Regression Model

In [None]:
from sklearn.linear_model import Ridge
regression_model = Ridge()
regression_model.fit(X_train, y_train)

In [None]:
# Evaluate the fitted Ridge model on the held-out test split.
# NOTE: for regressors, `score` returns the coefficient of determination (R^2),
# not a classification accuracy, despite the variable name.
lr_accuracy = regression_model.score(X_test, y_test)
print('Ridge Regression Score:', lr_accuracy)

In [None]:
# Make Prediction
predicted_prices = regression_model.predict(X)
predicted_prices

In [None]:
# Flatten the (n_samples, 1) prediction array into a plain list of scalars.
predicted = [row[0] for row in predicted_prices]

In [None]:
len(predicted)

In [None]:
# First scaled column (Close) of every row, as a plain list.
close = [row[0] for row in price_volume_target_scaled_df]

In [None]:
# Take an independent copy of the Date column: new columns are added to this
# frame in the following cells, and assigning into a selection of another
# frame triggers pandas' SettingWithCopyWarning.
df_predicted = price_volume_target_df[['Date']].copy()
df_predicted

In [None]:
df_predicted['Close'] = close
df_predicted

In [None]:
df_predicted['Predictions'] = predicted
df_predicted

In [None]:
interactive_plot(df_predicted, 'Original vs. Predictions')

## Train LSTM Time Series Model

In [None]:
price_volume_df = individual_stock(stock_price_df, stock_vol_df, 'AAPL')
price_volume_df

In [None]:
# Get close and volume data (Input)
training_data = price_volume_df.iloc[:, 1:3].values
training_data

In [None]:
# Normalize the data
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range= (0, 1))
training_set_scaled = sc.fit_transform(training_data)

In [None]:
# One-step windows over the first scaled column (Close): each sample is the
# previous row's value (a length-1 slice), each label is the current row's value.
row_count = len(price_volume_df)
X = [training_set_scaled[t - 1:t, 0] for t in range(1, row_count)]
y = [training_set_scaled[t, 0] for t in range(1, row_count)]

In [None]:
# Convert data into array format
X = np.asarray(X)
y = np.asarray(y)

In [None]:
# Split the data
split = int(0.7 * len(X))
X_train = X[:split]
y_train = y[:split]
X_test = X[split:]
y_test = y[split:]

In [None]:
# Reshape the 2D arrays (samples, timesteps) into 3D arrays (samples, timesteps, 1) to feed the model
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
X_train.shape, X_test.shape

In [None]:
# Create model
# Three stacked 150-unit LSTM layers, with 30% dropout after the first two,
# followed by a single linear output unit.
inputs = keras.layers.Input(shape = (X_train.shape[1], X_train.shape[2]))
x = keras.layers.LSTM(150, return_sequences=True) (inputs)
x = keras.layers.Dropout(0.3)(x)
x = keras.layers.LSTM(150, return_sequences=True)(x)
x = keras.layers.Dropout(0.3)(x)
# NOTE(review): the last LSTM also keeps return_sequences=True, so the Dense
# layer below presumably emits one value per timestep (a rank-3 output); the
# later `i[0][0]` indexing of the predictions matches that. Confirm this is
# intended rather than a final LSTM that returns only its last state.
x = keras.layers.LSTM(150, return_sequences=True)(x)
outputs = keras.layers.Dense(1, activation = 'linear')(x)

model = keras.Model(inputs = inputs, outputs = outputs)
# Adam optimizer with mean-squared-error loss.
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()

In [None]:
# Train the model
history = model.fit(X_train, y_train, epochs = 20, batch_size= 32, validation_split= 0.2)

In [None]:
# Make prediction
# X is still the 2D (samples, timesteps) array; only X_train / X_test were
# reshaped above. Give it the same (samples, timesteps, 1) layout the model
# was trained on instead of relying on implicit rank expansion.
predicted = model.predict(np.reshape(X, (X.shape[0], X.shape[1], 1)))

In [None]:
# Collect the first scalar of each sample's prediction into a flat list
test_predicted = [sample[0][0] for sample in predicted]

In [None]:
# Dates from the second row onward (the first row has no preceding value to
# predict from). Copy explicitly: columns are added to this frame in the
# following cells, and assigning into a slice of another frame triggers
# pandas' SettingWithCopyWarning.
df_predicted = price_volume_df[1:][['Date']].copy()
df_predicted

In [None]:
df_predicted['predictions'] = test_predicted

In [None]:
# First scaled column (Close) of every row, as a plain list.
close = [row[0] for row in training_set_scaled]

In [None]:
df_predicted['Close'] = close[1:]

In [None]:
df_predicted

In [None]:
interactive_plot(df_predicted, 'Original Price vs. LSTM Predictions')