<a href="https://colab.research.google.com/github/Akif4362/lstm_research/blob/main/Prediction_of_Stock_Price_using_LSTM_(full_data).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Importing Libraries & Setting Up Device-Agnostic Code

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn

import requests
import zipfile
from pathlib import Path
from copy import deepcopy as dc

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

# 2. Downloading and Preparing Dataset

In [None]:
data_path = Path("data/")
csv_path = data_path / "stock_prices_data"
zip_path = data_path / "timeseries_research_data.zip"

# Treat the dataset as present only when the directory exists AND contains
# something: the directory is created before the download starts, so an
# interrupted earlier run can leave an empty directory behind.
if csv_path.is_dir() and any(csv_path.iterdir()):
    print(f"{csv_path} directory exists.")
else:
    print(f"Did not find {csv_path} directory, creating one...")
    csv_path.mkdir(parents=True, exist_ok=True)

    print("Downloading time series data...")
    response = requests.get(
        "https://github.com/Akif4362/lstm_research/raw/main/data/timeseries_research_data.zip",
        timeout=60,
    )
    # Fail loudly on an HTTP error instead of saving an error page as a .zip
    response.raise_for_status()
    zip_path.write_bytes(response.content)

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        print("Unzipping time series data...")
        zip_ref.extractall(csv_path)

In [None]:
# Build the path from csv_path (where the archive was extracted) rather than
# the Colab-only "/content" prefix, so the notebook also runs elsewhere.
df = pd.read_csv(csv_path / "timeseries_research_data" / "AAMRATECH_data.csv")
df.head()

In [None]:
# Keep only the two columns used below. The explicit copy makes df an
# independent frame, so the later in-place assignment to df["Date"] does not
# trigger pandas' SettingWithCopyWarning.
df = df[["Date", "Close"]].copy()
df.head()

In [None]:
# Convert the Date column to pandas datetime values
df["Date"] = pd.to_datetime(df["Date"])

# Line chart of the closing price against the date
dates, closes = df["Date"], df["Close"]
plt.plot(dates, closes)
plt.title("AAMRATECH")
plt.xlabel('Day')
plt.ylabel('Close')
plt.show()

In [None]:
# Build the lagged-feature table that the LSTM inputs are later cut from
def prepare_dataframe_for_lstm(df, n_steps):
  """Return a Date-indexed copy of df with n_steps lagged "Close" columns.

  Column "Close(t-i)" holds the "Close" value from i rows earlier, for
  i = 1..n_steps. Rows containing any missing value (including the leading
  rows that lack a full history) are dropped. The input frame is not modified.
  """
  lagged = dc(df).set_index("Date")

  for lag in range(1, n_steps + 1):
    lagged[f"Close(t-{lag})"] = lagged["Close"].shift(lag)

  return lagged.dropna()

# Number of previous closing prices given to the model for each prediction
TIMESTEP = 7
shifted_df = prepare_dataframe_for_lstm(df, n_steps=TIMESTEP)
shifted_df

In [None]:
# Convert the lagged dataframe to a plain NumPy array; the Date index is not
# part of the array. Column 0 is "Close", followed by Close(t-1)..Close(t-7).
shifted_df_as_np = shifted_df.to_numpy()
shifted_df_as_np

In [None]:
# Scaling our data so the training rows lie between -1 and 1
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
# Fit on the chronologically first 90% of rows only (the same fraction the
# train/test split below uses) so the min/max of the held-out test period do
# not leak into preprocessing. Test rows may therefore fall slightly outside
# [-1, 1].
n_fit_rows = int(len(shifted_df_as_np) * 0.90)
scaler.fit(shifted_df_as_np[:n_fit_rows])
shifted_df_as_np = scaler.transform(shifted_df_as_np)
shifted_df_as_np

In [None]:
# Column 0 ("Close") is the target; every remaining column is a lagged input
y, X = shifted_df_as_np[:, 0], shifted_df_as_np[:, 1:]

X.shape, y.shape

In [None]:
# Reverse the column order so each row runs from the oldest lag to the most
# recent one, then copy so X owns its data instead of being a reversed view
X = np.flip(X, axis=1).copy()
X

In [None]:
# Chronological split: the first 90% of rows train the model, the rest test it
split_index = int(len(X) * 0.90)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Fixing the dimensions as required for LSTM: inputs become
# (samples, TIMESTEP, 1) and targets (samples, 1). TIMESTEP is used instead of
# a literal 7 so changing the window length in one place keeps this in sync.
X_train = X_train.reshape((-1, TIMESTEP, 1))
X_test = X_test.reshape((-1, TIMESTEP, 1))

y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Turn all four NumPy arrays into float32 PyTorch tensors
X_train, X_test, y_train, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, X_test, y_train, y_test)
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

# 3. Creating PyTorch Dataset and Dataloader

In [None]:
# Subclassing from the Dataset class to get our train and test datasets
from torch.utils.data import Dataset

class TimeSeriesData(Dataset):
  """Dataset that pairs each entry of X with the entry of y at the same index."""

  def __init__(self, X, y):
    # Store references to the given containers; nothing is copied or converted.
    self.X, self.y = X, y

  def __len__(self):
    """Return the number of samples, taken from the length of X."""
    return len(self.X)

  def __getitem__(self, i):
    """Return the (input, target) pair stored at index i."""
    sample, target = self.X[i], self.y[i]
    return sample, target

# Wrap the train and test tensors so a DataLoader can batch them
train_dataset = TimeSeriesData(X=X_train, y=y_train)
test_dataset = TimeSeriesData(X=X_test, y=y_test)

In [None]:
# Creating dataloaders
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# Both loaders share the same settings; samples are served in their stored order
_loader_options = {"batch_size": BATCH_SIZE, "shuffle": False}
train_dataloader = DataLoader(train_dataset, **_loader_options)
test_dataloader = DataLoader(test_dataset, **_loader_options)

In [None]:
# Print the shapes of the first training batch as a sanity check. Dedicated
# names are used so the module-level X and y arrays are not overwritten by a
# single batch of tensors.
for X_batch, y_batch in train_dataloader:
  print(X_batch.shape, y_batch.shape)
  break

# 4. Creating our LSTM Model

In [None]:
# Creating our LSTM model
class LSTM(nn.Module):
  """Stacked LSTM followed by a linear layer that emits one value per sequence.

  Args:
    input_size: Number of features per time step.
    hidden_size: Size of the LSTM hidden state.
    num_stacked_layers: Number of stacked LSTM layers.
  """

  def __init__(self, input_size, hidden_size, num_stacked_layers):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_stacked_layers = num_stacked_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, 1)

  def forward(self, x):
    """Map a batch of sequences (batch, seq_len, input_size) to (batch, 1)."""
    batch_size = x.size(0)
    state_shape = (self.num_stacked_layers, batch_size, self.hidden_size)
    # Create the zero initial states on the input's own device and dtype rather
    # than a module-level `device` global, so the model works wherever its
    # input lives and does not depend on outside state.
    h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
    c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

    out, _ = self.lstm(x, (h0, c0))
    # Only the output of the last time step feeds the linear head
    out = self.fc(out[:, -1, :])
    return out

# One input feature per step, 5 hidden units, a single LSTM layer; moved to
# the selected device straight away
model = LSTM(input_size=1, hidden_size=5, num_stacked_layers=1).to(device)
model

# 5. Predicting Closing Price for AAMRATECH

In [None]:
# Mean absolute error as the objective, optimised with Adam
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.L1Loss()

In [None]:
# One optimisation pass over the training data
def train_step(model, dataloader, loss_fn, optimizer, epoch, print_every=40, device=device):
  """Train model for a single epoch and occasionally report the mean loss.

  Each batch is moved to device, passed through the model, and used for one
  optimizer update. The per-batch losses are averaged over the number of
  batches; the average is printed when (epoch + 1) is a multiple of
  print_every. Nothing is returned.
  """
  model.train()
  running_loss = 0

  for features, targets in dataloader:
    features = features.to(device)
    targets = targets.to(device)

    batch_loss = loss_fn(model(features), targets)
    running_loss += batch_loss.item()

    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

  mean_loss = running_loss / len(dataloader)

  epoch_number = epoch + 1
  if epoch_number % print_every != 0:
    return
  print(f"Epoch {epoch_number}, train_loss: {mean_loss:.5f}")

In [None]:
# One evaluation pass over the held-out data
def test_step(model, dataloader, loss_fn, epoch, print_every=40, device=device):
  """Evaluate model for a single epoch and occasionally report the mean loss.

  The model is switched to eval mode and every batch is scored under
  torch.inference_mode(). The per-batch losses are averaged over the number
  of batches; the average is printed when (epoch + 1) is a multiple of
  print_every. Nothing is returned.
  """
  model.eval()

  with torch.inference_mode():
    summed_loss = sum(
        loss_fn(model(features.to(device)), targets.to(device)).item()
        for features, targets in dataloader
    )

  mean_loss = summed_loss / len(dataloader)

  epoch_number = epoch + 1
  if epoch_number % print_every != 0:
    return
  print(f"Epoch {epoch_number}, test_loss: {mean_loss:.5f}\n-------------------------------")

In [None]:
# Training our model: every epoch runs one pass over the training data and
# then one evaluation pass over the test data. Both helpers print only on
# every 40th epoch (their default print_every).
epochs = 200

for epoch in range(epochs):
  train_step(model, train_dataloader, loss_fn, optimizer, epoch=epoch)
  test_step(model, test_dataloader, loss_fn, epoch=epoch)

In [None]:
# Getting the unscaled values of the predictions on test set.
# Put the model in eval mode explicitly and skip autograd bookkeeping, the
# same way test_step does, instead of relying on whichever mode the model was
# left in by the last cell that ran.
model.eval()
with torch.inference_mode():
  test_predictions = model(X_test.to(device)).cpu().numpy().flatten()

# The scaler was fitted on TIMESTEP+1 columns, so the predictions are placed
# in column 0 of a zero-padded array of that width before inverting.
dummies = np.zeros((X_test.shape[0], TIMESTEP+1))
dummies[:, 0] = test_predictions
dummies = scaler.inverse_transform(dummies)

test_predictions = dc(dummies[:, 0])
test_predictions

In [None]:
# Undo the scaling on the test targets: place them in column 0 of a
# zero-padded array with the width the scaler was fitted on, invert, and keep
# column 0 only
dummies = np.zeros((X_test.shape[0], TIMESTEP+1))
dummies[:, 0] = y_test.flatten()
dummies = scaler.inverse_transform(dummies)

new_y_test = dc(dummies[:, 0])
new_y_test

In [None]:
# Overlay actual and predicted closing prices for the test period
for series, series_label in ((new_y_test, 'Actual Close'), (test_predictions, 'Predicted Close')):
  plt.plot(series, label=series_label)
plt.title("AAMRATECH")
plt.xlabel('Day')
plt.ylabel('Close')
plt.legend()
plt.show()

# 6. Wrapping the Training Process in a Function

In [None]:
def _unscale_first_column(scaler, values, n_columns):
  """Invert the scaler for values that belong to column 0 of an n_columns-wide fit."""
  padded = np.zeros((len(values), n_columns))
  padded[:, 0] = values
  return scaler.inverse_transform(padded)[:, 0].copy()


def lstm_time_series(df, company_name=None, n_steps=7, train_fraction=0.90,
                     batch_size=32, epochs=200):
  """Train on one company's closing prices and plot test predictions.

  Plots the raw series, builds n_steps-lagged inputs, scales them, splits them
  chronologically, trains with train_step/test_step, and plots actual versus
  predicted closing prices for the test period. Returns nothing.

  NOTE(review): this uses the module-level `model`, `loss_fn` and `optimizer`,
  so weights and optimizer state carry over between calls and each company
  continues training from the previous one. Confirm this is intended.

  Args:
    df: DataFrame with at least "Date" and "Close" columns. It is not modified.
    company_name: Label used in the printed banner and the plot titles.
    n_steps: Number of lagged closing prices used as model input.
    train_fraction: Fraction of rows (oldest first) used for training.
    batch_size: Batch size for both dataloaders.
    epochs: Number of train/test passes.

  Raises:
    ValueError: If the split would leave the train or the test set empty.
  """
  print(f"FOR {company_name}")
  print("--------------------------")
  print("--------------------------")
  # Copy the selection so assigning the parsed dates below neither touches the
  # caller's frame nor triggers pandas' SettingWithCopyWarning.
  df = df[["Date", "Close"]].copy()
  df["Date"] = pd.to_datetime(df["Date"])

  plt.plot(df["Date"], df["Close"])
  plt.xlabel('Day')
  plt.ylabel('Close')
  if company_name:
    plt.title(f"{company_name}")
  plt.show()

  shifted_df = prepare_dataframe_for_lstm(df, n_steps)
  shifted_df_as_np = shifted_df.to_numpy()

  n_rows = len(shifted_df_as_np)
  split_index = int(n_rows * train_fraction)
  if not 0 < split_index < n_rows:
    raise ValueError(
        f"Cannot split {n_rows} rows with train_fraction={train_fraction}: "
        "both the train and the test set need at least one row."
    )

  # Fit the scaler on the training rows only, so the min/max of the held-out
  # test period do not leak into preprocessing; then scale every row with it.
  scaler = MinMaxScaler(feature_range=(-1, 1))
  scaler.fit(shifted_df_as_np[:split_index])
  shifted_df_as_np = scaler.transform(shifted_df_as_np)

  # Column 0 is the target; the lag columns are reversed so each input
  # sequence runs from the oldest to the most recent price.
  X = np.flip(shifted_df_as_np[:, 1:], axis=1).copy()
  y = shifted_df_as_np[:, 0]

  X_train = torch.tensor(X[:split_index].reshape((-1, n_steps, 1))).float()
  X_test = torch.tensor(X[split_index:].reshape((-1, n_steps, 1))).float()
  y_train = torch.tensor(y[:split_index].reshape((-1, 1))).float()
  y_test = torch.tensor(y[split_index:].reshape((-1, 1))).float()

  train_dataset = TimeSeriesData(X_train, y_train)
  test_dataset = TimeSeriesData(X_test, y_test)

  train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
  test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  for epoch in range(epochs):
    train_step(model, train_dataloader, loss_fn, optimizer, epoch=epoch)
    test_step(model, test_dataloader, loss_fn, epoch=epoch)

  # Predict in eval mode without autograd bookkeeping, as test_step does
  model.eval()
  with torch.inference_mode():
    scaled_predictions = model(X_test.to(device)).cpu().numpy().flatten()

  # The scaler was fitted on n_steps + 1 columns (target plus lags)
  test_predictions = _unscale_first_column(scaler, scaled_predictions, n_steps + 1)
  new_y_test = _unscale_first_column(scaler, y_test.numpy().flatten(), n_steps + 1)

  plt.plot(new_y_test, label='Actual Close')
  plt.plot(test_predictions, label='Predicted Close')
  plt.xlabel('Day')
  plt.ylabel('Close')
  if company_name:
    plt.title(f"{company_name}")
  plt.legend()
  plt.show()

# 7. ADVENT

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_advent = pd.read_csv(csv_path / "timeseries_research_data" / "ADVENT_data.csv")
lstm_time_series(df_advent, "ADVENT")

# 8. ARGONDENIM

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix. The label now matches the ticker in the file
# name (ARGONDENIM); the variable name is kept so other cells are unaffected.
df_agrodenim = pd.read_csv(csv_path / "timeseries_research_data" / "ARGONDENIM_data.csv")
lstm_time_series(df_agrodenim, "ARGONDENIM")

# 9. BDTHAIFOOD

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_bdthaifood = pd.read_csv(csv_path / "timeseries_research_data" / "BDTHAIFOOD_data.csv")
lstm_time_series(df_bdthaifood, "BDTHAIFOOD")

# 10. ECABLES

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_ecables = pd.read_csv(csv_path / "timeseries_research_data" / "ECABLES_data.csv")
lstm_time_series(df_ecables, "ECABLES")

# 11. KEYACOSMET

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_keyacosmet = pd.read_csv(csv_path / "timeseries_research_data" / "KEYACOSMET_data.csv")
lstm_time_series(df_keyacosmet, "KEYACOSMET")

# 12. MEGHNAPET

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_meghnapet = pd.read_csv(csv_path / "timeseries_research_data" / "MEGHNAPET_data.csv")
lstm_time_series(df_meghnapet, "MEGHNAPET")

# 13. PRIMETEX

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_primetex = pd.read_csv(csv_path / "timeseries_research_data" / "PRIMETEX_data.csv")
lstm_time_series(df_primetex, "PRIMETEX")

# 14. PUBALIBANK

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_pubalibank = pd.read_csv(csv_path / "timeseries_research_data" / "PUBALIBANK_data.csv")
lstm_time_series(df_pubalibank, "PUBALIBANK")

# 15. UNIONCAP

In [None]:
# Resolve the CSV from csv_path (the extraction directory) instead of the
# Colab-only "/content" prefix
df_unioncap = pd.read_csv(csv_path / "timeseries_research_data" / "UNIONCAP_data.csv")
lstm_time_series(df_unioncap, "UNIONCAP")