<a href="https://colab.research.google.com/github/Brutusa/Stock-Market-Prediction-with-Pytorch/blob/main/stockMarketPredictionPytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as optim
import matplotlib.pyplot as plt
import math

# download and import the 'datasets' folder from:
# https://drive.google.com/drive/folders/1Wz4uHMNfXBhkwIvRsaLpYU3PWe_y4HXj?usp=drive_link

# for use in Colab: Place individual files in the hierarchy './datasets/amd_dataset.csv'
# change dataset_path below to the appropriate file name
dataset_path = './datasets/adobe_dataset.csv'

# Load dataset
# skiprows=1 skips the header row that holds the column names.
# usecols skips over column 6 as that is the symbol of the stock.
# ndmin keeps the expected rank even when the file has a single data row.
dataset = np.loadtxt(dataset_path, delimiter=',', skiprows=1, usecols=(1,2,3,4,5), ndmin=2)
target_dataset = np.loadtxt(dataset_path, delimiter=',', skiprows=1, usecols=7, ndmin=1)

# Feature matrix fed to the models: one row per entry, five feature columns
X = dataset


# feature list
# Each array holds exactly ONE feature as an (n, 1) column, so entry i of a
# feature is read with raw_<feature>[i, 0].
# NOTE(review): column order is presumably open, high, low, close, volume
# (inferred from the variable names) - confirm against the CSV header.
raw_open_price_data = np.array(dataset[:, 0:1])
raw_high_data = np.array(dataset[:, 1:2])
raw_low_data = np.array(dataset[:, 2:3])
raw_close_price_data = np.array(dataset[:, 3:4])
raw_volume_data = np.array(dataset[:, 4:5])

# target list
y = np.array(target_dataset)

# Function to normalize from 0 to 1
def normalize(v, min, max):
  """Rescale ``v`` with min-max normalization: (v - min) / (max - min).

  Values equal to ``min`` map to 0 and values equal to ``max`` map to 1.

  Args:
    v: scalar or NumPy array of values to rescale.
    min: lower bound of the source range (scalar, or array broadcastable
      against ``v``). The name shadows the builtin but is kept so existing
      callers keep working.
    max: upper bound of the source range, same conventions as ``min``.

  Returns:
    The rescaled value(s), with the same shape as ``v - min``.
  """
  span = np.asarray(max - min, dtype=float)
  # A zero-width range would divide by zero; divide by 1 instead so that
  # constant input (v == min == max) maps to 0 rather than NaN.
  safe_span = np.where(span == 0, 1.0, span)
  return (v - min) / safe_span

# Rescale the feature matrix using its overall minimum and maximum.
# NOTE(review): one global min/max is shared by every column, so columns with
# very different magnitudes are scaled together - confirm this is intended.
X = normalize(X, X.min(), X.max())
normalized_volume = X[:, :5]

# Hand the data to PyTorch: float32 features, targets as a single column
X = torch.as_tensor(X, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.float32).view(-1, 1)

# Define ANN model
class StockPredictionANN(nn.Module):
  """Fully connected network 5 -> 7 -> 7 -> 1 with a sigmoid after every layer."""

  def __init__(self):
    super().__init__()

    # Layer widths are free to tune; experiment to see what gives the best result
    self.hidden1 = nn.Linear(in_features=5, out_features=7)
    self.hidden2 = nn.Linear(in_features=7, out_features=7)
    self.output = nn.Linear(in_features=7, out_features=1)

  def forward(self, x):
    """Run ``x`` through both hidden layers and the output layer in order."""
    for layer in (self.hidden1, self.hidden2, self.output):
      x = torch.sigmoid(layer(x))
    return x

# Define Linear Regression model
class StockPredictionLinReg(nn.Module):
    """Linear regression: a single affine layer mapping 5 features to 1 output."""

    def __init__(self):
      super().__init__()
      self.linear = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
      """Return the raw (unsquashed) output of the linear layer."""
      return self.linear(x)

# Define Logistic Regression model
class StockPredictionLogReg(nn.Module):
    """Logistic regression: one 5 -> 1 linear layer followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
      """Return the sigmoid of the linear layer's output."""
      logits = self.linear(x)
      return torch.sigmoid(logits)

# Model selection - swap the active line to try another architecture:
model = StockPredictionANN()
#model = StockPredictionLinReg()
#model = StockPredictionLogReg()

# Optimisation set-up: mean-squared-error loss minimised with Adam
learning_rate = 1e-3
loss_fn = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Number of optimisation steps taken on each batch
num_epochs = 50

# Batch-size presets: how many consecutive rows (days) are processed together
daily, weekly, monthly = 2, 7, 30
batch_size = daily

# Empty array that will collect one prediction per row
y_predictions = np.empty(0)


# TRAINING
# Only the leading rows are used for training. The count is clamped to the
# dataset length so a shorter CSV does not produce empty batches (and a NaN
# loss) once i runs past the last row.
train_rows = min(882, len(X))
for i in range(train_rows):
    # Sliding window: rows i .. i+batch_size-1 of the features and targets.
    # The window is the same for every epoch, so it is sliced once per i.
    Xbatch = X[i:i+batch_size]
    ybatch = y[i:i+batch_size]
    for epoch in range(num_epochs):
        # y_pred: make prediction based on current batch
        y_pred = model(Xbatch)
        # compare predicted y with the targets of the current batch
        loss = loss_fn(y_pred, ybatch)
        # backpropagate and take one optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
#    print(f'epoch: {epoch}, loss: {loss}')

# TESTING
# NOTE(review): this pass still calls optimizer.step(), so the model keeps
# learning from each window's targets before its prediction is recorded -
# confirm that fitting on the evaluated rows is intended.
test_predictions = []
for i in range(len(X)):
    # Window of rows i .. i+batch_size-1 from the features and the targets
    Xbatch = X[i:i+batch_size]
    ybatch = y[i:i+batch_size]
    for epoch in range(num_epochs):
        # forward pass on the current window
        y_pred = model(Xbatch)

        # keep only the final epoch's prediction for the first row of the window
        if epoch == num_epochs-1:
            test_predictions.append(y_pred[0,0].item())

        # loss against the window's targets, then one optimizer step
        loss = loss_fn(y_pred, ybatch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
#    print(f'epoch: {epoch}, loss: {loss}')

# add all recorded predictions to the prediction array in one go
y_predictions = np.append(y_predictions, test_predictions)

# Price series seeded with the first open price; later entries are appended
# by the price calculation further down.
current_price_prediction = np.append(np.array([]), raw_open_price_data[0])
opening_value = current_price_prediction[0]

# Categorize the predicted trend for every day:
#   1.0 -> today's prediction is greater than yesterday's (positive trend)
#   0.0 -> otherwise (negative trend)
# Day 0 has no previous day, so it is left at 0 instead of being compared
# against the LAST prediction (which is what index -1 would silently do).
pred_trend = np.zeros(len(y_predictions))
pred_trend[1:] = y_predictions[1:] > y_predictions[:-1]

# Price projection: starting from opening_value, move the price up or down
# each day by the average |next-day close - open| over the current batch,
# with the direction taken from pred_trend.
open_col = raw_open_price_data[:, 0]
# The close price is the last column of this array (it is the only column
# when the array holds a single feature).
close_col = raw_close_price_data[:, -1]

projected_prices = []
for i in range(len(y_predictions)):
  next_closes = close_col[i+1:i+1+batch_size]
  opens = open_col[i:i+batch_size]

  # Near the end of the data the two windows have different lengths (the
  # close window runs out first). Pair them element-wise over the common
  # length so the subtraction never broadcasts or fails on a shape mismatch.
  window = min(len(next_closes), len(opens))
  if window > 0:
    average_difference = np.mean(next_closes[:window] - opens[:window])
  else:
    # No next-day close is available: leave the price unchanged rather
    # than propagating the NaN that a mean over an empty array produces.
    average_difference = 0.0

  # Update the opening_value using the trend data
  if pred_trend[i] == 1:
    opening_value += abs(average_difference)
  elif pred_trend[i] == 0:
    opening_value -= abs(average_difference)

  projected_prices.append(opening_value)

# Append all projected prices after the seed value in a single call
current_price_prediction = np.append(current_price_prediction, projected_prices)



def print_results():
  """Print, for every target row, the predicted vs. target trend and price.

  Reads the module-level ``y``, ``pred_trend``, ``current_price_prediction``
  and ``raw_open_price_data``; prints two lines per row and returns nothing.
  """
  for i in range(len(y)):
    predicted_price = current_price_prediction[i]
    # Index the scalar [i, 0]: formatting the 1-element row
    # raw_open_price_data[i] with %.2f relies on a deprecated
    # array-to-scalar conversion.
    actual_price = raw_open_price_data[i, 0]
    print('day %d to %d | predicted trend: %d | target trend: %d | predicted price: $%.2f | actual price: $%.2f' % (i, i+1, pred_trend[i], y[i], predicted_price, actual_price))
    print('difference from predicted price to actual price: $%.2f ' % (predicted_price - actual_price))

# torch.no_grad() disables gradient tracking inside the block, so this forward
# pass over the full feature tensor is not recorded for backpropagation
with torch.no_grad():
    y_pred = model(X)


# compute accuracy
def get_accuracy():
  """Print the share of predicted trends that equal the target, and the loss.

  Reads the module-level ``pred_trend``, ``y`` and ``loss``; returns nothing.
  """
  # count the positions where the predicted trend matches the target
  hits = sum(1 for idx in range(len(pred_trend)) if pred_trend[idx] == y[idx])

  accuracy = (hits / len(y)) * 100
  print('Accuracy: %.1f%%' % accuracy)
  print('loss: %.4f ' %loss)

# Report: per-row trend/price comparison first, then accuracy and loss
print_results()
get_accuracy()

day 0 to 1 | predicted trend: 1 | target trend: 1 | predicted price: $38.31 | actual price: $38.31
difference from predicted price to actual price: $0.00 
day 1 to 2 | predicted trend: 0 | target trend: 0 | predicted price: $38.43 | actual price: $38.99
difference from predicted price to actual price: $-0.56 
day 2 to 3 | predicted trend: 0 | target trend: 1 | predicted price: $38.38 | actual price: $38.55
difference from predicted price to actual price: $-0.16 
day 3 to 4 | predicted trend: 0 | target trend: 0 | predicted price: $38.31 | actual price: $38.90
difference from predicted price to actual price: $-0.59 
day 4 to 5 | predicted trend: 0 | target trend: 0 | predicted price: $38.11 | actual price: $38.70
difference from predicted price to actual price: $-0.59 
day 5 to 6 | predicted trend: 1 | target trend: 1 | predicted price: $38.05 | actual price: $38.51
difference from predicted price to actual price: $-0.46 
day 6 to 7 | predicted trend: 0 | target trend: 1 | predicted pri

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  print('difference from predicted price to actual price: $%.2f ' % (current_price_prediction[i] - raw_open_price_data[i]))
