# CAP 6615 Neural Networks Programming Assignment 2 – Multi-Layer Neural Network

### Keyuan Lu, Wenxuan Bao, Yiming Xu, Yufan Chen, Yue Bai

In [21]:
import numpy as np
from datetime import datetime
from numpy import genfromtxt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable

## Step 1 - Design and build a dataset for RNN model

In [22]:
price=pd.read_csv('HistoricalPrices.csv')
pe=pd.read_csv('ie_data_PE.csv')

In [23]:
pe=pe.rename({'Date':'Date_PE'},axis=1)
pe

Unnamed: 0,Date_PE,PE_CAPE,TR CAPE,Yield
0,1978.01,9.24,11.12,9.11%
1,1978.02,9.05,10.89,9.31%
2,1978.03,8.95,10.79,9.47%
3,1978.04,9.26,11.18,9.03%
4,1978.05,9.63,11.64,8.49%
...,...,...,...,...
511,2020.08,31.15,34.16,4.32%
512,2020.09,30.83,33.81,4.33%
513,2020.10,31.28,34.30,4.17%
514,2020.11,32.47,35.61,3.96%


In [24]:
price['Date_PE']=0.0
for i in range(price.shape[0]):
    month=int(price['Date'][i][0:2])
    year=int(price['Date'][i][-2:])
    if year>50:
        year=year+1900
    else:
        year=year+2000
    price['Date_PE'][i]=year+month*0.01

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price['Date_PE'][i]=year+month*0.01


In [25]:
data=pd.merge(price,pe,how='left',on='Date_PE')
data.to_csv('Price_PE.csv')

In [26]:
#'Date', ' Open', ' High', ' Low', ' Close'
raw_data_price = genfromtxt('Price_PE.csv', delimiter=',',dtype=str)
raw_data_price = raw_data_price[1:]

In [27]:

raw_data_price = np.flip(raw_data_price,0)
count = 0
for x in raw_data_price:
    x[0] = str(count)
    count += 1
raw_data_price
#Index,Date, Open, High, Low, Close,Date_PE,PE_CAPE,TR CAPE,Yield

array([['0', '01/03/78', '93.82', ..., '9.24', '11.12', '9.11%'],
       ['1', '01/04/78', '93.52', ..., '9.24', '11.12', '9.11%'],
       ['2', '01/05/78', '92.74', ..., '9.24', '11.12', '9.11%'],
       ...,
       ['10841', '12/29/20', '3750.01', ..., '33.77', '37.03', '3.77%'],
       ['10842', '12/30/20', '3736.19', ..., '33.77', '37.03', '3.77%'],
       ['10843', '12/31/20', '3733.27', ..., '33.77', '37.03', '3.77%']],
      dtype='<U8')

In [28]:
#We are only using ClOSE price and Cyclicily adjusted PE-ratio as data frame
training_set = raw_data_price[:,[5,7]]
training_set

array([['93.82', '9.24'],
       ['93.52', '9.24'],
       ['92.74', '9.24'],
       ...,
       ['3727.04', '33.77'],
       ['3732.04', '33.77'],
       ['3756.07', '33.77']], dtype='<U8')

In [29]:
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)

In [30]:
#Data format setting
INPUT_SIZE = 180
HIDDEN_SIZE = 32
NUM_LAYERS = 2
OUTPUT_SIZE = 1

num_epochs = 50
learning_rate = 0.001

In [31]:
# Creating a data structure with 180 timesteps and 1 output
X_train = []
y_train = []
for i in range(INPUT_SIZE, 10842):
    X_train.append(training_set_scaled[i-INPUT_SIZE:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)

In [32]:
# Reshaping
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))

## Step 2 - Design and develop RNN in Python, using libraries such as PyTorch (and, if necessary, Tensor Flow). 

In [33]:
class RNN(nn.Module):
    def __init__(self, i_size, h_size, n_layers, o_size):
        super(RNN, self).__init__()

        self.rnn = nn.RNN(
            input_size=i_size,
            hidden_size=h_size,
            num_layers=n_layers
        )
        self.out = nn.Linear(h_size, o_size)

    def forward(self, x):
        r_out, _ = self.rnn(x)
        outs = self.out(r_out)

        return outs

In [34]:
rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)
#set the model to the device that we defined earlier (default is CPU)
optimiser = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

hidden_state = None

## Step 3 - Train your RNN using time window 180

In [35]:
for epoch in range(num_epochs):
    inputs = Variable(torch.from_numpy(X_train).float())
    labels = Variable(torch.from_numpy(y_train).float())

    output = rnn(inputs) 
    
    loss = criterion(output.view(-1), labels)
    print("Epoch ", epoch, "MSE: ", loss.item())
    optimiser.zero_grad()

    # Backward pass
    loss.backward(retain_graph=True)

    # Update parameters
    optimiser.step()                                


Epoch  0 MSE:  0.09102657437324524
Epoch  1 MSE:  0.03152346611022949
Epoch  2 MSE:  0.011735993437469006
Epoch  3 MSE:  0.01589621976017952
Epoch  4 MSE:  0.022482873871922493
Epoch  5 MSE:  0.02126406319439411
Epoch  6 MSE:  0.014350549317896366
Epoch  7 MSE:  0.007785355672240257
Epoch  8 MSE:  0.006139332894235849
Epoch  9 MSE:  0.008910633623600006
Epoch  10 MSE:  0.011345750652253628
Epoch  11 MSE:  0.01049133762717247
Epoch  12 MSE:  0.007289554458111525
Epoch  13 MSE:  0.004337802529335022
Epoch  14 MSE:  0.0035077554639428854
Epoch  15 MSE:  0.004686059895902872
Epoch  16 MSE:  0.0062267230823636055
Epoch  17 MSE:  0.006635179743170738
Epoch  18 MSE:  0.005655498243868351
Epoch  19 MSE:  0.00408010883256793
Epoch  20 MSE:  0.002964799292385578
Epoch  21 MSE:  0.0028630017768591642
Epoch  22 MSE:  0.0034821834415197372
Epoch  23 MSE:  0.004017493221908808
Epoch  24 MSE:  0.0038798516616225243
Epoch  25 MSE:  0.0031194130424410105
Epoch  26 MSE:  0.0022519954945892096
Epoch  27 

In [None]:
#reference:https://github.com/thundercomb/pytorch-stock-predictor-rnn/blob/master/pytorch-stock-predictor-lstm.ipynb