# CAP 6615 Neural Networks Programming Assignment 2 – Multi-Layer Neural Network

### Keyuan Lu, Wenxuan Bao, Yiming Xu, Yufan Chen, Yue Bai

In [45]:
import numpy as np
from datetime import datetime
from numpy import genfromtxt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable

## Step 1 - Design and build a dataset for RNN model

In [46]:
price=pd.read_csv('HistoricalPrices.csv')
pe=pd.read_csv('ie_data_PE.csv')

In [47]:
pe=pe.rename({'Date':'Date_PE'},axis=1)
pe

Unnamed: 0,Date_PE,PE_CAPE,TR CAPE,Yield
0,1978.01,9.24,11.12,9.11%
1,1978.02,9.05,10.89,9.31%
2,1978.03,8.95,10.79,9.47%
3,1978.04,9.26,11.18,9.03%
4,1978.05,9.63,11.64,8.49%
...,...,...,...,...
511,2020.08,31.15,34.16,4.32%
512,2020.09,30.83,33.81,4.33%
513,2020.10,31.28,34.30,4.17%
514,2020.11,32.47,35.61,3.96%


In [48]:
price['Date_PE']=0.0
for i in range(price.shape[0]):
    month=int(price['Date'][i][0:2])
    year=int(price['Date'][i][-2:])
    if year>50:
        year=year+1900
    else:
        year=year+2000
    price['Date_PE'][i]=year+month*0.01

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price['Date_PE'][i]=year+month*0.01


In [49]:
data=pd.merge(price,pe,how='left',on='Date_PE')
data.to_csv('Price_PE.csv')

In [50]:
#'Date', ' Open', ' High', ' Low', ' Close'
raw_data_price = genfromtxt('Price_PE.csv', delimiter=',',dtype=str)
raw_data_price = raw_data_price[1:]

In [51]:

raw_data_price = np.flip(raw_data_price,0)
count = 0
for x in raw_data_price:
    x[0] = str(count)
    count += 1
raw_data_price
#Index,Date, Open, High, Low, Close,Date_PE,PE_CAPE,TR CAPE,Yield

array([['0', '01/03/78', '93.82', ..., '9.24', '11.12', '9.11%'],
       ['1', '01/04/78', '93.52', ..., '9.24', '11.12', '9.11%'],
       ['2', '01/05/78', '92.74', ..., '9.24', '11.12', '9.11%'],
       ...,
       ['10841', '12/29/20', '3750.01', ..., '33.77', '37.03', '3.77%'],
       ['10842', '12/30/20', '3736.19', ..., '33.77', '37.03', '3.77%'],
       ['10843', '12/31/20', '3733.27', ..., '33.77', '37.03', '3.77%']],
      dtype='<U8')

In [52]:
#We are only using ClOSE price and Cyclicily adjusted PE-ratio as data frame
training_set = raw_data_price[:,[5,7]]
training_set

array([['93.82', '9.24'],
       ['93.52', '9.24'],
       ['92.74', '9.24'],
       ...,
       ['3727.04', '33.77'],
       ['3732.04', '33.77'],
       ['3756.07', '33.77']], dtype='<U8')

In [53]:
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)

In [54]:
#Data format setting
#Input size is 5 because we are using a weeks data as data batch.
INPUT_SIZE =5
HIDDEN_SIZE = 32
NUM_LAYERS = 2
OUTPUT_SIZE = 1

num_epochs = 50
learning_rate = 0.001

In [55]:
# Creating a data structure with 180 timesteps and 1 output
# we hold 176 data as training data, and 4 data points use for testing
X_train = []
y_train = []
curWindowBeginning = 0
curWindowEnd = 180
curTrainSetEnd = curWindowBeginning+176
for i in range(INPUT_SIZE, curTrainSetEnd):
    X_train.append(training_set_scaled[i-INPUT_SIZE:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)

In [56]:
# Reshaping
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))

## Step 2 - Design and develop RNN in Python, using libraries such as PyTorch (and, if necessary, Tensor Flow). 

In [57]:
class RNN(nn.Module):
    def __init__(self, i_size, h_size, n_layers, o_size):
        super(RNN, self).__init__()

        self.rnn = nn.RNN(
            input_size=i_size,
            hidden_size=h_size,
            num_layers=n_layers
        )
        self.out = nn.Linear(h_size, o_size)

    def forward(self, x):
        r_out, _ = self.rnn(x)
        outs = self.out(r_out)

        return outs

In [58]:
rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)
#set the model to the device that we defined earlier (default is CPU)
optimiser = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

hidden_state = None

## Step 3 - Train your RNN using time window 180

In [59]:
for epoch in range(num_epochs):
    inputs = Variable(torch.from_numpy(X_train).float())
    labels = Variable(torch.from_numpy(y_train).float())

    output = rnn(inputs) 
    
    loss = criterion(output.view(-1), labels)
    print("Epoch ", epoch, "MSE: ", loss.item())
    optimiser.zero_grad()

    # Backward pass
    loss.backward(retain_graph=True)

    # Update parameters
    optimiser.step()                                


Epoch  0 MSE:  0.04459018260240555
Epoch  1 MSE:  0.02656456269323826
Epoch  2 MSE:  0.013705183751881123
Epoch  3 MSE:  0.005406174808740616
Epoch  4 MSE:  0.0011081787524744868
Epoch  5 MSE:  0.00015474851534236223
Epoch  6 MSE:  0.0015519496519118547
Epoch  7 MSE:  0.0038238847628235817
Epoch  8 MSE:  0.005562224425375462
Epoch  9 MSE:  0.006138057913631201
Epoch  10 MSE:  0.005636738613247871
Epoch  11 MSE:  0.004464942030608654
Epoch  12 MSE:  0.003064868040382862
Epoch  13 MSE:  0.0017819084459915757
Epoch  14 MSE:  0.0008250933024100959
Epoch  15 MSE:  0.00027435601805336773
Epoch  16 MSE:  0.00010848174861166626
Epoch  17 MSE:  0.00023910505115054548
Epoch  18 MSE:  0.0005437942454591393
Epoch  19 MSE:  0.0008966567111201584
Epoch  20 MSE:  0.0011946591548621655
Epoch  21 MSE:  0.0013732727384194732
Epoch  22 MSE:  0.0014088456518948078
Epoch  23 MSE:  0.0013118047500029206
Epoch  24 MSE:  0.0011156527325510979
Epoch  25 MSE:  0.0008652919786982238
Epoch  26 MSE:  0.00060674181

In [None]:
#reference:https://github.com/thundercomb/pytorch-stock-predictor-rnn/blob/master/pytorch-stock-predictor-lstm.ipynb