In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
url = 'FB.csv'
meta = pd.read_csv(url)
meta = meta.dropna(how='any',axis=0) #If there is missing data
meta['Date'].apply(pd.to_datetime) #For ease of adjusting dates later on
len(meta.index)

2382

In [3]:
meta = meta[['Close'] + [col for col in meta if col not in ['Adj Close', 'Close']] + ['Adj Close']]

In [4]:
meta = meta.sort_values('Date')
meta.head()

Unnamed: 0,Close,Date,Open,High,Low,Volume,Adj Close
0,38.23,2012-05-18,42.049999,45.0,38.0,573576400,38.23
1,34.029999,2012-05-21,36.529999,36.66,33.0,168192700,34.029999
2,31.0,2012-05-22,32.610001,33.59,30.940001,101786600,31.0
3,32.0,2012-05-23,31.370001,32.5,31.360001,73600000,32.0
4,33.029999,2012-05-24,32.950001,33.209999,31.77,50237200,33.029999


In [5]:
#Method 1

In [6]:
#RNN model
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, num_inputs, hidden_dims, layer_dims, num_outputs):
        super(RNN, self).__init__()
        self.hidden_dims = hidden_dims #Numer of features in the hidden state
        self.layer_dims = layer_dims #Number of recurrent layers
        self.rnn = nn.RNN(num_inputs, hidden_dim, layer_dim, batch_first = True)
        self.fc = nn.Linear(hidden_dim, num_outputs)
        
    def forward(self, X):
        h_0 = torch.zeros(self.layer_dim, X.size(0), self.hidden_dim).requires_grad_()
        out, h_0 = self.rnn(X, h_0.detach())
        out = out[:,-1,:]
        out.self.fc(out)
        return out

In [7]:
#RNN model
num_input, hidden_dim, layer_dim, num_output = 14, 15, 2, 1
rnn_model = RNN(num_input, hidden_dim, layer_dim, num_output)

In [8]:
#Sliding window data
def sliding_window(data, timeframe):
    numpydata = data.to_numpy() 
    data = []

    # create all possible sequences of length seq_len
    for index in range(len(numpydata) - timeframe): 
        data.append(numpydata[index: index + timeframe])

    return np.array(data)

sw_meta = sliding_window(meta.iloc[:,0:1], 15)

In [9]:
meta_data = pd.DataFrame(sw_meta.T.reshape(-1, 15))
meta_data.columns = ['Day 1', 'Day 2', 'Day 3', 'Day 4', 'Day 5', 'Day 6', 'Day 7', 'Day 8', 'Day 9', 'Day 10', 'Day 11', 'Day 12', 'Day 13', 'Day 14', 'Target']
meta_data = meta_data[['Target'] + [col for col in meta_data.columns if col != 'Target' ]]

In [10]:
from sklearn.model_selection import train_test_split

def feature_label_split(dataframe, target):
    y = dataframe.iloc[:,0:1]
    X = dataframe.iloc[:,1:]
    return X, y

def train_val_test_split(dataframe, target, test_ratio):
    val_ratio = test_ratio / (1 - test_ratio)
    X, y = feature_label_split(dataframe, target)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_ratio, shuffle = False)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = val_ratio, shuffle = False)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [11]:
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(meta_data, 'Target', 0.2)

In [12]:
#Scaling datasets
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_array = scaler.fit_transform(X_train)
X_val_array = scaler.transform(X_val)
X_test_array = scaler.transform(X_test)

y_train_array = scaler.fit_transform(y_train)
y_val_array = scaler.transform(y_val)
y_test_array = scaler.transform(y_test)

In [13]:
import torch
from torch.utils.data import TensorDataset

train_feat = torch.Tensor(X_train_array)
train_targ = torch.Tensor(y_train_array)
val_feat = torch.Tensor(X_val_array)
val_targ = torch.Tensor(y_val_array)
test_feat = torch.Tensor(X_test_array)
test_targ = torch.Tensor(y_test_array)

training_set = TensorDataset(train_feat, train_targ)
valid_set = TensorDataset(val_feat, val_targ)
test_set = TensorDataset(test_feat, test_targ)

In [14]:
from torch.utils.data import DataLoader

batch_size = 50
train_dl = DataLoader(training_set, batch_size =  batch_size, drop_last = True)
val_dl = DataLoader(valid_set, batch_size =  batch_size, drop_last = True)
test_dl = DataLoader(test_set, batch_size =  batch_size, drop_last = True)

In [15]:
#Optimiser
class Optimization():
    def __init__(self, model, criterion, updater):
        self.model = model
        self.criterion = criterion
        self.updater = updater
        self.train_losses = []
        self.valid_losses = []
        
        def training_step(self, inputs, labels):
            self.model.train()
            y_hat = self.model(inputs)
            loss = self.criterion(labels, y_hat)
            loss.backward()
            self.updater.step()
            self.updater.zero_grad()
            return loss.item()

In [16]:
#Training the model
def RNN_model_trainer(self, train_dataloader, val_dataloader, batch_size, num_epochs, num_features):
    for epoch in range(1, num_epochs + 1):
        batch_loss = []
        for train_inputs, train_labels in train_loader:
            train_inputs = train_inputs.view([batch_size, -1, num_features]).to(device)
            train_labels = train_labels.to(device)
            loss = self.training_step(train_inputs, train_labels)
            batch_loss.append(loss)
            training_loss = np.mean(batch_loss)
            self.train_losses.append(training_loss)
            
            with torch.no_grad():
                batch_val_loss = []
                for val_inputs, val_labels in val_loader:
                    val_inputs = val_inputs.view([batch_size, -1, num_features]).to(device)
                    val_labels = val_labels.to(device)
                    self.model.eval()
                    y_hat = self.model(val_inputs)
                    val_loss = criterion(val_labels, y_hat).item()
                    batch_val_loss.append(val_loss)
                validation_loss = np.mean(batch_val_loss)
                self.valid_losses.append(validation_loss)

In [17]:
lr, num_epochs = 0.2, 60
criterion_3 = nn.MSELoss()
updater_3 = torch.optim.SGD(rnn_model.parameters(), lr = lr)

In [18]:
from datetime import datetime

opt = Optimization(model = rnn_model, criterion = criterion_3, updater = updater_3)
opt.RNN_model_trainer(train_dataloader = train_dl, val_dataloader = val_dl, batch_size = 50, num_epochs = num_epochs, num_features = num_input)

AttributeError: 'Optimization' object has no attribute 'RNN_model_trainer'

In [None]:
#Method 2

In [None]:
#RNN model
class RNN(nn.Module):
    def __init__(self, num_inputs, hidden_dim, num_hiddens, num_outputs):
        super().__init__()
        self.hidden_dim = nn.Linear(num_inputs, hidden_dim)
        self.rnn = nn.RNN(hidden_dim, num_hiddens)
        self.fc = nn.Linear(num_hiddens, num_outputs)
        
    def forward(self, X):
        hidden_state = self.rnn(X)
        output, hidden = self.rnn(hidden_state)
        assert torch.equal(output[-1,:,:], hidden.squeeze(0))
        return self.fc(hidden.squeeze(0))

In [None]:
#RNN model
class RNN(nn.Module):
    def __init__(self, num_inputs, hidden_dims, layer_dims, num_outputs):
        super(RNN, self).__init__()
        self.hidden_dims = hidden_dims #Numer of features in the hidden state
        self.layer_dims = layer_dims #Number of recurrent layers
        self.rnn = nn.RNN(num_inputs, hidden_dim, layer_dim, batch_first = True)
        self.fc = nn.Linear(hidden_dim, num_outputs)
        
    def forward(self, X):
        h_0 = torch.zeros(self.layer_dims, X.size(0), self.hidden_dims).requires_grad_() 
        #Initial state of hidden state
        output, h_0 = self.rnn(X, h_0.detach())
        assert torch.equal(output[-1,:,:], h_0.squeeze(0))
        return self.fc(h_0.squeeze(0))

In [None]:
#RNN model
num_input, hidden_dim, layer_dim, num_output = 14, 15, 2, 1
rnn_model = RNN(num_input, hidden_dim, layer_dim, num_output)

In [None]:
lr, num_epochs = 0.2, 60
criterion_3 = nn.MSELoss()
updater_3 = torch.optim.SGD(rnn_model.parameters(), lr = lr)

In [None]:
#Model training and evaluation
def log_rmse(model, inputs, labels, criterion):
    clipped_preds = torch.clamp(model(inputs), 1, float('inf'))
    rmse = torch.sqrt(criterion(torch.log(clipped_preds), torch.log(labels)))
    return rmse.item()

def RNN_model_trainer(model, train_dataloader, criterion, updater):
    train_ls = []
    model.train()
    for epoch in range(num_epochs):
        for i, train_data in enumerate(train_dataloader):
            train_inputs, train_labels = train_data
            updater.zero_grad()
            y_hat = model(train_inputs).squeeze(1)
            loss = criterion(y_hat, train_labels)
            loss.backward()
            updater.step()
        train_loss.append(log_rmse(model, train_inputs, train_labels, criterion))
    return train_ls

def RNN_model_evaluater(model, test_dataloader, criterion):
    test_ls = []
    for i, test_data in enumerate(test_dataloader):
        test_inputs, test_labels = test_data
    test_loss.append(log_rmse(model, test_inputs, test_labels, criterion))
    return test_loss

In [None]:
RNN_model_trainer(rnn_model, train_dl, criterion_3, updater_3)

In [None]:
#Model 3

In [None]:
#RNN model
class RNN(nn.Module):
    def __init__(self, num_inputs, num_hiddens, num_outputs):
        super(RNN, self).__init__()
        self.num_hiddens = num_hiddens
        self.hidden = nn.Linear(num_inputs + num_hiddens, num_hiddens)
        self.out = nn.Linear(num_inputs + num_hiddens, num_outputs)
        self.softmax = nn.LogSoftmax(dim = 1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.hidden(combined)
        output = self.out(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.num_hiddens)

In [None]:
num_input, hidden_dim, num_output = 14, 10, 1
rnn_model = RNN(num_input, hidden_dim, num_output)

In [19]:
#Try 3
#RNN model class
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, num_inputs, embed_dim, num_hiddens, num_outputs):
        super().__init__()
        self.hidden = nn.Embedding(num_inputs, embed_dim)
        self.rnn = nn.RNN(embed_dim, num_hiddens)
        self.fc = nn.Linear(num_hiddens, num_outputs)
        
    def forward(self, price):
        embedded = self.hidden(price)
        output, hidden = self.rnn(embedded)
        assert torch.equal(output[-1,:,:], hidden.squeeze(0))
        return self.fc(hidden.squeeze(0))

In [20]:
num_input, embed_dim, num_hidden, num_output = 14,5, 10, 1
rnn_model = RNN(num_input, embed_dim, num_hidden, num_output)

In [21]:
import torch.optim as optim

lr, num_epochs = 0.01, 100
updater_3 = optim.SGD(rnn_model.parameters(), lr = lr)
criterion_3 = nn.MSELoss()

In [22]:
def log_rmse(model, inputs, labels, criterion):
    clipped_preds = torch.clamp(model(inputs), 1, float('inf'))
    rmse = torch.sqrt(criterion(torch.log(clipped_preds), torch.log(labels)))
    return rmse.item()

In [25]:
#Model trainer
def RNN_model_trainer(model, train_dataloader, updater, criterion):
    batch_loss = []
    for epoch in range(num_epochs):
        for train_inputs, train_labels in train_dataloader:
            updater.zero_grad()
            y_hat = model(train_inputs).squeeze(1)
            loss = criterion(y_hat, train_labels)
            loss.backward()
            updater.step()
        train_ls.append(log_rmse(model, train_inputs, train_labels, crtierion))
    return train_ls

In [26]:
titit = RNN_model_trainer(rnn_model, train_dl, updater_3, criterion_3)

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [28]:
#Try 4
#RNN model 
class RNN(nn.Module):
    def __init__(self, num_inputs, num_hiddens, num_outputs):
        super(RNN, self).__init__()
        self.num_hiddens = num_hiddens
        self.hidden = nn.Linear(num_inputs + num_hiddens, num_hiddens) 
        #Hidden layer includes information from both inputs and hidden layer itself
        self.out = nn.Linear(num_inputs + num_hiddens, num_outputs)
    
    #Forward step with the hidden state
    def forward(self, X, hidden_state):
        combined = torch.cat((X, hidden_state), 1) 
        hidden = torch.sigmoid(self.hidden(combined))
        output = self.out(combined)
        return output, hidden
    
    #Initialising hidden state
    def init_hidden(self):
        return torch.nn.init.xavier_normal_(self.hidden.weight, gain = 1.0)

In [29]:
num_input, num_hidden, num_output = 14, 10, 1
rnn_model = RNN(num_input, num_hidden, num_output)

In [30]:
lr, num_epochs = 0.2, 60
criterion_3 = nn.MSELoss()
updater_3 = torch.optim.SGD(rnn_model.parameters(), lr = lr, momentum = 0.9)

In [31]:
#Training the model
def RNN_model_trainer(model, dataloader, criterion, updater):
    train_loss = []
    for epoch in range(num_epochs):
        for i, data in enumerate(dataloader):
            inputs, labels = data
            hidden_state = model.init_hidden() 
            updater.zero_grad()
            for i in inputs:
                y_hat, hidden_state = model(i, hidden_state)
            loss = criterion(y_hat, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            updater.step()
        train_loss.append(log_rmse(model, inputs, labels, criterion))
    return train_loss

In [33]:
training_set, test_set = train_test_split(meta, test_size = 0.2, shuffle = False)

train_array = training_set.iloc[:,0:1]
test_array = test_set.iloc[:,0:1]

In [34]:
#Sliding window data
def sliding_window(values, window_size):
    dataset = []
    if len(values) < window_size:
        return values.values
    for i in range(len(values)):
        dataset.append(values[i:i+window_size])
    return dataset

In [35]:
def sliding_window(data, timeframe):
    numpydata = data.to_numpy() 
    data = []

    # create all possible sequences of length seq_len
    for index in range(len(numpydata) - timeframe): 
        data.append(numpydata[index: index + timeframe])

    return np.array(data)

In [36]:
rnn_training = sliding_window(train_array, 16)
rnn_testing = sliding_window(test_array, 16)

In [37]:
from torch.utils.data import Dataset

class trainingdataset(Dataset):
    def __init__(self): #Loading in the data
        xy = rnn_training
        self.x = torch.from_numpy(xy[:,0:-2].astype(np.float32)) #Slice the first 14 values for the inputs
        self.y =  torch.from_numpy(xy[:,[-1]].astype(np.float32)) #15th value for the labels
        self.samples = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
        
    def __len__(self):
        return self.samples

In [39]:
class testingdataset(Dataset):
    def __init__(self): #Loading in the data
        xy = rnn_testing
        self.x = torch.from_numpy(xy[:,0:-2].astype(np.float32))
        self.y =  torch.from_numpy(xy[:,[-1]].astype(np.float32))
        self.samples = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
        
    def __len__(self):
        return self.samples

In [40]:
training = trainingdataset()
batch_size = 50
train_dl = DataLoader(dataset = training, batch_size = batch_size, shuffle = True)

testing = testingdataset()
test_dl = DataLoader(dataset = testing, batch_size = batch_size, shuffle = True)

In [41]:
RNN_model_trainer(rnn_model, train_dl, criterion_3, updater_3)

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 14 but got size 10 for tensor number 1 in the list.

In [42]:
#Try 5
#RNN model
class Model(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(Model, self).__init__()
        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        #Defining the RNN layers
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        batch_size = x.size(0)
        # Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)
        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        return out, hidden
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden

In [43]:
num_input, num_hidden, num_output, no_layer = 14, 10, 1, 1
rnn_model = Model(num_input, num_hidden, num_output, no_layer)

In [44]:
criterion_3 = nn.MSELoss()
updater_3 = torch.optim.SGD(rnn_model.parameters(), lr = lr)

In [49]:
meta_data

Unnamed: 0,Target,Day 1,Day 2,Day 3,Day 4,Day 5,Day 6,Day 7,Day 8,Day 9,Day 10,Day 11,Day 12,Day 13,Day 14
0,27.100000,38.230000,34.029999,31.000000,32.000000,33.029999,31.910000,28.840000,28.190001,29.600000,27.719999,26.900000,25.870001,26.809999,26.309999
1,31.100000,27.010000,27.400000,27.270000,28.290001,30.010000,31.410000,31.910000,31.600000,31.840000,33.049999,32.060001,33.099998,32.230000,31.360001
2,28.750000,30.770000,31.200001,31.469999,31.730000,32.169998,31.469999,30.969999,30.809999,30.719999,28.250000,28.090000,29.110001,29.000000,28.760000
3,21.600000,28.450001,29.340000,26.850000,23.709999,23.150000,21.709999,20.879999,20.040001,21.090000,21.920000,20.719999,20.719999,21.010000,21.809999
4,17.730000,20.379999,21.200001,19.870001,19.049999,20.010000,19.160000,19.440001,19.440001,19.410000,19.150000,19.340000,19.100000,19.090000,18.059999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2362,361.609985,341.660004,346.230011,351.190002,369.790009,372.459991,367.809998,373.279999,358.320007,356.299988,351.950012,351.239990,358.920013,362.970001,363.510010
2363,380.660004,361.130005,359.959991,362.649994,363.179993,366.559998,358.450012,355.450012,355.119995,359.369995,363.350006,365.510010,368.390015,364.380005,372.630005
2364,357.480011,379.380005,382.049988,375.279999,376.260010,382.179993,377.570007,378.000000,378.690002,376.510010,376.529999,373.920013,373.059998,364.720001,355.700012
2365,323.769989,343.209991,345.959991,352.959991,353.579987,340.649994,339.609985,339.390015,343.010010,326.230011,332.959991,333.640015,329.220001,330.049988,325.450012


In [55]:
rnn_train = meta_data.iloc[:,0:1].values
rnn_target = meta_data.iloc[:,1:].values

In [56]:
input_seq = torch.from_numpy(rnn_train)
target_seq = torch.from_numpy(rnn_target)

In [58]:
for epoch in range(1, num_epochs + 1):
    updater_3.zero_grad() # Clears existing gradients from previous epoch
    input_seq.to(device)
    output, hidden = model(input_seq)
    loss = criterion_3(output, target_seq.view(-1).long())
    loss.backward() # Does backpropagation and calculates gradients
    updater_3.step() # Updates the weights accordingly
    
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

NameError: name 'device' is not defined