In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import os
import pandas as pd
import math

In [2]:
class Model(nn.Module):
    def __init__(self, input_dim, hidden_dim, n_layers, batch_size, seq_len, batch_first = True):
        super(Model, self).__init__()
        self.LSTM = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first)
        self.inp = torch.randn(batch_size, seq_len, input_dim)
        
        #We'll have to initialize a hidden state and cell state for the LSTM as this is the first cell.
        #The hidden state and cell state is stored in a tuple with the format (hidden_state, cell_state).
        self.hidden_state = torch.randn(n_layers, batch_size, hidden_dim)
        self.cell_state = torch.randn(n_layers, batch_size, hidden_dim)
        self.hidden = (self.hidden_state, self.cell_state)
        
    def many_to_one(self):
        out, hidden = self.LSTM(self.inp, self.hidden)
        return out, hidden
        
    
    def many_to_many(self, out):
        out = out.squeeze()[-1, :]
        return out
        

In [3]:
input_dim = 5 #Input dimension: represents the size of the input at each time step. 
hidden_dim = 10 #Hidden dimension: represents the size of the hidden state and cell state at each time step.
n_layers = 1 #The number of LSTM layers stacked on top of eachother.

In [4]:
def organize_data(df, train = .8):
    mask = int(len(df)*train)
    x_train = df[~mask:]
    x_valid = df[mask:]
    return x_train, x_valid

In [5]:
def normalize_data(data):
    def normalize(x, mean, std):
        return (x - mean)/std
    mean, std = data.mean(), data.std()
    return normalize(data, mean, std)

In [49]:
PATH = os.path.abspath("ucsbdata.csv")
df = pd.read_csv(PATH)
df = df[df["R"].notna()]
del df["Index"]
#df = df.fillna(0)
x_train, x_valid = organize_data(df)
xnorm_train = normalize_data(x_train)
xnorm_valid = normalize_data(x_valid)
print("Valid shape:", x_valid.shape)
print("Train shape:", x_train.shape)
print("Train:", x_train.iloc[0, 0], "xnorm_train: ", xnorm_train.iloc[0, 0])
print("Valid:", x_valid.iloc[0, 0], "xnorm_valid: ", xnorm_valid.iloc[0, 0])

Valid shape: (1966, 67)
Train shape: (7861, 67)
Train: -0.00018249649851901698 xnorm_train:  -0.05635635223120648
Valid: 0.00689957444836972 xnorm_valid:  0.7732659611356497
