In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

#device config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
df = pd.read_csv("train.csv")
df_train = df.drop("date_time",1)[:5001]
scaler = StandardScaler()
scaler = scaler.fit(df_train)
df_train = scaler.transform(df_train) #df_train is shape 5000,11 and dtype float64

In [3]:
df_train.shape

(5001, 11)

In [4]:
df_train[0:300,:].shape

(300, 11)

In [5]:
#hyperparameters

sequence_length = 30
batch_size = 40
learning_rate = 0.001
input_size = 11
hidden_size = 32
output_size = 3
num_epochs = 10

In [6]:
#RESHAPING the problem to supervised learning
#X shape --> (length-sequence_length),sequence_length,features 4700,300,11
#y shape --> )length-sequence_length),targets 4700,11

class Train_Time_Series_Dataset(Dataset):
    def __init__(self, complete_data,sequence_length):
        X, y = list(), list()
        for i in range(len(complete_data)):
            end_idx = i + sequence_length
            if end_idx > len(complete_data) - 1:
                break
                
            #seq_x shape --> sequence_length, features (for time steps up to t-1)
            #seq_y shape --> targets (for time step t)
            seq_x, seq_y = complete_data[i:end_idx,:], complete_data[end_idx,8:]
            X.append(seq_x)
            y.append(seq_y)
        X = np.asarray(X, dtype=np.float32)
        y = np.asarray(y, dtype=np.float32)
        
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)
        self.sequence_length = sequence_length
        self.n_samples = X.shape[0]
        
    def __getitem__(self,index):
        return self.X[index], self.y[index]
        
    def __len__(self):
        return self.n_samples

class Eval_Dataset(Dataset):
    def __init__(self,sequence):
        self.X = torch.from_numpy(sequence)
        self.n_samples = sequence.shape[0]
    
    def __getitem__(self, index):
        return self.X[index]

    def __len__(self):
        return self.n_samples

dataset = Train_Time_Series_Dataset(df_train, sequence_length)
train_loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=True)
eval_dataset = Eval_Dataset(df_train)  

In [7]:
#testing
dataiter = iter(train_loader)
sample_data =dataiter.next()
#s_features shape --> batch_size, sequence_length, features
#s_targets shape --> batch_size, targets
s_features, s_targets = sample_data
s_features = s_features.to(device)
#s_targets = s_targets.to(device)

In [8]:
#DO NOT CONFUSE Hidden state with Weight matrix!

class RNN(nn.Module):
    def __init__(self,n_features,n_layers,hidden_size,output_size):
        super(RNN,self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(n_features, hidden_size,n_layers,batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self,x):
        h0 = torch.zeros(self.n_layers,x.size(0),self.hidden_size).to(device) #h0 shape --> n_layers, batch_size, hidden_size
        out, _ = self.rnn(x, h0) #returns hidden states for all t
        out = out[:,-1,:]
        out = self.fc(out) #out shape --> batch_size, output_size
        return out
    """
    def predict(self,complete_sequence, starting_point, future):
        #x shape --> batch_size, sequence_length,features = 1, 30, 11
        x = complete_sequence[starting_point-sequence_length:starting_point]
        x = torch.reshape(x,(1,-1,11)) #-1 is gonna be the sequence length
        
        
        outputs = []
        for i in range(future):
            h0 = torch.zeros(self.n_layers,1,self.hidden_size).to(device)
            _, h_f = self.rnn(x, h0)
            out = self.fc(h_f)
            outputs.append(out)
            
    """
    
model = RNN(input_size,3,hidden_size,output_size).to(device)
#loss and Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
outs = model(s_features)
outs = torch.reshape(outs,(1,80,3))
outs[0][2]

tensor([ 0.1580, -0.1425, -0.3136], device='cuda:0', grad_fn=<SelectBackward>)

In [10]:
x=eval_dataset[470:500]
x=torch.reshape(x,(1,-1,11)).to(device)
x=torch.roll(x,-1,1)
new_t = torch.cat((x[0,29,:8],outs[0,2,:]),-1)
x[:,-1]=new_t

In [11]:
#training loop
n_total_steps = len(train_loader)

running_loss = 0.0
running_correct = 0

#TRAIN
for epoch in range(num_epochs):
    for i, (features,targets) in enumerate(train_loader): #enumerate DataLoader
        #features shape --> batch_size, sequence_length, features
        #targets shape --> batch_size, targets
        features = features.to(device)
        targets = targets.to(device)
        
        #forward
        outputs = model(features)
        loss = criterion(outputs, targets)

        #backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss+= loss.item() #item returns a number

        if (i+1) % 30 == 0:
            print(f'epoch {epoch+1} / {num_epochs}, step {i+1} / {n_total_steps}, loss = {loss.item()}')
            running_loss = 0.0


#torch.save(model.state_dict(),"mymodel.pth")

s_features = s_features.to(device)
output = model(s_features)
print(s_features.shape)

epoch 1 / 10, step 30 / 63, loss = 0.5646951794624329
epoch 1 / 10, step 60 / 63, loss = 0.3374408483505249
epoch 2 / 10, step 30 / 63, loss = 0.47250667214393616
epoch 2 / 10, step 60 / 63, loss = 0.26771101355552673
epoch 3 / 10, step 30 / 63, loss = 0.25746119022369385
epoch 3 / 10, step 60 / 63, loss = 0.24269142746925354
epoch 4 / 10, step 30 / 63, loss = 0.18945883214473724
epoch 4 / 10, step 60 / 63, loss = 0.2444913685321808
epoch 5 / 10, step 30 / 63, loss = 0.2582503855228424
epoch 5 / 10, step 60 / 63, loss = 0.265308141708374
epoch 6 / 10, step 30 / 63, loss = 0.13783381879329681
epoch 6 / 10, step 60 / 63, loss = 0.18870976567268372
epoch 7 / 10, step 30 / 63, loss = 0.1944522261619568
epoch 7 / 10, step 60 / 63, loss = 0.13871189951896667
epoch 8 / 10, step 30 / 63, loss = 0.23154489696025848
epoch 8 / 10, step 60 / 63, loss = 0.20660722255706787
epoch 9 / 10, step 30 / 63, loss = 0.1916838139295578
epoch 9 / 10, step 60 / 63, loss = 0.18601341545581818
epoch 10 / 10, ste

In [12]:
#test

