In [1]:
import numpy as np
import os

import torch 
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms


from torch.autograd import Variable

In [2]:
# Hyper Parameters
# sequence_length = 28  # 28
# input_size = 28  # 28
hidden_size = 128
num_layers = 2
# num_classes = 10
batch_size = 2
num_epochs = 2
learning_rate = 0.01

In [3]:
# Data Loader (Input Pipeline)
def get_train_loader(path='data/features'):
    """
    Function reads .npy files from path.
    Returns:
        dataloader, data classes (list), size of input object [n_sequence, n_features], lenght_of_dataset
    """
    inputs = []
    targets = []
    data_classes = os.listdir(path)
    label_int = 0
    for folder in data_classes:
        current_dir = path + '/' + folder + '/'
        
        temp = [torch.Tensor(np.load(current_dir +  f)) for f in os.listdir(current_dir)]  # Transform to torch tensors
        
        targets += ([torch.LongTensor([label_int])] * len(temp))
        inputs += temp
        
        label_int += 1
#     print (len(inputs))
#     print (len(targets))
        
    tensor_x = torch.stack(inputs)
    tensor_y = torch.stack(targets)
    print (tensor_x.size())
    my_dataset = torch.utils.data.TensorDataset(tensor_x, tensor_y)  # Create your datset
    my_dataloader = torch.utils.data.DataLoader(my_dataset, batch_size=batch_size)  # Create your dataloader
    
    
    return my_dataloader, data_classes, inputs[-1].size(), len(inputs)

train_loader, data_classes, [sequence_length, input_size], lenght_of_dataset = get_train_loader()

num_classes = len(data_classes)
print(train_loader, data_classes, sequence_length, input_size)

torch.Size([9, 50, 100])
<torch.utils.data.dataloader.DataLoader object at 0x000002915CCF33C8> ['ApplyEyeMakeup', 'ApplyLipstick', 'Archery'] 50 100


In [4]:
print ("GPU is available: ", torch.cuda.is_available())

GPU is available:  True


In [5]:
# RNN Model (Many-to-One)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        # Set initial states 
        h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) 
        c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        
        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))  
        
        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])  
        return out

rnn = RNN(input_size, hidden_size, num_layers, num_classes)


# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

In [6]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, sequence_length, input_size))
#         print(labels)
        labels = Variable(labels.view(-1))
        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
#         print (images)
#         print (outputs)
#         print (labels)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 1 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' 
                   %(epoch+1, num_epochs, i+1, lenght_of_dataset//batch_size, loss.data[0]))


Epoch [1/2], Step [1/4], Loss: 1.1378
Epoch [1/2], Step [2/4], Loss: 1.2753
Epoch [1/2], Step [3/4], Loss: 0.8042
Epoch [1/2], Step [4/4], Loss: 2.9435
Epoch [1/2], Step [5/4], Loss: 1.3895
Epoch [2/2], Step [1/4], Loss: 2.1368
Epoch [2/2], Step [2/4], Loss: 1.4604
Epoch [2/2], Step [3/4], Loss: 1.3244
Epoch [2/2], Step [4/4], Loss: 0.9943
Epoch [2/2], Step [5/4], Loss: 1.0438


In [7]:
# # Test set is not implemented yet

# # Test the Model
# correct = 0
# total = 0
# for images, labels in test_loader:
#     images = Variable(images.view(-1, sequence_length, input_size))
#     outputs = rnn(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted == labels).sum()

# print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) 

# # Save the Model
# # torch.save(rnn.state_dict(), 'rnn.pkl')
# # torch.save(rnn, 'rnn.pt')

# Generate dummy data set

In [None]:
import numpy as np

In [None]:
n_feat = 100
n_seq = 50 # fixed for now
n_video = 10

In [None]:
for i in range(3):
    tmp_data = np.random.random((n_seq, n_feat))
    np.save('data/features/Archery/v' + str(i) + '.npy', tmp_data)