In [1]:
import numpy as np
import torch
import torch.nn as nn
import csv
import os
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import time

In [2]:
from setup_dataset import *
from setup_model import *
from setup_model_types import *

In [3]:
# Reload edited project modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inside the notebook.
%matplotlib inline
# NOTE(review): %pylab star-imports numpy and matplotlib names into the global
# namespace (hence the "Populating the interactive namespace" message below),
# which can shadow builtins and project names. Later cells may rely on those
# injected names, so it is left in place — consider removing once verified.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [4]:
# Load the dataset chunk for match 1 with start_frame=1 and end_frame=1030.
# Per the recorded output, this call reads an NPZ file and prompts
# interactively for GPU vs. CPU, so it cannot run unattended.
data = chunk_dataset(
    match_list=[1],
    start_frame=1,
    end_frame=1030,
)

Successfully loaded NPZ.
Do you want to use GPU (y/n)n
Selected CPU


  X_train = np.array(X_train)
  Y_train = np.array(Y_train)


In [5]:
"""data.padSequence()
original_shape = data.data.shape
data.data = data.data.reshape(-1, 1, 100, 100)"""

'data.padSequence()\noriginal_shape = data.data.shape\ndata.data = data.data.reshape(-1, 1, 100, 100)'

In [6]:
"""data.data.shape, data.targets.shape, data.seq_len.shape"""

'data.data.shape, data.targets.shape, data.seq_len.shape'

In [7]:
# Pad the per-sequence targets into a single batch-first float tensor and
# store the result back on the dataset object (overwrites data.targets).
# NOTE(review): `pad_sequence` is not imported explicitly in this notebook;
# presumably torch.nn.utils.rnn.pad_sequence via a star import — confirm.
data.targets = pad_sequence(data.targets, batch_first=True).float()

In [8]:
# Run configuration, read off the loaded dataset so later cells do not have
# to reach into `data` directly.
device = data.device
match_list, start_frame, end_frame = data.match_list, data.start_frame, data.end_frame

# NOTE(review): presumably the side length of the 100x100 input frames
# (cf. the reshape in the disabled cell above) — confirm.
input_size = 100
# One output unit per available target.
output_size = len(data.available_targets)

In [9]:
class CNNLSTMModel1(nn.Module):
    """CNN feature extractor followed by an LSTM and a softmax classifier.

    Every frame goes through one Conv2d -> ReLU -> BatchNorm2d -> MaxPool2d
    stage. The flattened per-frame features are regrouped into sequences,
    packed, run through an LSTM, and each time step is mapped to
    ``output_size`` values normalised with a softmax.

    Args:
        device: ``torch.device`` on which the initial LSTM states are created.
        original_shape: shape of the batch before the frames were flattened
            into the leading dimension; only the first two entries (number of
            sequences, padded sequence length) are used.
        output_size: number of output units.
        hidden_dim: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, device, original_shape, output_size, hidden_dim, n_layers):
        super(CNNLSTMModel1, self).__init__()

        self.device = device
        # Kept on the instance so forward() does not depend on a notebook
        # global of the same name.
        self.original_shape = original_shape
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(5,5))
        self.activation = nn.ReLU()
        self.bnorm = nn.BatchNorm2d(num_features=16)
        self.pool = nn.MaxPool2d(kernel_size=(2,2))

        # output = (input - filter + 1) / stride
        # convolution 1 on a 100x100 frame: (100 - 5 + 1) / 1 = 96x96
        # pooling 1: 48x48
        self.input_lstm = 48*48*16
        self.lstm = nn.LSTM(self.input_lstm, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)
        # Explicit dim avoids the implicit-dimension deprecation warning. For
        # the 2-D tensor passed in forward() the implicit choice was already
        # dim 1, so the results are unchanged.
        self.out = nn.Softmax(dim=1)

    def forward(self, x, seq_len=None):
        """Run the network on a flattened batch of frames.

        Args:
            x: tensor of shape (n_sequences * n_steps, 1, H, W), where
                n_sequences and n_steps are ``original_shape[0]`` and
                ``original_shape[1]``.
            seq_len: true (unpadded) length of every sequence, as a list or a
                1-D tensor. Defaults to ``None``, which treats every sequence
                as full length (no padding).

        Returns:
            Tensor of shape (n_sequences * max(seq_len), output_size) whose
            rows each sum to 1.
        """
        n_sequences, n_steps = self.original_shape[0], self.original_shape[1]

        if seq_len is None:
            seq_len = [n_steps] * n_sequences
        elif torch.is_tensor(seq_len):
            # pack_padded_sequence requires the lengths to live on the CPU.
            seq_len = seq_len.cpu()

        x = self.pool(self.bnorm(self.activation(self.conv1(x))))

        # Regroup the per-frame feature maps into (sequence, step, features).
        x = x.reshape(n_sequences, n_steps, -1)

        # The initial states must match the number of sequences in the batch.
        hidden = self.init_hidden(n_sequences)

        x = pack_padded_sequence(x, seq_len, batch_first=True, enforce_sorted=False)

        packed_out, _ = self.lstm(x, hidden)
        outs, _ = pad_packed_sequence(packed_out, batch_first=True)

        # One row per (sequence, step) pair for the per-step classifier.
        out = outs.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        out = self.out(out)

        return out

    def init_hidden(self, input_size):
        """Create random initial (h_0, c_0) states for the LSTM.

        Args:
            input_size: size of the second (batch) dimension of the states.
                The parameter name is kept for backward compatibility; it
                must equal the number of sequences fed to the LSTM.

        Returns:
            Tuple ``(h_0, c_0)``, each of shape
            (n_layers, input_size, hidden_dim), created on ``self.device``.
        """
        # nn.LSTM expects states shaped (num_layers, batch, hidden_size).
        state_shape = (self.n_layers, input_size, self.hidden_dim)
        hidden_a = torch.randn(state_shape, device=self.device)
        hidden_b = torch.randn(state_shape, device=self.device)

        return (hidden_a, hidden_b)

In [10]:
# Build the run descriptor used by the cells below (device, sizes, epoch
# count, output path and name). Per the recorded output, this call prompts
# interactively for the hidden-neuron count, the number of epochs, the model
# type and a free-text tag, and reports when the output folder already exists
# (existing training information is then overwritten).
model_structure = ModelStructure(device, input_size, output_size, match_list, start_frame, end_frame)

Number of hidden neurons: 200
Number of epochs: 5000
Choose type of RNN model:
1 - Simple RNN
2 - LSTM
3 - CNN
type: 3
write a observations without space and punctuations:teste
models/CNN_teste_m1_f1to1030_epoch5000_H200
ATTENTION! folder not created. Training informations will overwrite the existing one


In [11]:
# Instantiate the network with the settings collected by `model_structure`.
# NOTE(review): this builds `CNNLSTMModel`, which is not defined in this
# notebook (presumably it comes from a star import), rather than the
# `CNNLSTMModel1` class defined above — confirm which one is intended.
model = CNNLSTMModel(
    device=model_structure.device,
    input_shape=input_size,
    output_size=model_structure.output_size,
    hidden_dim=model_structure.hidden_neurons,
    n_layers=1,
)

In [12]:
# Adam over all model parameters; no hyper-parameters are passed, so the
# library defaults apply.
optimizer = torch.optim.Adam(model.parameters())
# Mean-squared-error loss, applied in the training cell to the model output
# and the flattened targets.
criterion = nn.MSELoss()

In [13]:
# Bind the model and the full training tensors for the selected device:
# on a CUDA run everything is moved to the GPU, otherwise the existing
# objects are used as they are.
if device.type != 'cuda':
    mymodel, X_train, Y_train = model, data.data, data.targets
else:
    mymodel, X_train, Y_train = model.cuda(), data.data.cuda(), data.targets.cuda()

In [14]:
# Display which device type was selected when the dataset was loaded.
device.type

'cpu'

In [15]:
def get_acc(predicted, target):
    """Return the fraction of rows whose arg-max class matches the target.

    Args:
        predicted: 2-D torch tensor of per-class scores, one row per sample.
        target: 2-D torch tensor with the same number of rows; the column
            holding each row's maximum is taken as the true class.

    Returns:
        The accuracy as a Python float.
    """
    predicted_class = torch.argmax(predicted, dim=1)
    target_class = torch.argmax(target, dim=1)

    # Reduce with torch instead of np.mean: NumPy cannot average a boolean
    # torch tensor, and cannot read a CUDA tensor at all.
    matches = (predicted_class == target_class).float()

    return matches.mean().item()

In [16]:
# Empty float histories; the training cell appends one loss and one accuracy
# value every 10th epoch.
train_loss_arr = np.empty(0)
train_acc_arr = np.empty(0)

In [None]:
# Full-batch training: one optimizer step per epoch on the whole training
# tensor. Every 10th epoch the loss and accuracy are logged to file and
# stdout, and a checkpoint is written when the logged loss improves.
start_time_processing = time.time()

# Not read in this cell; kept in case other cells depend on them.
first_time = True
first_epoch = True

# Start from +inf so the first logged loss always yields a checkpoint, even
# when the loss is 1 or larger.
best_loss = float('inf')

# The context manager closes the log even if training raises or the cell is
# interrupted part-way through.
with open(model_structure.path + '/' + "loss_file.txt", "w") as loss_file:

    for epoch in range(1, model_structure.n_epochs + 1):

        mymodel.train()

        optimizer.zero_grad()
        output = mymodel(X_train, data.seq_len)
        loss = criterion(output, Y_train.view(-1,data.target_size[2]).float())
        loss.backward()
        optimizer.step()

        if epoch%10 == 0:

            # Read the scalar loss once and reuse it below.
            epoch_loss = loss.item()

            train_loss_arr = np.append(train_loss_arr, epoch_loss)
            train_acc_arr  = np.append(train_acc_arr, model_structure.get_acc(output, Y_train.reshape(-1, len(data.available_targets))))

            header = "Epoch: {}/{}-------------------------------------------".format(epoch, model_structure.n_epochs)
            metrics = "Train -> Loss: {:.15f} Acc: {:.15f}".format(train_loss_arr[-1], train_acc_arr[-1])

            loss_file.write(header + "\n")
            loss_file.write(metrics + "\n")

            print(header)
            print(metrics)

            if train_loss_arr[-1] < best_loss:
                state = { 'epoch': epoch + 1, 'state_dict': mymodel.state_dict(),
                          'optimizer': optimizer.state_dict(), 'losslogger': epoch_loss, }
                torch.save(state, model_structure.path + '/' + model_structure.name)
                best_loss = epoch_loss
            else:
                print("model not saved")

    loss_file.write("--- %s seconds ---" % (time.time() - start_time_processing))

# NOTE(review): only the loss history is persisted; train_acc_arr is not
# written to disk.
np.savez(model_structure.path + '/' + "train_loss_arr", train_loss_arr)
print("--- %s seconds ---" % (time.time() - start_time_processing))

  out = self.out(out)


Epoch: 10/5000-------------------------------------------
Train -> Loss: 0.024288214743137 Acc: 0.898924708366394
