In [1]:
from Dataset_And_Transforms import FigrimFillersDataset, Downsampling, ToTensor, ExpandTargets, Targets2D
from torchvision import transforms
import torch
import torch.optim as optim

In [2]:
data_transform = transforms.Compose([ToTensor(),Downsampling(10)])
    
#load split data
figrim_dataset_train = FigrimFillersDataset(json_file='allImages_unfolded_train.json',
                                    root_dir='figrim/fillerData/Fillers',
                                     transform=data_transform)

In [3]:
dataset_loader_train = torch.utils.data.DataLoader(figrim_dataset_train, batch_size=1, 
                                             shuffle=False, num_workers=8)

In [4]:
for i, example in enumerate(dataset_loader_train): #start at index 0
            # get the inputs
            data = example["image"]
            #print("data size: {}".format(data.size()))
            target = example["fixations"]
            if i == 0:
                break

In [5]:
target.size()

torch.Size([1, 7, 2])

In [6]:
def index_2d_to_index_fl(tensor_size, index_2d):
    """
    Takes 2D-Tensor-Size and the index (as a tensor) of one if its entries.
    Returns the index of this entry for the flattened version of the 2D-Tensor.
    """
    
    n_cols = tensor_size[-1]
    idx_row, idx_col = tuple(index_2d)
    #extract values from tensors
    idx_row = idx_row.item()
    idx_col = idx_col.item()
    
    return idx_row * n_cols + idx_col

In [7]:
new_target = torch.zeros(target.size(1), 10002)
tensor = torch.zeros(7, 1, 10002)
print(new_target.size())
print(tensor.size())

torch.Size([7, 10002])
torch.Size([7, 1, 10002])


In [8]:
def mk_inputs(data, target):
    #TIME-STEP-INPUTS: SOS and Fixations

    n_classes = data.size(-2) * data.size(-1) + 2 #plus sos- and eos-token; so here 10002

    #batch-dimension (batch-size is always one), as needed to use nn.NLLLoss()
    inputs = torch.zeros(target.size(1)+1, 1, n_classes) #0 is batch-dimension, 1 is number of fixations, 2 is fixations

    #start-of-sequence-token
    inputs[0,0,0] = 1

    #fixations
    idx_new_targets = []
    for i in range(target.size(1)):
        idx_new_target = index_2d_to_index_fl(data.size(), target[0,i])
        idx_new_targets.append(idx_new_target)
        inputs[i+1, 0, idx_new_target+1] = 1
    return (inputs, idx_new_targets)

In [9]:
def mk_targets(data, target, idx_new_targets):
    #TARGETS: Fixations and EOS
    #AS WE USE nn.NLLLOSS(), only the INDEX of the target at each time step is needed, not a whole one-hot-vector

    n_classes = data.size(-2) * data.size(-1) + 2 #plus sos- and eos-token; so here 10001

    #index of eos-token
    idx_new_targets.append(n_classes - 1)

    #list2tensor
    new_targets = torch.LongTensor(idx_new_targets)
    return new_targets

In [10]:
def flatten_image(data):
    #IMAGE

    #batch-dimension (batch-size is always one), as needed to use nn.NLLLoss()
    return data.view(1,-1)

In [11]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(image_size + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(image_size + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, image, input, hidden):
        input_combined = torch.cat((image, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

In [12]:
image_size = 30000#data.size(-3) * data.size(-2) * data.size(-1)
n_classes = 10002
input_size = n_classes
hidden_size = n_classes
output_size = n_classes

rnn = RNN(input_size, hidden_size, output_size)
#output, loss = train(data, inputs, new_targets)

In [13]:
criterion = nn.NLLLoss()

learning_rate = 0.00005

optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)

def train(image, inputs, targets):
    targets.unsqueeze_(-1)
    hidden = rnn.initHidden()

    rnn.zero_grad()

    loss = 0

    for i in range(inputs.size(0)):
        output, hidden = rnn(image, inputs[i], hidden)
        l = criterion(output, targets[i])
        loss += l

    loss.backward()
    
    optimizer.step()

    #for p in rnn.parameters():
    #    p.data.add_(-learning_rate, p.grad.data)

    return output, loss.item() / inputs.size(0)

In [14]:
data.size()

torch.Size([1, 3, 100, 100])

In [15]:
import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [16]:
n_iters = 4
print_every = 1
plot_every = 1
all_losses = []
total_loss = 0 # Reset every plot_every iters

start = time.time()

for i, example in enumerate(dataset_loader_train): #start at index 0
    # get the inputs
    data = example["image"]
    #print("data size: {}".format(data.size()))
    target = example["fixations"]
    
    inputs, idx_new_targets = mk_inputs(data, target)
    new_targets = mk_targets(data, target, idx_new_targets)
    image = flatten_image(data)
    
    output, loss = train(image, inputs, new_targets)
    total_loss += loss

    if i % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), i, i / n_iters * 100, loss))

    if i % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0
        
        
    if i == n_iters:
        break

0m 12s (0 0%) 133.1399
0m 21s (1 25%) 12103.2083
0m 33s (2 50%) 89234688.0000
0m 44s (3 75%) nan
0m 54s (4 100%) nan


In [159]:
print(new_targets.size())
new_targets.unsqueeze(-1)[0]

torch.Size([8])


tensor([4950])

tensor(1)