In [1]:
import torch
from torch.autograd import Variable
from tqdm import tqdm
import torch.nn as nn

import os
import argparse
from collections import Counter
import pickle
from tensorboard import Logger
from utils.ReCuda import ReCuda

from readData import get_data
from model import TextModel

In [2]:
##
def setup():
    if not os.path.isdir('logs'):
        os.mkdir('logs')
    logger = Logger('./logs')

    args = type('test', (), {})()
    args.train=False
    args.test = False
    args.ckpt = None
    
    args.source_dir = '/home/jiwan/tqa/prepro/data'
    args.ckpt_dir = './ckpt'
    args.emb_dim = 300
    args.repeat = False
    args.learning_rate = 0.001
    args.if_pair = False
    args.log_epoch = 4
    args.bi_gru = True
    args.batch_size = 36
    args.verbose = False
    args.end_epoch = 100
    args.single_topic = False

    args.test_iter = 'val'
    
    args.sample = True

    args.cuda = True
    if not torch.cuda.is_available():
        args.cuda = False

    config = args
    config.recuda = ReCuda(config)
    config.resume = False
    if config.ckpt is not None:
        config.resume = True
    config.single_topic_ckpt = ''
    if not config.single_topic:
        config.single_topic_ckpt = '_all'

    config.logger = logger

    config.recuda.torch.manual_seed(1)

    return config



##
# get net
def get_net(config, vocab):
    config.embed_size = 100
    if config.resume:
        print('RESUME {}th epoch'.format(config.ckpt))
        assert os.path.isdir('ckpt'), 'Error: no dir'
        ckpt = torch.load(os.path.join(config.ckpt_dir, 'ckpt{}_{}.t7'.format(config.single_topic_ckpt, config.ckpt)))
        net = TextModel(vocab, config)
        net.load_state_dict(ckpt['params'])
        best_acc = ckpt['acc']
        start_epoch = ckpt['epoch']
    else:
        net = TextModel(vocab, config)
        best_acc = 0
        start_epoch = 0
    net = config.recuda.var(net)
    print('PARAMS: ', net.parameters)
    return net, best_acc, start_epoch


##
def run_net(net, config, data):
    answers_size = len(data.answers)
    answers = torch.stack(data.answers, dim=2)
        
    if config.single_topic:
        topics = data.topic.data
    else:
        topics = torch.stack(data.topic, dim=2)

    target = Variable(data.correct_answer.data, requires_grad=False)
    target = config.recuda.var(target)
    print('t:', topics.size(), type(topics))
    # run
    return net.forward(topics, data.question, answers, answers_size)

##
def train_epoch(net, config, data, train_iter, epoch):

    # train
    train_loss = 0
    for batch_index, data in tqdm(enumerate(train_iter)):
        net.zero_grad()
        
        y = run_net(net, config, data)
        if config.verbose:
            print('y:', y.data)
            print('t:', target.data)
        loss = config.loss_fn(y, target)
        # count loss
        loss.backward()
        # optimize
        config.optimizer.step()

        train_loss += loss.data[0]
        loss_per = train_loss/(batch_index+1)
        print("Training {} epoch, loss: {}".format(epoch, loss_per))
        config.logger.scalar_summary('tr_loss', loss_per, epoch+1)

##
def validate_epoch(net, config, data, val_iter, epoch):
    # validate from time to time

    print("begin validation")
    correct = 0
    total = 0
    for index_v, data in tqdm(enumerate(val_iter)):
        y = run_net(net, config, data)
        
        value, pred = torch.max(y, 1)
        check = torch.eq(data.correct_answer.data, pred.data)
        if config.verbose:
            print(torch.sum(check), check.size())
        correct += torch.sum(check)
        total += (check.size()[0])

    acc = 100.*correct/total
    print("Val {} epoch, acc: {}".format(epoch, acc))

    config.logger.scalar_summary('val_acc', acc, (epoch + 1))

    return acc

##
def save_net(net, config, epoch, acc):
    print('saving')
    state = {
        'params': net.state_dict(),
        'acc': acc,
        'epoch': epoch,
    }
    if not os.path.isdir('ckpt'):
        os.mkdir('ckpt')
    torch.save(state, os.path.join(config.ckpt_dir, 'ckpt{}_{}.t7'.format(config.single_topic_ckpt,epoch)))


##
def train_all(net, data, iters, config):
    config.loss_fn = nn.CrossEntropyLoss()
    config.optimizer = torch.optim.Adam(net.parameters(), lr=config.learning_rate)

    for epoch in range(config.start_epoch, config.end_epoch):
        print("{} epoch".format(epoch))
        train_epoch(net, config, data, iters['train'], epoch)
        acc = validate_epoch(net, config, data, iters['val'], epoch)

        save_net(net, config, epoch, acc)
##


def test_epoch(net, config, data, test_iter):
    test_net = Counter()
    net_dict = {}

    print("begin testing")
    for index_t, data in tqdm(enumerate(test_iter)):
        y = run_net(net, config, data)
        
        value, pred = torch.max(y, 1)
        check = torch.eq(data.correct_answer.data, pred.data)
        for i in range(len(check)):
            test_net[data.id[i]] += int(check[i])
            net_dict[data.id[i]] = [pred.data[i], data.correct_answer.data[i]]

    return test_net, net_dict


def test_all(net, data, test_iter, config):
    test_counter, test_dict = test_epoch(net, config, data, test_iter)

    with open(os.path.join(config.source_dir, 'correct_counter_{}.pickle'.format(config.test_iter)), 'wb') as outfile:
        pickle.dump(test_counter, outfile)

    with open(os.path.join(config.source_dir, 'correct_dict_{}.pickle'.format(config.test_iter)), 'wb') as outfile:
        pickle.dump(test_dict, outfile)


In [3]:
config = setup()

In [4]:
data, iters, vocab = get_data(config)

loading data_train_full_sample.tsv, data_test_full_sample.tsv, data_val_full_sample.tsv


In [5]:
print('loading model')
net, best_acc, config.start_epoch = get_net(config, vocab)

loading model
('PARAMS: ', <bound method TextModel.parameters of TextModel(
  (embed): Embedding(453, 300)
  (embed_context): GRU(300, 100, bidirectional=True)
  (embed_question): GRU(300, 100, bidirectional=True)
  (embed_answer): GRU(300, 100, bidirectional=True)
)>)


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class TextModelMain(nn.Module):
    def __init__(self, vocab, config, embed_size):
        super(TextModelMain, self).__init__()

        self.embed_size = embed_size
        self.config = config

        self.embed = nn.Embedding(len(vocab), config.emb_dim)
        self.embed.weight.data.copy_(vocab.vectors)

        self.bi = 2 if config.bi_gru else 1

        self.embed_context = nn.GRU(config.emb_dim, embed_size, bidirectional=config.bi_gru)
        self.embed_question = nn.GRU(config.emb_dim, embed_size, bidirectional=config.bi_gru)
        self.embed_answer = nn.GRU(config.emb_dim, embed_size, bidirectional=config.bi_gru)

    def forward(self, context, question, answers, answers_size):

        if not self.config.single_topic:
            context_shape = list(context.data.size())
            context_shape.append(self.config.emb_dim)
            context = context.view(-1, context.size()[2])
        
        context = self.embed(context)
        question = self.embed(question)

        if not self.config.single_topic:
            context = context.view(*context_shape)
            context = torch.sum(context, 1) # sum along num of topics

        M, hm = self.embed_context(context) # P x embed_size
        U, hu = self.embed_question(question) # Q X embed_size

        M = M.permute(1,0,2)
        U = U.permute(1,2,0)
        S = torch.matmul(M, U)
        S, S_index = torch.max(S, dim=2)
        a = F.softmax(S).unsqueeze(0).permute(1,2,0)
        a = a.expand(M.data.size())
        m = torch.mul(a, M)
        m = torch.sum(m, 1).unsqueeze(0)

        origin_size = answers.data.size()
        answers = answers.view(-1, answers.size()[2])
        if self.config.verbose:
            if len(answers.data.size()) < 3:
                print(answers.data)
        answers = self.embed(answers)
        C, hc = self.embed_answer(answers) # A X embed_size
        C = C.unsqueeze(0).view(origin_size[0], origin_size[1], origin_size[2], self.bi*self.embed_size)
        c = torch.sum(C, dim=0)
        r = torch.matmul(m.permute(1,0,2), c.permute(1,2,0)).squeeze()

        return r

net = TextModelMain(vocab, config, 100)
net.cuda()

TextModelMain(
  (embed): Embedding(453, 300)
  (embed_context): GRU(300, 100, bidirectional=True)
  (embed_question): GRU(300, 100, bidirectional=True)
  (embed_answer): GRU(300, 100, bidirectional=True)
)

In [7]:

if config.train:
    print("Let\'s start Training")
    train_all(net, data, iters, config)
else:
    print("Let\'s start Testing")
    test_all(net, data, iters[config.test_iter], config)


0it [00:00, ?it/s]

Let's start Testing
begin testing
('t:', torch.Size([105, 6, 9]), <class 'torch.autograd.variable.Variable'>)


1it [00:00,  4.27it/s]


In [8]:
data1 = next(iter(iters['train']))

In [9]:
answers_size = len(data1.answers)
answers = torch.stack(data1.answers, dim=2)
        
if config.single_topic:
    topics = data1.topic.data
else:
    topics = torch.stack(data1.topic, dim=2)
target = Variable(data1.correct_answer.data, requires_grad=False)
target = config.recuda.var(target)
print('t:', topics.size(), type(topics))
# run
context = topics.cpu()
question = data1.question.cpu()


('t:', torch.Size([226, 5, 9]), <class 'torch.autograd.variable.Variable'>)


In [26]:
class Encoder(nn.Module):
    def __init__(self, config):
        super(Encoder, self).__init__()
        
        self.embed = nn.Embedding(len(vocab), config.emb_dim)
        self.embed.weight.data.copy_(vocab.vectors)
        self.embed_context = nn.GRU(config.emb_dim, config.embed_size, bidirectional=config.bi_gru)
        self.embed_question = nn.GRU(config.emb_dim, config.embed_size, bidirectional=config.bi_gru)
        self.embed_answer = nn.GRU(config.emb_dim, config.embed_size, bidirectional=config.bi_gru)
        self.normalize_row = nn.Softmax(dim=2) # normalize along words of topic

        
    def forward(self, CO, Q, A):

        context_shape = list(CO.data.size())
        batch_size = context_shape[2]
        context_shape.append((config.embed_size* config.bi))
        CO = CO.view(-1, CO.size()[2])

        answer_shape = list(A.data.size())
        answer_shape.append((config.embed_size* config.bi))
        A = A.view(-1, A.size()[2])

        CO = self.embed(CO)
        Q = self.embed(Q)
        A = self.embed(A)

        CO, hc = self.embed_context(CO) # P x embed_size
        CO = CO.view(*context_shape)
        Q, hq = self.embed_question(Q) # Q X embed_size
        A, ha = self.embed_answer(A)
        A = A.view(*answer_shape) # A X embed_size

        CO = CO.permute(1, 2, 0, 3) # topic_num, batch_size, words_topic, embed_size
        Q = Q.permute(1, 2, 0) # batch_size, embed_size, words_question
        A = A.permute(2, 3, 0, 1) # batch_size, embed_size, words_answer, answer_num

        C = CO # store data

        # <attention>
        S = torch.matmul(C, Q) # topic_num, batch_size, words_topic, words_question
        S = self.normalize_row(S) # attention practice based on QAnet (Google)
        Att = torch.matmul(S, Q.permute(0, 2, 1)) # Q: batch_size, words_question, embed_size
        # Att: topic_num, batch_size, words_topic, embed_size
        C = F.normalize(C, dim=2) # normalize along words_topic, removing bias in total num of words
        C = torch.mul(Att, C) # apply attention
        C = torch.sum(torch.sum(C, 3), 2) # reduce dimension
        # </attention>

        maxval, argmax = torch.max(C, 0) # pick top 1 topic
        c = CO[argmax, torch.arange(0, batch_size).long(), :] # reduce based on top 1 indices
        c = c.permute(0, 2, 1) # batch_size, embed_size, words_topic 
        
        return c, Q, A

In [64]:
class MemoryAttention(nn.Module):
    def __init__(self, config):
        super(MemoryAttention, self).__init__()
        
        self.dim_words = config.dim_words
        self.keys = config.keys
        
        # linear mapping
        self.linear_map = {}
        for key in self.keys:
            self.linear_map[key] = nn.Linear(config.q_size * config.h_size, config.sizes[key])
        
    def forward(self, MO, qa, h):
        
        M = MO
        keys = self.keys
        
        size = {}
        for key in keys:
            size[key] = list(M[key].size())

        S = torch.matmul(qa.unsqueeze(3), h.unsqueeze(2))
        s_size = S.size()
        S = S.view(s_size[0], s_size[1], -1)
        # attention
        a = {}
        for key in keys:
            a[key] = torch.mul(M[key], self.linear_map[key](S))
            a[key] = F.softmax(a[key], dim=self.dim_words)
        # attention score
        scores = {}
        for key in keys:
            scores[key] = torch.norm(a[key], dim=self.dim_words)
            scores[key] = scores[key].unsqueeze(2)

        tuple_a = ()
        index_a = {}
        for i, key in enumerate(keys):
            tuple_a += (scores[key], )
            index_a[key] = i

        # score to softmax index of attending memory type
        score = torch.cat(tuple_a, dim=2)
        score = F.softmax(score, dim=self.dim_words)

        for key in keys:
            coeff = torch.index_select(score, self.dim_words, Variable(config.recuda.torch.LongTensor([index_a[key]])))
            a[key] = torch.matmul(a[key].unsqueeze(3), coeff.unsqueeze(2))
            a[key] = a[key].squeeze()

        m = {key : torch.mul(M[key], a[key]) for key in keys}

        return m

In [50]:
class SimpleModule(nn.Module):
    def __init__(self, config):
        super(SimpleModule, self).__init__()
        
        self.memory_attention = MemoryAttention(config)
        self.reasoning = Reasoning(config)
        self.question_attend = QuestionAttend(config)
        self.forget_gate = ForgetGate(config)
        self.confidence = Confidence(config)
        self.output = Output(config)
        
    def forward(self, M, qa, h):
        '''
        M = [c, A], 
        c: batch_size, embed_size, words_topic
        A: batch_size, embed_size, words_answer, answer_num
        qa: batch_size, embed_size, words_question
        h: batch_size, h_size
        '''
        m = self.memory_attention(M, qa, h)
        x = self.reasoning(h, m)
        
        qa = self.question_attend(qa, x)
        
        h = self.forget_gate(h, x)
        conf = self.confidence(h)
        o = self.output(h)
        
        return qa, h, o, conf
        

In [29]:
class POCController(nn.Module):
    def __init__(self, config):
        # hyperNetwork
        super(POCController, self).__init__()
        
        self.k = config.k # MAX num of steps
        self.conf_theshold = config.conf
        
        self.batch_size = config.batch_size
        self.h_size = config.h_size
        self.emb = config.embed_size * config.bi
        
        self.init_h = lambda size_list: Variable(torch.zeros(*size_list))

        self.module = SimpleModule(config)
    
    def forward(self, M, qa):
        h = self.init_h([self.batch_size, self.emb, self.h_size])
        
        for i in range(self.k):
            # run module
            qa, h, o, conf = self.module(M, qa, h)
            #if conf > self.conf_theshold:
                #break
        
        return o, conf

In [30]:
class Controller(nn.Module):
    def __init__(self, config):
        # hyperNetwork
        super(Controller, self).__init__()
        
        self.k = config.k # MAX num of steps
        self.conf_theshold = config.conf
        
        self.batch_size = config.batch_size
        self.h_size = config.h_size
        self.emb = config.embed_size * config.bi

        self.init_h = lambda size_list: Variable(torch.zeros(*size_list))

        self.module = SimpleModule(config)
    
    def forward(self, M, qa):
        h = self.init_h([self.batch_size, self.emb, self.h_size])
        
        for i in range(self.k):
            # generate weight
        
            # inject weight
        
            # run module
            qa, h, o, conf = self.module(M, qa, h)
            if conf > self.conf_theshold:
                break
        
        return o, conf
        

In [62]:
class ModuleNet(nn.Module):
    def __init__(self, config):
        # hyperNetwork
        super(ModuleNet, self).__init__()
        
        self.h_size = config.h_size
        self.dim_words = config.dim_words
        
        self.encoder = Encoder(config)
        self.controller = Controller(config) if config.hyper else POCController(config)
        self.init_h = lambda size_list: Variable(torch.zeros(*size_list))
        self.decoder = Decoder(config)
        
    def forward(self, CO, Q, A):
        batch_size = A.size()[0]
        
        # encoding layer
        c, q, A = self.encoder.forward(CO, Q, A)
        M = {'c': c}
        
        # reasoning layer
        MA = torch.sum(A, dim=(self.dim_words + 1)).squeeze()
        M['A'] =  MA # A to memory
        o, conf = self.controller(M, q)
        
        # decoder layer
        p = self.decoder(o, A)
        p = conf * p
        
        return p     

In [32]:
# residual block from resnet
def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

In [57]:
class Reasoning(nn.Module):
    def __init__(self, config):
        super(Reasoning, self).__init__()
        
        self.dim_words = config.dim_words
        self.keys = config.keys
        self.sizes = config.sizes
        planes = config.reasoning_planes
        
        self.res_conv_a = nn.Sequential(
                        BasicBlock(1, planes, stride=1, downsample=None),
                        BasicBlock(planes, planes, stride=1, downsample=None),
                        BasicBlock(planes, planes, stride=1, downsample=None))

        self.res_conv_x = nn.Sequential(
                        BasicBlock(planes, planes, stride=1, downsample=None),
                        BasicBlock(planes, planes, stride=1, downsample=None),
                        BasicBlock(planes, planes, stride=1, downsample=None))
        
        self.bm = nn.BatchNorm2d(planes)
        
        self.sample_down = {}
        for key in self.keys:
            self.sample_down[key] = nn.Linear(self.sizes[key], config.h_size)
        
    def forward(self, h, m):
        keys = self.keys
        dim_end = self.dim_words + 1

        # try to sample down
        m_high = [self.sample_down[key](m[key]).unsqueeze(dim_end) for key in keys]
        m_high = torch.cat(tuple(m_high), dim=dim_end)
        m_high = torch.sum(m_high, dim=dim_end) # sum retrieved memory along types: since types are supposed to be softmaxed, this makes sense

        # reasoning step
        a = torch.mul(h, m_high).unsqueeze(1) # add channel dimension
        a = self.res_conv_a(a) 
        a = F.softmax(a, dim=self.dim_words)
        x = torch.mul(m_high.unsqueeze(1), a)
        x = self.res_conv_x(x) # x: batch_size, channel_size, embed_size, h_size
        
        x = self.bm(x)
        x = F.relu(x)
        
        return x



In [34]:
class QuestionAttend(nn.Module):
    def __init__(self, config):
        super(QuestionAttend, self).__init__()
        
        self.dim_words = config.dim_words
        planes = 16

        self.res_conv_x = nn.Sequential(
                                BasicBlock(planes, planes, stride=1, downsample=None),
                                BasicBlock(planes, planes, stride=1, downsample=None),
                                nn.Conv2d(planes, 1, 3, stride=1, padding=1),
                                nn.Linear(config.h_size, config.q_size))
        
    def forward(self, qa, x):

        # downsample & reason
        x_qa = self.res_conv_x(x).squeeze()
        # attend
        a = F.softmax(x_qa, dim=self.dim_words)
        qa = torch.mul(qa, a)

        return qa

In [35]:
class DownSampleH(nn.Module):
    def __init__(self, config):
        super(DownSampleH, self).__init__()
        
        pool_kernel = 3
        pool_stride = 2
        pool_num = 2
        pool = [ nn.MaxPool1d(pool_kernel, stride=pool_stride) for i in range(pool_num) ]
        
        def pool_dim_func(k, size):
            if k < 2:
                return (size - (pool_kernel - 1) - 1) // pool_stride + 1
            return (pool_dim_func(k-1, size) - (pool_kernel - 1) - 1) // pool_stride + 1
        
        
        class Flatten(nn.Module):
            def __init__(self):
                super(Flatten, self).__init__()

            def forward(self, x):
                s = x.size()
                return x.view(s[0],-1)
            
        linear_feature = pool_dim_func(pool_num, config.h_size)* config.embed_size* config.bi
        linear = [ Flatten(), nn.Linear(linear_feature, 1) ]
        
        self.downsample = nn.Sequential(*(pool+linear))
    
    def forward(self, h):
        return self.downsample(h)

In [70]:
from __future__ import division

class ForgetGate(nn.Module):
    def __init__(self, config):
        super(ForgetGate, self).__init__()
        
        planes = 16
        
        self.res_conv_h = nn.Sequential(
                BasicBlock(planes, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                nn.Conv2d(planes, 1, 3, stride=1, padding=1))

        self.forget = Variable(torch.ones(1))
        self.batch_size = config.batch_size
        self.dim_words = config.dim_words
        
        self.downsample = DownSampleH(config)
    
    def forward(self, h, x):
        h_new_input = self.res_conv_h(x).squeeze()
        importance = self.downsample(h_new_input)
        
        forget = F.sigmoid(torch.mul(self.forget, importance)).unsqueeze(self.dim_words)
        h_new = h*(1 - forget) + F.softmax(h_new_input, dim=self.dim_words)*forget # forget var should be learnable!
        
        return h_new

In [37]:
class Confidence(nn.Module):
    def __init__(self, config):
        super(Confidence, self).__init__()
        
        self.downsample = DownSampleH(config)
        
    def forward(self, h):
        
        conf = self.downsample(h)
        conf = F.sigmoid(conf)
        
        return conf

In [38]:
class Output(nn.Module):
    def __init__(self, config):
        super(Output, self).__init__()
        
        planes = 16
        
        self.res_conv_o = nn.Sequential(
                BasicBlock(1, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                BasicBlock(planes, planes, stride=1, downsample=None),
                nn.Conv2d(planes, 1, 3, stride=1, padding=1))
        
    def forward(self, h):
        h = h.unsqueeze(1)
        o = self.res_conv_o(h)
        o = o.squeeze()
        
        return o

In [39]:
class Decoder(nn.Module):
    def __init__(self, config):
        super(Decoder, self).__init__()
        
        self.dim_words = config.dim_words
        self.ans_k = config.ans_k
        
        planes = 16
        
        self.sampledown = nn.Sequential(
                            nn.Linear(config.a_size*config.h_size, config.hidden_size),
                            nn.ReLU(),
                            nn.Linear(config.hidden_size, config.a_size),
                            nn.ReLU()
                        )
        
        def self_attention(k, X):
            # self attention
            Y = torch.mul(X.unsqueeze(self.dim_words + 1), X.unsqueeze(self.dim_words + 2))
            Y = torch.sum(Y, self.dim_words + 2).squeeze()
            if k < 2:
                return torch.mul(X, F.softmax(Y))
            return torch.mul(self_attention(k - 1, X), F.softmax(Y))
        
        self.sa = self_attention
        
    def forward(self, o, A):
        # naive probability
        
        o = o.squeeze()
        A = A
        x = torch.matmul(o.unsqueeze(3).unsqueeze(2), A.unsqueeze(3))
        s = x.size()
        x = x.view(s[0],s[1],-1,s[4])
        x = self.sampledown(x.permute(0,1,3,2))
        a = F.softmax(x.permute(0,1,3,2), dim=self.dim_words)
        oa = torch.mul(A, a)
        oa = oa.view(oa.size()[0], -1, oa.size()[3])
        oa = torch.sum(oa, 1)
        p = F.softmax(oa, dim=1)
        
        # answer patchwise probability       
        A = torch.mul(A, p.unsqueeze(1).unsqueeze(1))
        A = self.sa(self.ans_k, A)
        
        A = torch.sum(A, 1).squeeze()
        A = torch.sum(A, 1).squeeze()
        return A       

In [113]:
from utils.ReCuda import ReCuda

config.reasoning_planes = 16
config.k = 4
config.conf = 0.7
config.h_size = 128
config.hyper = False
config.hidden_size = 300
config.dim_words = 2
config.ans_k = 7
config.bi = 2 if config.bi_gru else 1
config.cuda = True

# fix_length
config.batch_size = 9
config.q_size = 17
config.a_size = 4
config.c_size = 226
config.keys = ['A', 'c']
config.sizes = {'A': config.a_size, 'c': config.c_size}

config.recuda = ReCuda(config)

answers_size = len(data1.answers)
answers = torch.stack(data1.answers, dim=2)
topics = torch.stack(data1.topic, dim=2)

CO = topics
Q = data1.question
A = answers

net = ModuleNet(config, vocab).cuda()

p = net.forward(CO, Q, A)


('argmax:', 
 3
 2
 2
 2
 4
 0
 2
 2
 3
[torch.cuda.LongTensor of size 9 (GPU 0)]
)
('co:', torch.Size([5, 9, 226, 200]))


TypeError: Performing basic indexing on a tensor and encountered an error indexing dim 0 with an object of type Variable. The only supported types are integers, slices, numpy scalars, or if indexing with a torch.cuda.LongTensor or torch.cuda.ByteTensor only a single Tensor may be passed.

In [95]:
from utils.ReCuda import ReCuda

config.reasoning_planes = 16
config.k = 4
config.conf = 0.7
config.h_size = 128
config.hyper = False
config.hidden_size = 300
config.dim_words = 2
config.ans_k = 7
config.bi = 2 if config.bi_gru else 1
config.cuda = True

# fix_length
config.batch_size = 9
config.q_size = 17
config.a_size = 4
config.c_size = 226
config.keys = ['A', 'c']
config.sizes = {'A': config.a_size, 'c': config.c_size}

config.recuda = ReCuda(config)

answers_size = len(data1.answers)
answers = torch.stack(data1.answers, dim=2)
topics = torch.stack(data1.topic, dim=2)

CO = topics.cpu()
Q = data1.question.cpu()
A = answers.cpu()

net = ModuleNet(config, vocab)
net = net.cuda()

embed_size = config.embed_size

embed = nn.Embedding(len(vocab), config.emb_dim)
embed.weight.data.copy_(vocab.vectors)

bi = 2 if config.bi_gru else 1

embed_context = nn.GRU(config.emb_dim, embed_size, bidirectional=config.bi_gru)
embed_question = nn.GRU(config.emb_dim, embed_size, bidirectional=config.bi_gru)
embed_answer = nn.GRU(config.emb_dim, embed_size, bidirectional=config.bi_gru)
normalize_row = nn.Softmax(dim=2)

context_shape = list(CO.data.size())
batch_size = context_shape[2]
context_shape.append((config.embed_size* config.bi))
CO = CO.view(-1, CO.size()[2])

answer_shape = list(A.data.size())
answer_shape.append((config.embed_size* config.bi))
A = A.view(-1, A.size()[2])

CO = embed(CO)
Q = embed(Q)
A = embed(A)

CO, hc = embed_context(CO) # P x embed_size
CO = CO.view(*context_shape)
Q, hq = embed_question(Q) # Q X embed_size
A, ha = embed_answer(A)
A = A.view(*answer_shape) # A X embed_size

CO = CO.permute(1, 2, 0, 3) # topic_num, batch_size, words_topic, embed_size
Q = Q.permute(1, 2, 0) # batch_size, embed_size, words_question
A = A.permute(2, 3, 0, 1) # batch_size, embed_size, words_answer, answer_num

C = CO # store data

# <attention>
S = torch.matmul(C, Q) # topic_num, batch_size, words_topic, words_question
S = normalize_row(S) # attention practice based on QAnet (Google)
Att = torch.matmul(S, Q.permute(0, 2, 1)) # Q: batch_size, words_question, embed_size
# Att: topic_num, batch_size, words_topic, embed_size
C = F.normalize(C, dim=2) # normalize along words_topic, removing bias in total num of words
C = torch.mul(Att, C) # apply attention
C = torch.sum(torch.sum(C, 3), 2) # reduce dimension
# </attention>

C = C.cuda()
CO = CO.cuda()

In [110]:


maxval, argmax = torch.max(C, 0) # pick top 1 topic

print(argmax)
print(type(CO.data))
b = torch.arange(0, batch_size).type_as(argmax.data)
print(b)
c = CO[argmax, b, :] # reduce based on top 1 indices
c = c.permute(0, 2, 1) # batch_size, embed_size, words_topic 
print(c.size())

Variable containing:
 4
 4
 4
 1
 1
 4
 4
 4
 4
[torch.cuda.LongTensor of size 9 (GPU 0)]

<class 'torch.cuda.FloatTensor'>

 0
 1
 2
 3
 4
 5
 6
 7
 8
[torch.cuda.LongTensor of size 9 (GPU 0)]

torch.Size([9, 200, 226])


In [114]:
type(vocab.vectors)

torch.FloatTensor