In [4]:
# Source: https://github.com/MaximumEntropy/Seq2Seq-PyTorch/blob/master/beam_search.py

"""Beam search implementation in PyTorch."""
#
#
#         hyp1#-hyp1---hyp1 -hyp1
#                 \             /
#         hyp2 \-hyp2 /-hyp2#hyp2
#                               /      \
#         hyp3#-hyp3---hyp3 -hyp3
#         ========================
#
# Takes care of beams, back pointers, and scores.

# Code borrowed from PyTorch OpenNMT example
# https://github.com/pytorch/examples/blob/master/OpenNMT/onmt/Beam.py

import torch


class Beam(object):
    """Ordered beam of candidate outputs."""

    def __init__(self, size, vocab, cuda=False):
        """Initialize params."""
        self.size = size
        self.done = False
        self.pad = vocab['<pad>']
        self.bos = vocab['$start']
        self.eos = vocab['$end']
        self.tt = torch.cuda if cuda else torch

        # The score for each translation on the beam.
        self.scores = self.tt.FloatTensor(size).zero_()

        # The backpointers at each time-step.
        self.prevKs = []

        # The outputs at each time-step.
        self.nextYs = [self.tt.LongTensor(size).fill_(self.pad)]
        self.nextYs[0][0] = self.bos

    # Get the outputs for the current timestep.
    def get_current_state(self):
        """Get state of beam."""
        return self.nextYs[-1]

    # Get the backpointers for the current timestep.
    def get_current_origin(self):
        """Get the backpointer to the beam at this step."""
        return self.prevKs[-1]

    #  Given prob over words for every last beam `wordLk`: Compute and update the beam search.
    #
    # Parameters:
    #
    #     * `wordLk`- probs of advancing from the last step (K x words)
    #
    # Returns: True if beam search is complete.

    def advance(self, workd_lk):
        """Advance the beam."""
        num_words = workd_lk.size(1)

        # Sum the previous scores.
        if len(self.prevKs) > 0:
            beam_lk = workd_lk + self.scores.unsqueeze(1).expand_as(workd_lk)
        else:
            beam_lk = workd_lk[0]

        flat_beam_lk = beam_lk.view(-1)

        bestScores, bestScoresId = flat_beam_lk.topk(self.size, 0, True, True)
        self.scores = bestScores

        # bestScoresId is flattened beam x word array, so calculate which
        # word and beam each score came from
        prev_k = bestScoresId / num_words
        self.prevKs.append(prev_k)
        self.nextYs.append(bestScoresId - prev_k * num_words)

        # End condition is when top-of-beam is EOS.
        if self.nextYs[-1][0] == self.eos:
            self.done = True

        return self.done

    def sort_best(self):
        """Sort the beam."""
        return torch.sort(self.scores, 0, True)

    # Get the score of the best in the beam.
    def get_best(self):
        """Get the most likely candidate."""
        scores, ids = self.sort_best()
        return scores[1], ids[1]

    # Walk back to construct the full hypothesis.
    #
    # Parameters.
    #
    #     * `k` - the position in the beam to construct.
    #
    # Returns.
    #
    #     1. The hypothesis
    #     2. The attention at each time step.
    def get_hyp(self, k):
        """Get hypotheses."""
        hyp = []
        # print(len(self.prevKs), len(self.nextYs), len(self.attn))
        for j in range(len(self.prevKs) - 1, -1, -1):
            hyp.append(self.nextYs[j + 1][k])
            k = self.prevKs[j][k]

        return hyp[::-1]

In [154]:
import torch
import numpy as np
import pickle

In [2]:
batch_predict_sample = torch.load('batch_predict_sample.trch')

In [8]:
vocab = pickle.load(open('vocab_stoi_sample.pkl', 'rb'))

In [33]:
itos = pickle.load(open('vocab_itos_sample.pkl', 'rb'))

In [141]:
beam_size = 22
beam = Beam(beam_size, vocab)

In [142]:
batch_predict_sample[:, 3, :]

Variable containing:
-7.8707 -1.3969 -4.5329  ...  -8.2039 -8.7784 -7.9060
-7.8707 -1.3969 -4.5329  ...  -8.2039 -8.7784 -7.9060
-7.8707 -1.3969 -4.5329  ...  -8.2039 -8.7784 -7.9060
          ...             ⋱             ...          
-7.8707 -1.3969 -4.5329  ...  -8.2039 -8.7784 -7.9060
-7.8707 -1.3969 -4.5329  ...  -8.2039 -8.7784 -7.9060
-7.8707 -1.3969 -4.5329  ...  -8.2039 -8.7784 -7.9060
[torch.cuda.FloatTensor of size 81x1080 (GPU 0)]

In [143]:
for i in range(22):
    beam.advance(torch.exp(batch_predict_sample[0, :, :]).data)
# beam.sort_best()

In [148]:
beam.get_hyp(0)

[2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1]

In [166]:
results = np.array([beam.get_hyp(i) for i in range(beam_size)])

In [172]:
' '.join(itos[i] for i in results.T[20])

'<pad> $start <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> $start <pad> <pad> <pad> <pad>'

In [3]:
torch.exp(batch_predict_sample)[0].data

RuntimeError: can't convert CUDA tensor to numpy (it doesn't support GPU arrays). Use .cpu() to move the tensor to host memory first.