In [38]:
import torch
import argparse
import math
import os
import tqdm
import numpy as np

# Seed the NumPy and PyTorch (CPU and CUDA) random number generators.
seed = 1
np.random.seed(seed)
torch.random.manual_seed(seed)
torch.cuda.manual_seed(seed)

work_dir = '/tmp/txl-05-12_23-33-51'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the best saved model.
# map_location remaps the checkpoint's tensors onto `device`, so a checkpoint
# written on a GPU machine still loads when only the CPU is available.
# NOTE(review): torch.load unpickles arbitrary objects -- only point this at
# checkpoints from a trusted source.
with open(os.path.join(work_dir, 'model-best.pt'), 'rb') as f:
    model = torch.load(f, map_location=device)

model = model.to(device)
# Switch to evaluation mode; bind the result to `_` so the cell prints nothing.
_ = model.eval()

In [41]:
def top_k_logits(logits, k):
    """
    Keep the k largest logits along the last dimension and mask the rest.

    Masked entries are set to -1e10 (standing in for -infinity) so that
    e^-infinity -> 0 and they do not contribute to the softmax denominator.

    Args:
        logits: float tensor whose last dimension indexes the candidates;
            any number of leading dimensions is accepted.
        k: number of entries to keep per row. 0 disables filtering; values
            larger than the last dimension are clamped to it.

    Returns:
        `logits` itself when k == 0, otherwise a new tensor of the same
        shape. Entries tied with the k-th largest value are kept.
    """
    if k == 0:
        return logits
    k = min(k, logits.size(-1))
    top_values = torch.topk(logits, k)[0]
    # Threshold is the k-th largest value of each row; keeping the trailing
    # singleton dimension lets it broadcast against logits of any rank.
    kth_largest = top_values[..., -1:]
    return torch.where(logits < kth_largest,
                       torch.ones_like(logits) * -1e10,
                       logits)

In [6]:
from pytorch_pretrained_bert import GPT2Tokenizer
# Pretrained 'gpt2' tokenizer; used in this notebook to encode the prompt and
# to decode sampled token ids back into text.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

In [8]:
# Token id list for a single newline character; repeated later to build the
# seed prompt.
NL = tokenizer.encode('\n')
print(NL)

[198]


In [49]:
# Seed prompt: the newline token repeated four times. It is created on the
# same device the model was moved to, so the forward pass does not hit a
# CPU/GPU device mismatch.
data = torch.tensor(NL*4, device=device)
# Turn into a batch. TODO: take batch_size
# In-place: appends a dimension of size 1 after the sequence dimension.
data.unsqueeze_(1)
mems = model.init_mems()

def predict(model, data, mems):
    """Run one forward pass and return the hidden states for the input positions.

    Args:
        model: object exposing `_forward(data, mems=...)` that returns
            `(hidden, new_mems)`.
        data: input tensor; the size of its first dimension is taken as the
            number of positions to return hidden states for.
        mems: memory state handed through to `model._forward` unchanged.

    Returns:
        `(pred_hid, new_mems)`: the last `data.size(0)` entries of `hidden`
        along dim 0, and the memory state returned by the model.
    """
    tgt_len = data.size(0)
    # Inference only: skip autograd bookkeeping so repeated calls during
    # sampling do not build and hold on to a computation graph.
    with torch.no_grad():
        hidden, new_mems = model._forward(data, mems=mems)
    pred_hid = hidden[-tgt_len:]
    return pred_hid, new_mems

# Sanity check: run the prompt through the model once and inspect the shape of
# the returned hidden states. Note this also overwrites `mems`.
pred_hid, mems = predict(model, data, mems)
print(pred_hid.shape)

torch.Size([4, 1, 500])


In [51]:
import torch.nn.functional as F

def hidden_to_softmax(model, hidden, temperature=1, top_k=0):
    """Turn hidden states into a softmax distribution over the output layer.

    Adapted from utils/proj_adaptive_softmax.py

    Only the first output layer / projection of `model.crit` is used.
    NOTE(review): this presumably assumes the adaptive softmax has no tail
    clusters -- confirm against the model configuration.

    Args:
        model: model whose `crit` exposes `_compute_logit`, `out_layers` and
            `out_projs`.
        hidden: hidden states handed to `crit._compute_logit`.
        temperature: strictly positive divisor applied to the logits before
            the softmax.
        top_k: if non-zero, only the `top_k` highest logits keep probability
            mass (filtered by `top_k_logits`); 0 disables filtering.

    Returns:
        Softmax probabilities (not log-probabilities) over the last dimension.

    Raises:
        ValueError: if `temperature` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be > 0, got {}'.format(temperature))
    crit = model.crit
    head = crit.out_layers[0]
    logits = crit._compute_logit(hidden, head.weight, head.bias, crit.out_projs[0])
    # k == 0 means "no filtering", so the helper call is skipped entirely.
    if top_k != 0:
        logits = top_k_logits(logits, k=top_k)
    # Out-of-place division: never mutate the tensor handed back by
    # _compute_logit, which this function does not own.
    return F.softmax(logits / temperature, dim=-1)

# Sanity check: print the output shape and the total probability mass of the
# first position, which should be approximately 1.
softmax = hidden_to_softmax(model, pred_hid)
print(softmax.shape, softmax[0].sum().item())

torch.Size([4, 1, 50257]) 0.9999994039535522


In [53]:
# Build the distribution from the last position's hidden state only.
softmax = hidden_to_softmax(model, pred_hid[-1])
print(softmax.shape)
# Draw one index per row of `softmax`.
prev = torch.multinomial(softmax, num_samples=1)
print(prev.shape)

torch.Size([1, 50257])
torch.Size([1, 1])


In [60]:
## Init
data = torch.tensor(NL*4)
# Turn into a batch. TODO: take batch_size
data.unsqueeze_(1)
mems = model.init_mems()
output = None

## Grab a sample from the last frame, append to result list, append to `data`
pred_hid, mems = predict(model, data, mems)
softmax = hidden_to_softmax(model, pred_hid[-1])

new_sample = torch.multinomial(softmax, num_samples=1).unsqueeze(-1).squeeze(2)
data = torch.cat((data, new_sample), dim=0)
if not output:
    output = new_sample
else:
    output = torch.cat((output, new_sample), dim=1)
print(output.shape, data.shape)
## Run through again
## Decode results from result list

torch.Size([1, 1]) torch.Size([5, 1])


In [66]:
## Init
data = torch.tensor(NL*4)
# Turn into a batch. TODO: take batch_size
data.unsqueeze_(1)
mems = model.init_mems()
output = None

length = 10
for i in range(length):
    ## Grab a sample from the last frame, append to result list, append to `data`
    pred_hid, mems = predict(model, data, mems)
    softmax = hidden_to_softmax(model, pred_hid[-1])

    new_sample = torch.multinomial(softmax, num_samples=1).unsqueeze(-1).squeeze(2)
    data = torch.cat((data, new_sample), dim=0)
    if output is None:
        output = new_sample
    else:
        output = torch.cat((output, new_sample), dim=0)
print(output.shape, data.shape)
for i in range(output.size(1)):
    print(tokenizer.decode(output[:, i].tolist()))

torch.Size([10, 1]) torch.Size([14, 1])
 hirefundedatican pending pending pending opio opio lifting filibuster


In [68]:
# encode() needs a string; passing None raises TypeError. Catch and display the
# error so the notebook still runs from top to bottom.
try:
    tokenizer.encode(None)
except TypeError as err:
    print('TypeError: {}'.format(err))

TypeError: expected string or buffer