In [1]:
import torch, os
from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
from tqdm import tqdm

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
# Special tokens registered with the tokenizer and the model.
# Keep the order stable: ids are presumably assigned by position — TODO confirm.
special_tokens = [
    '<POS>',
    '<NEG>',
    '<CON_START>',
    '<START>',
    '<END>',
]

## Define tokenizer and model

In [3]:
# Tokenizer for the pretrained 'openai-gpt' vocabulary, extended with the special tokens.
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt', special_tokens=special_tokens)
# Prefer the GPU, but fall back to CPU so the notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The model is told how many special tokens were added so its embedding table matches the tokenizer.
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt', num_special_tokens=len(special_tokens))

ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.


## Load weights of the trained model

In [4]:
path = os.path.join(os.getcwd(),'./log/pytorch_model.bin')
# Deserialize onto the CPU first: this works even when the checkpoint was saved
# from a GPU and the current host has none, and avoids holding a second copy of
# the weights on the GPU. The model is moved to `device` afterwards.
model_state_dict = torch.load(path, map_location='cpu')
model.load_state_dict(model_state_dict)
model.to(device)
# nn.Module instances default to training mode; switch to eval mode so the
# dropout layers are disabled during inference. `eval()` returns the model, so
# the cell still displays the model summary.
model.eval()

OpenAIGPTLMHeadModel(
  (transformer): OpenAIGPTModel(
    (tokens_embed): Embedding(40483, 768)
    (positions_embed): Embedding(512, 768)
    (drop): Dropout(p=0.1)
    (h): ModuleList(
      (0): Block(
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1)
          (resid_dropout): Dropout(p=0.1)
        )
        (ln_1): BertLayerNorm()
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1)
        )
        (ln_2): BertLayerNorm()
      )
      (1): Block(
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1)
          (resid_dropout): Dropout(p=0.1)
        )
        (ln_1): BertLayerNorm()
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1)
        )
        (ln_2): BertLayerNorm()
      )
      (2): Block(
        (attn)

## Prediction function

In [5]:
def prediction(ref_text, max_length=512):
    """Greedily decode a continuation of ``ref_text`` with the fine-tuned model.

    The prompt is tokenized and the highest-scoring next token is appended one
    step at a time until the model emits ``<END>`` or the sequence (prompt plus
    generated tokens) reaches ``max_length``.

    Args:
        ref_text: Prompt string, e.g. ``"<POS> <CON_START> ... <START>"``.
        max_length: Upper bound on prompt + generated tokens. The default of
            512 matches the size of the model's position-embedding table;
            without a bound the loop never ends if ``<END>`` is not produced.

    Returns:
        The generated tokens (prompt excluded) decoded to a string by the
        tokenizer. The ``<END>`` marker is included when the model produced it.

    Raises:
        ValueError: If ``ref_text`` tokenizes to nothing, since there would be
            no context to condition on.
    """
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(ref_text))
    if not indexed_tokens:
        raise ValueError("ref_text produced no tokens; nothing to condition on")

    # Look the stop token up instead of hard-coding its id (previously 40482),
    # so the function keeps working if the special-token list changes.
    end_index = tokenizer.convert_tokens_to_ids(['<END>'])[0]

    decoded_sentence = []

    # Dropout must be off for deterministic greedy decoding; restore the
    # caller's mode afterwards so this function has no lasting side effect.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            while len(indexed_tokens) < max_length:
                torch_tokens = torch.tensor([indexed_tokens]).to(device)
                lm_logits = model(torch_tokens)
                # Scores at the last position give the next-token choice.
                predicted_index = torch.argmax(lm_logits[0, -1, :]).item()
                decoded_sentence.append(predicted_index)
                indexed_tokens.append(predicted_index)
                if predicted_index == end_index:
                    break
    finally:
        if was_training:
            model.train()

    return tokenizer.decode(decoded_sentence)

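## A few examples

Each prompt has the form `<sentiment tag> <CON_START> <content with the sentiment words removed> <START>`; the model rewrites the sentence in the requested sentiment (`<POS>` or `<NEG>`) and stops at `<END>`.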

In [8]:
# Negative-sentiment completion of the "wear" template.
text1 = (
    "<NEG> <CON_START> "
    "it does this to a very significant degree and is extremely to wear . "
    "<START>"
)
out_sen1 = prediction(text1)
out_sen1

'it does this to a very significant degree and is extremely uncomfortable to wear . <END> '

In [9]:
# Positive-sentiment completion of the "wear" template.
text1 = (
    "<POS> <CON_START> "
    "it does this to a very significant degree and is extremely to wear . "
    "<START>"
)
out_sen1 = prediction(text1)
out_sen1

'it does this to a very significant degree and is extremely comfortable to wear . <END> '

In [17]:
# Negative-sentiment completion of the "metal / non stick finish" template.
text1 = (
    "<NEG> <CON_START> "
    "the metal is very and the non stick finish is . "
    "<START>"
)
out_sen1 = prediction(text1)
out_sen1

'the metal is very cheap and the non stick finish is good . <END> '

In [11]:
# Positive-sentiment completion of the "metal / non stick finish" template.
text1 = (
    "<POS> <CON_START> "
    "the metal is very and the non stick finish is . "
    "<START>"
)
out_sen1 = prediction(text1)
out_sen1

'the metal is very nice and the non stick finish is great . <END> '