# Text Generation with GPT2

In [None]:
from __future__ import print_function

import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.is_available(), device

In [90]:
class GPT2TextWrapper:
    
    def __init__(self):
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.model = GPT2LMHeadModel.from_pretrained('gpt2')
        self.model.to(device)

    @staticmethod
    def adjust_length_to_model(length, max_sequence_length):
        if length < 0 and max_sequence_length > 0:
            length = max_sequence_length
        elif 0 < max_sequence_length < length:
            length = max_sequence_length  # No generation bigger than model size
        elif length < 0:
            length = MAX_LENGTH  # avoid infinite loop
        return length

    def continue_text(self, text_to_complete, length=20):
        encoded_text = self.tokenizer.encode(text_to_complete,
                                        add_special_tokens=False,
                                        return_tensors="pt",
                                        add_space_before_punct_symbol=True
                                       )
        encoded_text = encoded_text.to(device)

        max_length = self.model.config.max_position_embeddings
        length = self.adjust_length_to_model(length, max_sequence_length=max_length)

        output_sequences = self.model.generate(
            input_ids=encoded_text,
            max_length=length + len(encoded_text[0]),
            temperature=1.0,
            top_k=0,
            top_p=0.9,
            num_return_sequences=1,
            repetition_penalty=1.0,
            do_sample=True,
        )

        return self.tokenizer.decode(output_sequences.tolist()[0], clean_up_tokenization_spaces=True)


In [91]:
wrapper = GPT2TextWrapper()

In [92]:
print(wrapper.model)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): Laye

In [93]:
initial_text = "Hello world"
wrapper.continue_text(initial_text, length=1000)

Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence


"Hello world of games, each fight is a bit different. I got inspired by games like Tekken, Rims or Portal, where you use your weapon to heal your character, at least temporarily. Which one will you pick is always up to you. There are also some settings that affect your skill bonuses, which are something like fast attacks against enemies which sometimes never actually occurs but which I like and want to avoid by giving every other character some sort of set bonus.\n\nThe third option was experimenting with Borderlands 3's Arena. It took me about ten minutes or so to build an optimized one that dealt with that kind of burst where your character is in full strength, playing as the enemies after each turn as you approach them, and which, you know, he's barely fighting, until you actually manage to dodge the hell out of them. If I'd have gotten it done in eight seconds or less. I suppose what I hate about Borderlands, which is everything is boring, but once you figure out the game, is const

In [101]:
wrapper.continue_text("I like that song that says", length=900)

Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence


'I like that song that says "she\'s not a good leader" or, "she might have been around for longer". But that never actually bothered me. Every single time I sang songs, I always thought that what I was doing was fine. So whenever I sang "Herd Leader", it became fun to have fun with the songs, since they all seemed to have this odd personal bond as well, like hard, repetitive songs.\n\nIt was very important to understand the myth of leadership and how much that\'s putting you back in the mindset of the player and your whole team. Most of the time you need to be part of that, because sometimes the idea that the player is in command, that the coach is letting it work out for them, it\'s not really what you want to be in your job. It\'s the thing you\'re used to dealing with the most and your team\'s challenge to that idea. We also\'re used to dealing with the players too, but some players\' personalities change. And so it makes it harder to be part of that situation because the player fee