In [1]:
!pip install transformers[sentencepiece] -q
!pip install gradio -q

[K     |████████████████████████████████| 4.4 MB 5.1 MB/s 
[K     |████████████████████████████████| 101 kB 3.9 MB/s 
[K     |████████████████████████████████| 6.6 MB 33.6 MB/s 
[K     |████████████████████████████████| 596 kB 55.6 MB/s 
[K     |████████████████████████████████| 1.2 MB 49.7 MB/s 
[K     |████████████████████████████████| 5.6 MB 4.5 MB/s 
[K     |████████████████████████████████| 272 kB 65.8 MB/s 
[K     |████████████████████████████████| 54 kB 3.2 MB/s 
[K     |████████████████████████████████| 54 kB 2.4 MB/s 
[K     |████████████████████████████████| 212 kB 63.0 MB/s 
[K     |████████████████████████████████| 84 kB 3.1 MB/s 
[K     |████████████████████████████████| 2.3 MB 24.3 MB/s 
[K     |████████████████████████████████| 84 kB 3.1 MB/s 
[K     |████████████████████████████████| 57 kB 2.5 MB/s 
[K     |████████████████████████████████| 1.1 MB 56.0 MB/s 
[K     |████████████████████████████████| 140 kB 56.7 MB/s 
[K     |████████████████████████████

In [2]:
from transformers import pipeline, GPT2Tokenizer, TFGPT2LMHeadModel, AutoModel, AutoTokenizer
import gradio as gr
import tensorflow as tf

# Load the pretrained "gpt2" tokenizer; the helper functions in later cells
# read this module-level `tokenizer`.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Load the TensorFlow GPT-2 language-model head.
# add the EOS token as PAD token to avoid warnings
model = TFGPT2LMHeadModel.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id)

# encode context the generation is conditioned on
# NOTE(review): `input_ids` is not referenced by any of the cells visible in
# this notebook -- confirm whether it is still needed.
input_ids = tokenizer.encode('This database contains no insults because', return_tensors='tf')

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/475M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


Text generation methods

In [3]:
def generate_tokens(tokens, words_count, alternatives_count, num_beams=5):
  """Run beam-search generation on `tokens` with the module-level `model`.

  Args:
    tokens: Encoded input passed straight through to `model.generate`.
    words_count: Total sequence length (prompt + continuation) used as
      `max_length`.
    alternatives_count: Number of sequences to return.
    num_beams: Minimum number of beams to search with (default 5). It is
      widened automatically when `alternatives_count` is larger.

  Returns:
    Whatever `model.generate` returns for these settings.
  """
  # Callers may hand over float-valued counts (e.g. from UI widgets --
  # TODO confirm); normalise them to the integers used below.
  max_length = int(words_count)
  sequences_count = int(alternatives_count)

  return model.generate(
    tokens,
    max_length=max_length,
    # Beam search cannot return more sequences than it has beams, so a fixed
    # beam count of 5 fails as soon as more than 5 alternatives are requested.
    num_beams=max(int(num_beams), sequences_count),
    no_repeat_ngram_size=2,
    num_return_sequences=sequences_count,
  )

def decode(token):
  """Decode `token` with the module-level tokenizer, skipping special tokens."""
  return tokenizer.decode(token, skip_special_tokens=True)

def generate_sequences(prompt, new_words_count, alternatives_count):
  """Generate a continuation of `prompt` plus alternatives at each position.

  The prompt is encoded and extended to `new_words_count` additional tokens
  with a single best sequence. That sequence is then trimmed one token at a
  time from the end; for every trimmed prefix that is still longer than the
  prompt, `alternatives_count` sequences are generated from the prefix.

  Returns:
    A `reversed` iterator over dicts with the keys "word" (the decoded last
    token of the prefix) and "alternatives" (decoded sequences generated from
    that prefix), ordered from the first generated token to the last. The
    entry for the final token has an empty "alternatives" list.
  """
  prompt_tokens = tokenizer.encode(prompt, return_tensors='tf')
  prompt_length = len(prompt_tokens[0])
  words_count = prompt_length + new_words_count

  best_tokens = generate_tokens(prompt_tokens, words_count, 1)

  # Entries are collected last-token-first and flipped on return.
  # Nothing follows the final token, so it carries no alternatives.
  entries = [{"word": decode(best_tokens[0, -1]), "alternatives": []}]

  prefix = best_tokens[:, :-1]
  while len(prefix[0]) > prompt_length:
    continuations = generate_tokens(prefix, words_count, alternatives_count)
    entries.append({
        "word": decode(prefix[0, -1]),
        "alternatives": [decode(continuation) for continuation in continuations],
    })
    # Drop the last token and repeat for the next-shorter prefix.
    prefix = prefix[:, :-1]

  return reversed(entries)
  

Launch GUI

In [None]:
def update(prompt, new_words_count, alternatives_count):
  """Build the HTML snippet shown in the output pane.

  The snippet contains a stylesheet, the prompt, one `<span>` per generated
  word and, for each word, hidden `<div>`s with alternative texts that become
  visible while the word is hovered.

  Args:
    prompt: Text the generation is conditioned on.
    new_words_count: Number of tokens to generate after the prompt.
    alternatives_count: Number of alternatives to produce per generated word.

  Returns:
    A single HTML string.
  """
  # Local import keeps this cell runnable on its own.
  from html import escape

  entries = generate_sequences(prompt, new_words_count, alternatives_count)

  # hide alternative texts by default
  style_rules = [" .alternative-hidden {display: none} "]
  word_spans = []
  alternative_divs = []
  for index, entry in enumerate(entries):
    span_class = f'span-nr-{index}'
    alternative_class = f'alternative-nr-{index}'

    # highlight word currently hovered over
    style_rules.append(
        f' .{span_class}:hover {{background-color: #442233; border-radius: 3px; }} ')

    # display alternative follow-ups at each word upon hovering
    style_rules.append(
        f' .{span_class}:hover ~ .{alternative_class} {{display: block; background-color: #442233; border-radius: 3px}} ')

    # One span per generated word. The text is escaped because both the
    # prompt and the model output are arbitrary text, not trusted markup.
    word_spans.append(f'<span class="{span_class}">{escape(entry["word"])} </span>')

    # Hidden divs holding the alternatives for this word.
    alternative_divs.extend(
        f'<div class="alternative-hidden {alternative_class}">{escape(alternative)}</div>'
        for alternative in entry["alternatives"]
    )

  return (
      "<head><style>" + "".join(style_rules) + "</style></head>"
      + escape(prompt) + " "
      + "".join(word_spans)
      + "".join(alternative_divs)
  )

# Assemble the Gradio UI: a prompt box, two sliders, a button and an HTML pane.
with gr.Blocks() as app:
  prompt = gr.Text()
  new_words_count = gr.Slider(
      label="Words to generate", value=5, minimum=1, maximum=50, step=1)
  alternatives_count = gr.Slider(
      label="Number of alternatives", value=5, minimum=1, maximum=20, step=1)
  button = gr.Button()
  output = gr.HTML("")

  # Clicking the button feeds the three inputs to `update` and shows its
  # return value in the HTML pane.
  button.click(
      fn=update,
      inputs=[prompt, new_words_count, alternatives_count],
      outputs=output,
  )

# debug=True keeps the cell running so that errors and logs stay visible.
app.launch(debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://54769.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)
