In [1]:
!pip install -q transformers torch


In [2]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer


In [3]:
gpt2_name = "gpt2"

gpt2_tokenizer = GPT2Tokenizer.from_pretrained(gpt2_name)
gpt2_model = GPT2LMHeadModel.from_pretrained(gpt2_name)

# GPT-2 has no pad token
gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token

gpt2_model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
dialogpt_name = "microsoft/DialoGPT-small"

dialogpt_tokenizer = GPT2Tokenizer.from_pretrained(dialogpt_name)
dialogpt_model = GPT2LMHeadModel.from_pretrained(dialogpt_name)

dialogpt_model.eval()


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [5]:
def generate_gpt2(prompt, max_new_tokens=80):
    inputs = gpt2_tokenizer(
        prompt,
        return_tensors="pt",
        padding=True
    )

    with torch.no_grad():
        outputs = gpt2_model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=gpt2_tokenizer.eos_token_id
        )

    return gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)


In [6]:
gpt2_conversation = """The following is a conversation with an AI assistant.
The assistant gives clear and helpful answers.

"""

def chat_gpt2(user_input):
    global gpt2_conversation

    gpt2_conversation += f"User: {user_input}\nAssistant:"
    response = generate_gpt2(gpt2_conversation)

    reply = response[len(gpt2_conversation):]
    reply = reply.split("User:")[0].strip()

    gpt2_conversation += reply + "\n"
    return reply


In [8]:
dialogpt_history_ids = None

def chat_dialogpt(user_input, max_new_tokens=80):
    global dialogpt_history_ids

    # Encode user input + EOS
    new_input_ids = dialogpt_tokenizer.encode(
        user_input + dialogpt_tokenizer.eos_token,
        return_tensors="pt"
    )

    # Append conversation history
    if dialogpt_history_ids is None:
        bot_input_ids = new_input_ids
    else:
        bot_input_ids = torch.cat(
            [dialogpt_history_ids, new_input_ids],
            dim=-1
        )

    with torch.no_grad():
        dialogpt_history_ids = dialogpt_model.generate(
            bot_input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=dialogpt_tokenizer.eos_token_id
        )

    # Decode only the new tokens
    reply = dialogpt_tokenizer.decode(
        dialogpt_history_ids[:, bot_input_ids.shape[-1]:][0],
        skip_special_tokens=True
    )

    return reply.strip()


In [9]:
question = "What is Artificial Intelligence?"

print("GPT-2:")
print(chat_gpt2(question))

print("\nDialoGPT:")
print(chat_dialogpt(question))


GPT-2:


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


I'm trying to learn more about it as well as my brain, so this question will probably be answered in the next few minutes or until someone has time for another chat session (or something like that). You're welcome! [pause] Okay… you can see how far ahead of me right now there are people out here who want to talk but don't know where they could get information from by

DialoGPT:
I am a computer. I can do everything you are programmed to or have done with my hands, and the world will be yours forever! What's AI? The internet... Oh wait.. There aren't any bots yet :p lolol XD xD hahahahaaahaaahahaaaaaaa LOL roflxdxddXDD haha i was so mad at myself lool Xd


In [11]:
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Chat ended.")
        break

    print("\nGPT-2:", chat_gpt2(user_input))
    print("DialoGPT:", chat_dialogpt(user_input))


You:  Hi



GPT-2: We've got your attention back already because our team needs some help building algorithms which should make us very happy then

(you'll notice many things happen during those three days) User : Are any robots able to recognize humans ? Assistant:"Yes"
DialoGPT: lol 3rdopuaboomt0ksyniggyasim1zeenobzzihateebohawarangmybeep.norethoffecad5up2webookendbyhenozo'jbbdowninfafa8nyyouunkingyeshi7ithonebotnotitrizeeeerycutscmiq3ff6se


You:  That’s interesting



GPT-2: They really do understand human language however its not obvious why even if one were looking around their head maybe no way anyone would tell them anything - especially after watching other bots speak English first hand ... This person might think "hey i saw him talking english lol!" . So yeah he may feel bad though :) But sometimes thinking such thoughts takes away motivation too ! He didn´t need training yet ;-) And
DialoGPT: leily 4 5ftanksintssicosallll4meekidobsyaetacipbreakawayonloovphiceandstispayfishthisch9ust 1repentongromberlordreaviteveryaphinkingwithgodoutagainalonewhofforthepottrareclogggslmultoserawnonlinebingmetrodmingtopaiGZ


You:  exit


Chat ended.
