
Sources:

 - Blenderbot Model Doc https://huggingface.co/docs/transformers/model_doc/blenderbot

 - Blenderbot model implementation https://github.com/huggingface/transformers/blob/master/src/transformers/models/blenderbot/modeling_blenderbot.py

In [6]:
from transformers import AutoModelWithLMHead, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
import torch
# mname = "microsoft/DialoGPT-small"
mname = "facebook/blenderbot-400M-distill"

from transformers import BlenderbotTokenizer

# Blenderbot without language model head
from transformers import BlenderbotModel

# Blenderbot with a language model head
from transformers import BlenderbotForConditionalGeneration

# Load the pretrained checkpoint named by `mname` and the tokenizer of the same name.
mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
# model = BlenderbotModel.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)


In [7]:
# Single-turn example: tokenize one utterance, generate a reply, decode it.
UTTERANCE = "Hello. How are you?"
# UTTERANCE = "My friends are cool but they eat too many carbs." * 2

# Tokenize as a batch of one and request PyTorch tensors.
inputs = tokenizer([UTTERANCE], return_tensors="pt")
print(inputs)

reply_ids = model.generate(**inputs)
# Print the first decoded reply with special tokens stripped.
print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
# Decode again without stripping special tokens; this is the cell's value.
tokenizer.batch_decode(reply_ids)

{'input_ids': tensor([[6950,   21,  855,  366,  304,   38,    2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]])}
Bot:   I'm doing well, thank you. How about yourself? Do you have any plans for the weekend?


["<s> I'm doing well, thank you. How about yourself? Do you have any plans for the weekend?</s>"]

In [3]:
# Same single-turn example as the previous cell.
# NOTE(review): the output recorded for this cell mentions eos_token_id 50256
# and `<|endoftext|>`, so it was presumably produced with a different
# model/tokenizer loaded (possibly the commented-out DialoGPT checkpoint) —
# confirm before relying on it.
UTTERANCE = "Hello. How are you?"
# UTTERANCE = "My friends are cool but they eat too many carbs." * 2

# Tokenize as a batch of one and request PyTorch tensors.
inputs = tokenizer([UTTERANCE], return_tensors="pt")
print(inputs)

reply_ids = model.generate(**inputs)
# Print the first decoded reply with special tokens stripped.
print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
# Decode again without stripping special tokens; this is the cell's value.
tokenizer.batch_decode(reply_ids)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'input_ids': tensor([[15496,    13,  1374,   389,   345,    30]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}
Bot:  Hello. How are you? Friend zone and how are you friends?


['Hello. How are you? Friend zone and how are you friends?<|endoftext|>']

### Manual chat with Blenderbot

In [13]:
# Two-turn manual chat: the running conversation is kept in `chat_history`
# as token ids, extended along the last dimension after every turn.
# NOTE(review): the recorded output shows "Input:  </s>" and tensor([[2]])
# for the first turn, so it looks like it came from a run with
# `input_text = ""` — confirm / re-run.
# chat_history = []

input_text = "Hello! What is your favorite dog breed?"
# input_text = ""
input_ids = tokenizer.encode(input_text, return_tensors='pt')
print("Input: ", tokenizer.decode(input_ids[0]))
print(input_ids)

# Start the history with the first user turn.
chat_history = torch.cat([input_ids], dim=-1)

reply_ids = model.generate(chat_history, max_length=1250,)
print(reply_ids)
print("Bot: ", tokenizer.decode(reply_ids[0]))

# Append the generated reply to the history.
chat_history = torch.cat([chat_history, reply_ids], dim=-1)

input_text = "My favorite dog is a poodle"
input_ids = tokenizer.encode(input_text, return_tensors='pt')
print("Input: ", tokenizer.decode(input_ids[0]))

# Append the second user turn, then generate from the whole history.
chat_history = torch.cat([chat_history, input_ids], dim=-1)

reply_ids = model.generate(chat_history, max_length=1250,)
print(reply_ids)
print("Bot: ", tokenizer.decode(reply_ids[0]))


# tokenizer.decode(reply)

Input:  </s>
tensor([[2]])
tensor([[   1,    1,  946,  304,  360,  463,  286, 1272,   38,  281,  360,  265,
         1784,  298,  338,  341,  395,  899,  903,   21,    2]])
Bot:  <s><s> Do you have any pets? I have a dog and he's my best friend.</s>
Input:   My favorite dog is a poodle</s>
tensor([[   1,  281,  913,  286,  495,  917,    8,  228,  281,  360,  884,  306,
          494,   21,  228,  714,  906,  306, 1784,  361,  304,  360,   38,    2]])
Bot:  <s> I love poodles!  I have two of them.  What kind of dog do you have?</s>


In [11]:
# Decode the first sequence in `reply_ids` with special tokens removed.
tokenizer.decode(reply_ids[0], skip_special_tokens=True)

' I love poodles too! I have two of them. They are so smart and gentle.'

In [3]:
import torch
from transformers import BlenderbotTokenizer
from transformers import BlenderbotForConditionalGeneration

# Load the pretrained checkpoint named by `mname` and the tokenizer of the same name.
mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)

def add_user_input_to_chat_history(text_input, chat_history_ids, tokenizer):
    """Tokenize one user message and append it to the chat history.

    Args:
        text_input: The user's message as plain text.
        chat_history_ids: Token ids of the conversation so far, or None
            when there is no history yet.
        tokenizer: Object whose ``encode(text, return_tensors='pt')``
            produces the token ids for ``text_input``.

    Returns:
        The result of ``add_to_chat_history`` for the encoded message and
        the given history.
    """
    encoded_message = tokenizer.encode(text_input, return_tensors='pt')
    return add_to_chat_history(encoded_message, chat_history_ids)


def add_response_to_chat_history(chat_history_ids, model):
    """Generate the model's reply to the chat history and append it.

    Args:
        chat_history_ids: Token ids of the conversation so far; passed to
            ``model.generate`` as its input.
        model: Object exposing ``generate(input_ids, max_length=...)``.

    Returns:
        The result of ``add_to_chat_history`` for the generated ids and
        the given history.
    """
    generated_ids = model.generate(chat_history_ids, max_length=1250,)
    return add_to_chat_history(generated_ids, chat_history_ids)


def add_to_chat_history(text_ids, chat_history_ids=None):
    """Append ``text_ids`` to the chat history along the last dimension.

    Args:
        text_ids: Token ids to append.
        chat_history_ids: Existing history, or None when there is none.

    Returns:
        ``text_ids`` itself when there is no history; otherwise the
        concatenation ``[chat_history_ids, text_ids]`` on ``dim=-1``.
    """
    if chat_history_ids is not None:
        return torch.cat([chat_history_ids, text_ids], dim=-1)
    # No history yet: the new ids become the history unchanged.
    return text_ids

def print_chat_history(chat_history_ids):
    """Print the decoded text of the first sequence in the chat history.

    Decoding uses the module-level ``tokenizer`` and is done without
    ``skip_special_tokens``.
    """
    print(tokenizer.decode(chat_history_ids[0]))


In [6]:
# Scripted three-turn conversation: each user turn is appended to the history
# and immediately followed by a generated reply that is appended as well.
# The first call passes None because there is no history yet.
text_input = "Hi! How are you Blendy? How is the weather?"
chat_history_ids = add_user_input_to_chat_history(text_input, None, tokenizer)
chat_history_ids = add_response_to_chat_history(chat_history_ids, model)

text_input = "I think I'll go dancing. I love R&B music."
chat_history_ids = add_user_input_to_chat_history(text_input, chat_history_ids, tokenizer)
chat_history_ids = add_response_to_chat_history(chat_history_ids, model)

text_input = "I love dogs. What is your favorite dog breed?"
chat_history_ids = add_user_input_to_chat_history(text_input, chat_history_ids, tokenizer)
chat_history_ids = add_response_to_chat_history(chat_history_ids, model)


# Show the whole decoded conversation.
print_chat_history(chat_history_ids)

 Hi! How are you Blendy? How is the weather?</s><s> Hi! I am doing well. The weather is great here. How about where you are?</s> I think I'll go dancing. I love R&B music.</s><s> I'm doing great! I love dancing too! What kind of dancing do you like to do?</s> I love dogs. What is your favorite dog breed?</s><s> I love all kinds of dogs, but my favorite is probably a Labrador Retriever. What about you?</s>


In [68]:
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

# Checkpoint name shared by the tokenizer and the model loaded below.
model_name = "facebook/blenderbot-400M-distill"

tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
model = BlenderbotForConditionalGeneration.from_pretrained(model_name)

def generate_response(tokenizer, model, chat_round, chat_history_ids):
    """Read one user message, print the bot's reply, and extend the history.

    Args:
        tokenizer: Object providing ``encode(text, return_tensors='pt')``
            and ``decode(ids, skip_special_tokens=True)``.
        model: Object providing ``generate(input_ids, max_length=...)``.
        chat_round: Zero-based round index. On round 0 any history that
            was passed in is ignored and the conversation starts fresh.
        chat_history_ids: Token ids of the conversation so far, or None
            when there is no history yet.

    Returns:
        Token ids of the conversation including the new user message and
        the generated reply, concatenated along the last dimension.
    """
    # Prompt on stdin and encode the user's message.
    new_user_input_ids = tokenizer.encode(input(">> User:"), return_tensors='pt')

    # Prepend the history only when there is one; a None history on a later
    # round would otherwise make torch.cat fail.
    if chat_round > 0 and chat_history_ids is not None:
        bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1)
    else:
        bot_input_ids = new_user_input_ids

    # For this encoder-decoder model, generate() returns only the reply and
    # does not echo the input (see the recorded outputs of the manual chat
    # cell). The reply must therefore NOT be sliced by the input length, as
    # a decoder-only recipe would do.
    # NOTE(review): the checkpoint presumably has a limited input length, so
    # long histories may need truncating — confirm against the model config.
    reply_ids = model.generate(
        bot_input_ids,
        max_length=1250,
    )

    # Pretty print the bot's reply.
    print("Blenderbot: {}".format(tokenizer.decode(reply_ids[0], skip_special_tokens=True)))

    # Return the full conversation so the next round sees earlier turns.
    return torch.cat([bot_input_ids, reply_ids], dim=-1)

def chat_for_n_rounds(n=5):
    """Run an interactive chat of ``n`` rounds.

    Each round delegates to ``generate_response`` with the module-level
    ``tokenizer`` and ``model``, and carries the ids it returns into the
    next round. Nothing is returned.
    """
    history_ids = None  # no conversation before the first round
    for round_index in range(n):
        history_ids = generate_response(tokenizer, model, round_index, history_ids)

" Hello! What is your favorite dog breed?</s><s> I don't have a favorite breed, but I love all dogs.  What about you?</s> My favorite dog is a poodle</s>"