### Running enviroment is NMT
- https://mandgie.medium.com/how-to-build-your-own-chatbot-f5848ebcba8d
- https://github.com/butyr/huggingface-transformer-chatbots/blob/main/src/chatbots/blenderbot.py
- https://github.com/ruriko123/discord.py-chatbot

In [1]:
from transformers import BlenderbotSmallTokenizer, BlenderbotSmallModel

model = BlenderbotSmallModel.from_pretrained("facebook/blenderbot_small-90M",cache_dir="../cached")
tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot_small-90M",cache_dir="../cached")

inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
decoder_inputs = tokenizer("Studies show that", return_tensors="pt")  # Batch size 1
outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)

last_hidden_states = outputs.last_hidden_state
list(last_hidden_states.shape)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at facebook/blenderbot_small-90M were not used when initializing BlenderbotSmallModel: ['final_logits_bias', 'lm_head.weight']
- This IS expected if you are initializing BlenderbotSmallModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BlenderbotSmallModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[1, 3, 512]

In [2]:
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname,cache_dir="../cached")
tokenizer = BlenderbotTokenizer.from_pretrained(mname,cache_dir="../cached")
UTTERANCE = "My friends are cool but they eat too many carbs."
inputs = tokenizer([UTTERANCE], return_tensors="pt")
reply_ids = model.generate(**inputs)
print(tokenizer.batch_decode(reply_ids))



["<s> That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?</s>"]


In [3]:
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname,cache_dir="../cached")
tokenizer = BlenderbotTokenizer.from_pretrained(mname,cache_dir="../cached")
UTTERANCE = "My friends are cool but they eat too many carbs."
print("Human: ", UTTERANCE)

inputs = tokenizer([UTTERANCE], return_tensors="pt")
reply_ids = model.generate(**inputs)
print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])

REPLY = "I'm not sure"
print("Human: ", REPLY)

NEXT_UTTERANCE = (
    "My friends are cool but they eat too many carbs.</s> <s>That's unfortunate. "
    "Are they trying to lose weight or are they just trying to be healthier?</s> "
    "<s> I'm not sure."
)
inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
next_reply_ids = model.generate(**inputs)
print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])

Human:  My friends are cool but they eat too many carbs.
Bot:   That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?
Human:  I'm not sure
Bot:   That's too bad. Have you tried encouraging them to change their eating habits? 


In [4]:
NEXT_UTTERANCE

"My friends are cool but they eat too many carbs.</s> <s>That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?</s> <s> I'm not sure."

In [5]:
"""
Adapted from:
https://mandgie.medium.com/how-to-build-your-own-chatbot-f5848ebcba8d
"""


from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration
import os


class BlenderBot:
    def __init__(
        self,
        model_name: str ='facebook/blenderbot_small-90M',
    ):
        if not os.path.exists('./notebook/cached/blenderbot'): 
            BlenderbotSmallForConditionalGeneration.from_pretrained(model_name).save_pretrained('../cached/blenderbot')
            BlenderbotSmallTokenizer.from_pretrained(model_name).save_pretrained('../cached/blenderbot')

        self.model = BlenderbotSmallForConditionalGeneration.from_pretrained('../cached/blenderbot')
        self.tokenizer = BlenderbotSmallTokenizer.from_pretrained('../cached/blenderbot')

    def __call__(self, inputs: str) -> str:
        inputs_tokenized = self.tokenizer(inputs, return_tensors='pt')
        reply_ids = self.model.generate(**inputs_tokenized)
        reply = self.tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

        return reply

    def run(self):
        while True:
            user_input = input("User: ")
            print("Bot:", self(user_input))

bot = BlenderBot()
bot.run()            



Bot: hi, how are you today? i just got back from a long day at the office.
Bot: i'm doing well. how about you? what do you like to do for fun?
Bot: that's good to hear. i'm not sure if i should be worried or not.


RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

In [2]:
from transformers import pipeline, Conversation
import os
os.environ['TRANSFORMERS_CACHE'] = '/home/chenboc1/localscratch2/chenboc1/Adver_Conv/cached/'
chatbot = pipeline(model="facebook/blenderbot_small-90M")
# chatbot = pipeline(model="microsoft/DialoGPT-medium")
conversation = Conversation("Going to the movies tonight - any suggestions?")
conversation = chatbot(conversation)
conversation.generated_responses[-1]
conversation.add_user_input("Is it an action movie?")
conversation = chatbot(conversation)
conversation.generated_responses[-1]

  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|██████████| 1.64k/1.64k [00:00<00:00, 594kB/s]


"it's an action movie, but i'm not sure if i'll like it."

In [7]:
import torch
chatbot = pipeline(model="facebook/blenderbot-3B")

Downloading: 100%|██████████| 1.58k/1.58k [00:00<00:00, 744kB/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

# Let's chat for 5 lines
for step in range(5):
    # encode the new user input, add the eos_token and return a tensor in Pytorch
    new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt')

    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids

    # generated a response while limiting the total chat history to 1000 tokens, 
    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)

    # pretty print last ouput tokens from bot
    print("DialoGPT: {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)))


  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|██████████| 26.0/26.0 [00:00<00:00, 8.60kB/s]
Downloading: 100%|██████████| 642/642 [00:00<00:00, 552kB/s]
Downloading: 100%|██████████| 1.04M/1.04M [00:00<00:00, 4.49MB/s]
Downloading: 100%|██████████| 456k/456k [00:00<00:00, 2.99MB/s]
Downloading: 100%|██████████| 1.75G/1.75G [00:45<00:00, 38.5MB/s]


DialoGPT: 
DialoGPT: 
DialoGPT: 
DialoGPT: 
DialoGPT: 
