### Running environment is NMT
- https://mandgie.medium.com/how-to-build-your-own-chatbot-f5848ebcba8d
- https://github.com/butyr/huggingface-transformer-chatbots/blob/main/src/chatbots/blenderbot.py

In [3]:
from transformers import BlenderbotSmallTokenizer, BlenderbotSmallModel

# Load the bare BlenderbotSmall encoder-decoder and its tokenizer. The forward
# pass below yields hidden states, not generated text.
checkpoint = "facebook/blenderbot_small-90M"
cache_dir = "../cached"
model = BlenderbotSmallModel.from_pretrained(checkpoint, cache_dir=cache_dir)
tokenizer = BlenderbotSmallTokenizer.from_pretrained(checkpoint, cache_dir=cache_dir)

# Tokenize the encoder text and the decoder prefix separately (batch size 1 each).
encoder_text = "Studies have been shown that owning a dog is good for you"
decoder_text = "Studies show that"
inputs = tokenizer(encoder_text, return_tensors="pt")
decoder_inputs = tokenizer(decoder_text, return_tensors="pt")

# Single forward pass through the model.
outputs = model(
    input_ids=inputs["input_ids"],
    decoder_input_ids=decoder_inputs["input_ids"],
)

# Final expression of the cell: display the hidden-state tensor's shape as a list.
last_hidden_states = outputs.last_hidden_state
list(last_hidden_states.shape)

Downloading: 100%|██████████| 1.51k/1.51k [00:00<00:00, 659kB/s]
Downloading: 100%|██████████| 350M/350M [00:04<00:00, 72.4MB/s] 
Some weights of the model checkpoint at facebook/blenderbot_small-90M were not used when initializing BlenderbotSmallModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BlenderbotSmallModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BlenderbotSmallModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Downloading: 100%|██████████| 964k/964k [00:00<00:00, 6.29MB/s]
Downloading: 100%|██████████| 345k/345k [00:00<00:00, 2.79MB/s]
Downloading: 100%|██████████| 205/205 [00:00<00:00, 155kB/s]
Downloading: 100%|██████████| 99.0/99.0 [0

[1, 3, 512]

In [4]:
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

# Load the distilled 400M Blenderbot generation model and its tokenizer.
mname = "facebook/blenderbot-400M-distill"
load_options = {"cache_dir": "../cached"}
model = BlenderbotForConditionalGeneration.from_pretrained(mname, **load_options)
tokenizer = BlenderbotTokenizer.from_pretrained(mname, **load_options)

# One-turn exchange: encode a batch of one utterance, generate, then decode.
UTTERANCE = "My friends are cool but they eat too many carbs."
inputs = tokenizer([UTTERANCE], return_tensors="pt")
reply_ids = model.generate(**inputs)

# Decoded without skip_special_tokens, so the printed list keeps the special tokens.
decoded_replies = tokenizer.batch_decode(reply_ids)
print(decoded_replies)

Downloading: 100%|██████████| 1.57k/1.57k [00:00<00:00, 959kB/s]
Downloading: 100%|██████████| 730M/730M [00:09<00:00, 74.8MB/s] 
Downloading: 100%|██████████| 127k/127k [00:00<00:00, 1.50MB/s]
Downloading: 100%|██████████| 62.9k/62.9k [00:00<00:00, 1.02MB/s]
Downloading: 100%|██████████| 1.15k/1.15k [00:00<00:00, 639kB/s]
Downloading: 100%|██████████| 16.0/16.0 [00:00<00:00, 12.4kB/s]
Downloading: 100%|██████████| 772/772 [00:00<00:00, 315kB/s]


["<s> That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?</s>"]


In [7]:
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

# Load the distilled 400M Blenderbot generation model and its tokenizer.
mname = "facebook/blenderbot-400M-distill"
cache_location = "../cached"
model = BlenderbotForConditionalGeneration.from_pretrained(mname, cache_dir=cache_location)
tokenizer = BlenderbotTokenizer.from_pretrained(mname, cache_dir=cache_location)

# --- Turn 1: the human opens, the bot answers -------------------------------
UTTERANCE = "My friends are cool but they eat too many carbs."
print("Human: ", UTTERANCE)

inputs = tokenizer([UTTERANCE], return_tensors="pt")
reply_ids = model.generate(**inputs)
first_reply = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
print("Bot: ", first_reply)

# --- Turn 2: the human follows up -------------------------------------------
REPLY = "I'm not sure"
print("Human: ", REPLY)

# The conversation history is written out literally, with turns separated by
# "</s> <s>". It is not assembled from UTTERANCE, the generated reply, or REPLY;
# note that its last turn ends with a period while REPLY does not.
NEXT_UTTERANCE = (
    "My friends are cool but they eat too many carbs.</s> <s>That's unfortunate. "
    "Are they trying to lose weight or are they just trying to be healthier?</s> "
    "<s> I'm not sure."
)
inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
next_reply_ids = model.generate(**inputs)
second_reply = tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0]
print("Bot: ", second_reply)

Human:  My friends are cool but they eat too many carbs.
Bot:   That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?
Human:  I'm not sure
Bot:   That's too bad. Have you tried encouraging them to change their eating habits? 


In [8]:
# Echo the hand-built multi-turn prompt so the notebook displays its exact text.
NEXT_UTTERANCE

"My friends are cool but they eat too many carbs.</s> <s>That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?</s> <s> I'm not sure."

In [None]:
"""
Adapted from:
https://mandgie.medium.com/how-to-build-your-own-chatbot-f5848ebcba8d
"""


from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration
import os


class BlenderBot:
    """Minimal single-turn chatbot built on a BlenderbotSmall generation model.

    The model and tokenizer are loaded from a local directory when it exists;
    otherwise they are fetched by name and a copy is saved to that directory
    for subsequent runs.

    Attributes:
        model: The loaded ``BlenderbotSmallForConditionalGeneration`` instance.
        tokenizer: The loaded ``BlenderbotSmallTokenizer`` instance.
    """

    def __init__(
        self,
        model_name: str = 'facebook/blenderbot_small-90M',
        model_dir: str = './models/blenderbot',
    ):
        """Load the model and tokenizer, saving a local copy on first use.

        Args:
            model_name: Name passed to ``from_pretrained`` when no local copy
                exists yet.
            model_dir: Directory holding the local copy. If it already exists
                it takes precedence and ``model_name`` is not consulted, so
                use a different directory for each distinct model.
        """
        already_saved = os.path.exists(model_dir)
        source = model_dir if already_saved else model_name

        # Load each artifact exactly once, from whichever source applies.
        self.model = BlenderbotSmallForConditionalGeneration.from_pretrained(source)
        self.tokenizer = BlenderbotSmallTokenizer.from_pretrained(source)

        if not already_saved:
            # Save only after BOTH loads succeeded, so a failed download cannot
            # leave behind a directory that has the model but no tokenizer.
            self.model.save_pretrained(model_dir)
            self.tokenizer.save_pretrained(model_dir)

    def __call__(self, inputs: str) -> str:
        """Generate the bot's reply to a single user message.

        Args:
            inputs: The user's message.

        Returns:
            The first decoded sequence produced by ``generate``, with special
            tokens removed.
        """
        inputs_tokenized = self.tokenizer(inputs, return_tensors='pt')
        reply_ids = self.model.generate(**inputs_tokenized)
        reply = self.tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

        return reply

    def run(self) -> None:
        """Chat on the console until the user sends EOF or presses Ctrl-C.

        Each message is answered independently; no conversation history is
        passed to the model.
        """
        while True:
            try:
                user_input = input("User: ")
            except (EOFError, KeyboardInterrupt):
                # End the session quietly instead of surfacing a traceback.
                print()
                break

            if not user_input.strip():
                # Nothing to reply to; re-prompt rather than sending an empty
                # string to the model.
                continue

            print("Bot:", self(user_input))