In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd
import time
from unittest.mock import patch

In [5]:
from unittest.mock import patch
class LlmBot:
    """Minimal multi-turn chat wrapper around a Hugging Face causal language model.

    The running conversation is kept in ``self.messages`` as a list of
    ``{"role": ..., "content": ...}`` dicts, starting with the system prompt.
    Each call to :meth:`do_reply` appends the user turn and the model's answer,
    so every generation sees the full history.
    """

    # Inputs (compared case-insensitively, after stripping) that end the
    # interactive loop. "<exit>" is accepted because the banner prints it that way.
    _EXIT_COMMANDS = frozenset({"exit", "<exit>"})

    def __init__(self, model_name: str, system_prompt: str,
                 hold_convo: bool = True, history_file: str = "chat_history.csv"):
        """Load the model and tokenizer and seed the history with the system prompt.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer.
            system_prompt: Text of the initial system message; surrounding
                whitespace is stripped.
            hold_convo: When False, :meth:`do_convo` does not enter its loop.
            history_file: Path of the CSV written by :meth:`save_chat_history`.
        """
        self.model_name = model_name
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, torch_dtype="auto", device_map="auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.messages = [{"role": "system", "content": system_prompt.strip()}]
        self.hold_convo = hold_convo
        self.history_file = history_file

    def template_tokenize(self):
        """Render the history with the tokenizer's chat template and tokenize it.

        Returns:
            The tokenizer output (PyTorch tensors) moved to the model's device.
        """
        text = self.tokenizer.apply_chat_template(
            self.messages, tokenize=False, add_generation_prompt=True
        )
        return self.tokenizer([text], return_tensors="pt").to(self.model.device)

    def add_user_prompt(self, prompt):
        """Append ``prompt`` to the history as a user message."""
        self.messages.append({"role": "user", "content": prompt})

    def add_bot_response(self, response):
        """Append ``response`` to the history as an assistant message."""
        self.messages.append({"role": "assistant", "content": response})

    def save_chat_history(self):
        """Write the history to ``self.history_file`` as a two-column CSV.

        The columns are ``roles`` and ``content``; both values are stripped of
        surrounding whitespace. No index column is written.
        """
        df = pd.DataFrame(
            {
                "roles": [msg["role"].strip() for msg in self.messages],
                "content": [msg["content"].strip() for msg in self.messages],
            }
        )
        df.to_csv(self.history_file, index=False)
        print(f"Saved Chat History file to -> {self.history_file}")

    def do_reply(self, prompt, max_new_tokens: int = 512):
        """Generate the assistant's reply to ``prompt`` and record both turns.

        Args:
            prompt: The user's message.
            max_new_tokens: Upper bound on generated tokens, forwarded to
                ``model.generate``.

        Returns:
            The decoded reply text (special tokens removed).

        Raises:
            Exception: Whatever tokenization, generation, or decoding raises.
                In that case the user message is removed again so the history
                never ends with an unanswered user turn.
        """
        self.add_user_prompt(prompt)
        try:
            model_inputs = self.template_tokenize()
            generated_ids = self.model.generate(**model_inputs, max_new_tokens=max_new_tokens)
            # generate() returns prompt + continuation; slice off the prompt
            # tokens so only the newly generated reply is decoded.
            reply_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]
            response = self.tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
        except Exception:
            # Roll back the user turn appended above before propagating.
            self.messages.pop()
            raise
        self.add_bot_response(response)
        return response

    def do_convo(self, save_chat=True):
        """Run an interactive chat loop on stdin/stdout.

        The loop ends when the user types ``exit`` or ``<exit>`` (any case) as
        the whole message; a message that merely contains the word "exit" is
        treated as a normal prompt. Blank input is ignored. After the loop,
        ``self.hold_convo`` is set to False, so a later call will not re-enter
        the loop unless the attribute is reset.

        Args:
            save_chat: When True, call :meth:`save_chat_history` at the end.
        """
        print("This is a Bot. Type <exit> to end conversation.")
        while self.hold_convo:
            prompt = input("User:>>> ").strip()
            if prompt.lower() in self._EXIT_COMMANDS:
                break
            if not prompt:
                # Nothing to answer; don't spend a generation on an empty turn.
                continue
            # perf_counter is monotonic, so the elapsed time cannot be skewed
            # by system clock adjustments.
            start_time = time.perf_counter()
            response = self.do_reply(prompt)
            execution_time = time.perf_counter() - start_time
            print(f"Bot: <<{execution_time:.6f} seconds>>> " + response)
        print("End of Bot conversation!!!")
        self.hold_convo = False
        if save_chat:
            self.save_chat_history()

In [6]:
# Model identifier handed to LlmBot (and from there to from_pretrained).
model_name = "Qwen/Qwen2.5-7B-Instruct"

# System message that opens every conversation. The leading and trailing
# newlines are kept here; LlmBot strips them before storing the message.
system_prompt = (
    "\n"
    "You are a helpful assistant. You reply only with the reply sentence and nothing else.\n"
    "Keep your replies short, concise, and friendly.\n"
)

In [7]:
# Build the bot; the constructor loads the model and tokenizer named by model_name.
bot = LlmBot(model_name=model_name, system_prompt=system_prompt)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Mock Testing

In [8]:
# Scripted user turns fed to the chat loop in place of keyboard input.
test_inputs = [
    "Hello",
    "How are you?",
    "I'm fine",
    "What about you?",
    "Goodbye",
    "exit",
]

In [9]:
# Replace the built-in input() with a mock that returns the scripted turns,
# one per call, so the interactive loop runs without a keyboard.
with patch("builtins.input", side_effect=test_inputs):
    bot.do_convo(save_chat=True)

This is a Bot. Type <exit> to end conversation.
Bot: <<22.039658 seconds>>> Hello! How can I help you today?
Bot: <<9.525884 seconds>>> I'm doing well, thanks for asking! How about you?
Bot: <<7.908278 seconds>>> Great to hear that! Have a nice day!
Bot: <<10.545167 seconds>>> I'm doing well, thanks! How can I assist you further?
Bot: <<5.399798 seconds>>> Goodbye! Take care!
End of Bot conversation!!!
Saved Chat History file to -> chat_history.csv


In [10]:
# Read back the file the bot actually wrote, rather than repeating its default
# name, so this cell stays correct if the bot is built with another history_file.
pd.read_csv(bot.history_file)

Unnamed: 0,roles,content
0,system,You are a helpful assistant. You reply only wi...
1,user,Hello
2,assistant,Hello! How can I help you today?
3,user,How are you?
4,assistant,"I'm doing well, thanks for asking! How about you?"
5,user,I'm fine
6,assistant,Great to hear that! Have a nice day!
7,user,What about you?
8,assistant,"I'm doing well, thanks! How can I assist you f..."
9,user,Goodbye
