In [1]:
import torch
from transformers import pipeline

pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
# <|system|>
# You are a friendly chatbot who always responds in the style of a pirate.</s>
# <|user|>
# How many helicopters can a human eat in one sitting?</s>
# <|assistant|>
# ...

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Device set to use cuda:0


<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
It's difficult to determine the exact number of helicopters a human can eat in one sitting, as this depends on factors such as age, body size, and energy requirements. However, some estimates suggest that a human can consume around 100 to 200 pounds of food in a sitting, depending on their size, activity level, and diet. This is considered a moderate amount for an average-sized adult.


In [1]:
!pip install --upgrade pip

Collecting pip
  Downloading pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-24.3.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-24.3.1


In [3]:
pip install python-telegram-bot flask

Collecting python-telegram-bot
  Downloading python_telegram_bot-21.10-py3-none-any.whl.metadata (17 kB)
Downloading python_telegram_bot-21.10-py3-none-any.whl (669 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/669.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m669.5/669.5 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-telegram-bot
Successfully installed python-telegram-bot-21.10


In [12]:
def get_text_after_keyword(input_string, keyword="<|assistant|>"):
    keyword_index = input_string.find(keyword)
    if keyword_index != -1:
        # Return the text after the keyword
        return input_string[keyword_index + len(keyword):].strip()
    else:
        # Return an empty string if the keyword is not found
        return ""


In [13]:
import torch
import asyncio  # Import asyncio to handle the event loop
from transformers import pipeline
from telegram import Update
from telegram.ext import Application, MessageHandler, CallbackContext, filters
import nest_asyncio

# Apply nest_asyncio to handle async loops in Jupyter (useful in Jupyter notebooks)
nest_asyncio.apply()

# Replace with your bot token
BOT_TOKEN = "Replace with your bot token"

# Set up the TinyLlama pipeline (adjust the model and parameters based on your setup)
pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")

# Define a function to generate a response from TinyLlama
def get_response(input_text):
       # Format the input message with TinyLlama's tokenizer
        formatted_input = [
            {"role": "user", "content": input_text},
        ]
        prompt = pipe.tokenizer.apply_chat_template(formatted_input, tokenize=False, add_generation_prompt=True)
        # Get the model's response
        outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
        return get_text_after_keyword(outputs[0]["generated_text"])

# Define a function to handle incoming messages from Telegram
async def handle_message(update: Update, context: CallbackContext):
    try:
        # Get the message text from the user
        user_message = update.message.text
        print(f"User message: {user_message}")  # Logs the message received

        # Get a response from the TinyLlama model
        bot_response = get_response(user_message)
        print(f"Bot response: {bot_response}")  # Logs the generated response

        # Send the response back to the user on Telegram
        await update.message.reply_text(bot_response)
    except Exception as e:
        print(f"Error in handle_message: {e}")
        await update.message.reply_text("Oops! Something went wrong.")

# Set up the bot application and start it
async def main():
    try:
        # Create the application
        application = Application.builder().token(BOT_TOKEN).build()

        # Add message handler
        application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

        # Start the bot
        print("Bot is running...")
        await application.run_polling()
    except Exception as e:
        print(f"Error in main: {e}")

# Start the bot in a new event loop (reusing the current loop in Jupyter)
# loop = asyncio.get_event_loop()
# if loop.is_running():
#     print("Reusing the existing event loop...")
#     task = loop.create_task(main())  # Create a task and start it
# else:
    # asyncio.run(await main())  # Use asyncio.run if no event loop is running
try:
    await main()  # Start the bot in Jupyter
except Exception as e:
    print(f"Error in main: {e}")


Device set to use cuda:0


Bot is running...
User message: Tell me about whales
input_text Tell me about whales
[{'generated_text': '<|user|>\nTell me about whales</s>\n<|assistant|>\nWhales are mammals that belong to the phylum Cetacea, which includes dolphins, porpoises, and whales. They are known for their distinctive features, including long, slender bodies, a hump on their back, and a blowhole in their forehead.\n\nThere are several species of whales, including the blue whale, the humpback whale, the minke whale, the orca, and the humpback whale. Each species has its unique characteristics, such as the size, shape, and behavior of their mouths, which are used to feed on small prey such as squid and fish.\n\nWhales are also known for their complex social systems, including groups of mothers and calves, and their ability to communicate through whale songs and vocalizations. They have also been known to use tools and exhibit sophisticated navigation skills.\n\nWhale watching is a popular activity that allows v