In [1]:
!pip install --upgrade datasets evaluate fsspec telebot accelerate peft bitsandbytes fasttext
!pip install python-telegram-bot --upgrade

Collecting datasets
  Downloading datasets-4.1.1-py3-none-any.whl.metadata (18 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting fsspec
  Downloading fsspec-2025.9.0-py3-none-any.whl.metadata (10 kB)
Collecting telebot
  Downloading telebot-0.0.5-py3-none-any.whl.metadata (2.0 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting fasttext
  Downloading fasttext-0.9.3.tar.gz (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.4/73.4 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting pyTelegramBotAPI (from telebot)
  Downloading pytelegrambot

In [2]:
import evaluate
import json
import pickle
import requests
import base64
import re
import pandas as pd
import numpy as np
import torch
import datasets

from typing import Dict, Any
from telebot import TeleBot, types
from telebot.handler_backends import State, StatesGroup
from telebot.storage import StateMemoryStorage
from bs4 import BeautifulSoup
from tqdm.auto import tqdm, trange
from fasttext import load_model
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForMaskedLM, AutoModelForCausalLM
from transformers import DataCollatorWithPadding, DataCollatorForLanguageModeling, Trainer, TrainingArguments, BitsAndBytesConfig
from torch.utils.data import DataLoader
from torch.optim import Adam
from datasets import Dataset, DatasetDict, load_dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from torch.optim import AdamW
from torch.utils.data import DataLoader
from peft import get_peft_model, VBLoRAConfig, PeftType, prepare_model_for_kbit_training, LoraConfig

In [3]:
model_name = "Qwen/Qwen3-4B-Instruct-2507"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/99.6M [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/238 [00:00<?, ?B/s]

In [4]:
model_emb = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")
cities_data_raw = pd.read_csv('IATA_cities_only.csv')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
class CityIATASearcher:
    def __init__(self, data, model):

        self.model = model
        self.cities_data_raw = data

        self.cities = [list(self.cities_data_raw.index)[i][1][:-2] for i in range(len(self.cities_data_raw))]
        self.iatas = [list(self.cities_data_raw.index)[i][0] for i in range(len(self.cities_data_raw))]
        self.cities_data = dict(zip(self.cities, self.iatas))
        self.city_names = list(self.cities_data.keys())
        self.embeddings = self.model.encode(self.city_names, convert_to_tensor=True)

    def find_iata(self, query, threshold=0.7, top_k=3):

        query_embedding = self.model.encode([query], convert_to_tensor=True)

        similarities = cosine_similarity(query_embedding.cpu(), self.embeddings.cpu())

        results = []
        for i in np.argsort(similarities[0])[::-1][:top_k]:
            similarity_score = similarities[0][i]
            city_name = self.city_names[i]
            iata_code = self.cities_data[city_name]

            if similarity_score >= threshold:
                results.append({
                    'city': city_name,
                    'iata': iata_code,
                    'similarity': float(similarity_score)
                })

        return results

searcher = CityIATASearcher(cities_data_raw, model_emb)


In [None]:
class FlightAPI:
    def __init__(self):
        self.api_key = "..."
        self.base_url = "https://api.travelpayouts.com/aviasales/v3/prices_for_dates?"

    def search_flights(self, travel_info, departure_iata, destination_iata):
        origin = departure_iata
        destination = destination_iata
        departure_at = travel_info['date']
        one_way = travel_info['return_ticket']
        direct = travel_info['direct']
        if one_way == 1 or one_way == '1':
            one_way = 'true'
            return_at = travel_info['date_back']
        else:
            one_way = 'false'
            return_at = ''
        if direct == 1 or direct == '1':
            direct = 'true'
        else:
            direct = 'false'

        response = requests.get(f"{self.base_url}origin={origin}&destination={destination}&departure_at={departure_at}&return_at={return_at}&unique=false&sorting=price&direct={direct}&currency=rub&limit=30&page=1&one_way={one_way}&token={self.api_key}")
        return response.json()

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
import nest_asyncio
import asyncio

nest_asyncio.apply()

class QwenTravelAssistant:
    def __init__(self, model, tokenizer, searcher):
        self.tokenizer = tokenizer
        self.model = model
        self.searcher = searcher
        self.travel_api = FlightAPI()
        self.conversations = {}
        self.system_prompt = """Ты — ассистент для бронирования авиабилетов. Ничего не придумывай, если нет явной информации, пиши -1, если надо - задавай вопросы"""
        self.current_year = '2025'
        self.function_description = {
                "name": "extract_travel_info",
                "description": "Извлекает информацию о путешествии из текста",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "destination": {"type": "string", "description": "Город назначения/прибытия (если нет точной информации, то напиши -1)"},
                        "departure": {"type": "string", "description": "Город отправления/вылета (если нет точной информации, то напиши -1)"},
                        "date": {"type": "string", "description": f"Дата вылета из пункта отправления в СТРОГО формате 2022-01-01, используй {self.current_year} год если год не указан (если нет точной информации, то напиши -1)"},
                        "return_ticket": {"type": "integer", "description": "Нужен ли обратный билет (если нет точной информации, то напиши -1)"},
                        "date_back": {"type": "string", "description": f"Дата возвращения СТРОГО в формате 2022-01-01, используй {self.current_year} год если год не указан (если нет точной информации, то напиши -1)"},
                        "direct": {"type": "integer", "description": "Ищет ли пользователь прямые рейсы без пересадок (если нет точной информации, то напиши -1)"}
                    }
                }
            }


    def extract_from_tool_call(self, text):
        try:
            tool_call_match = json.loads('{'+re.findall(r'"arguments": {(.*?)}', text, re.DOTALL)[-1]+'}')
        except:
            tool_call_match = json.loads('{"destination":'+re.findall(r'{"destination": (.*?)"}', text, re.DOTALL)[-1]+'"}')
        return tool_call_match


    def extract_travel_info(self, text):
        prompt = f"""Извлеки информацию о путешествии из текста:

        Текст: "{text}"
        Ответ в формате JSON. В ответе могут быть ТОЛЬКО эти поля: destination, departure, date, return_ticket, date_back, direct.
        """

        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]

        inputs = self.tokenizer.apply_chat_template(
                messages,
                return_tensors="pt",
                return_dict=True,
                tools=[self.function_description],
                tool_choice="extract_travel_info"
            )

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=100,
                temperature=0.1,
                do_sample=True
            )

        json_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = self.extract_from_tool_call(json_response)
        return response

    def generate_response(self, user_message, chat_id):

        if chat_id in self.conversations:
            print('-----')
            print('Состояние chat id:')
            print(self.conversations[chat_id])
            print('-----')
            question = self.conversations[chat_id][-1]['content']
            new_travel_info = self.extract_travel_info(question+" "+user_message)
        else:
            new_travel_info = self.extract_travel_info(user_message)

        if all(item == -1 or item == '-1' for item in new_travel_info.values()) and chat_id in self.conversations:
            old_travel_info = self.conversations[chat_id][0]['travel_info']
            prompt = f"""Имея данную информацию о городе отправление/вылета, городе назначения/, дате вылета, есть ли обратный билет,
        дате возвращения, прямой ли рейс: {old_travel_info}, расскажи пользователю о его авиабилете. Начни со слова Assistant."""
            messages = [{"role": "system", "content": self.system_prompt},
                        {"role": "user", "content": prompt}]

            inputs = self.tokenizer.apply_chat_template(
                        messages,
                        return_tensors="pt",
                        return_dict=True)

            with torch.no_grad():
                outputs = self.model.generate(
                              **inputs,
                              max_new_tokens=100,
                              temperature=0.1,
                              do_sample=True)

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            try:
                response = response.split("Assistant:")[-1].strip()
            except:
                response = response.split("Assistant")[-1].strip()
            self.conversations[chat_id].append({"role": "assistant", "content": ''})
            return response

        else:
            if chat_id not in self.conversations:
                self.conversations[chat_id] = []
                travel_info = new_travel_info
            else:
                old_travel_info = self.conversations[chat_id][0]['travel_info']
                for key, value in new_travel_info.items():
                    if new_travel_info[key] != '-1' and new_travel_info[key] != -1:
                        old_travel_info[key] = new_travel_info[key]
                travel_info = old_travel_info

            if travel_info['return_ticket'] == 0:
                travel_info['date_back'] = None

            self.conversations[chat_id].append({"role": "user", "content": user_message, 'travel_info': travel_info})


            if '-1' not in travel_info.values() and -1 not in travel_info.values():
                try:
                    departure_iata = self.searcher.find_iata(travel_info['departure'], threshold=0.7)[0]['iata']
                    destination_iata = self.searcher.find_iata(travel_info['destination'], threshold=0.7)[0]['iata']
                    response = self.travel_api.search_flights(travel_info, departure_iata, destination_iata)
                    response = 'https://www.aviasales.ru/' + response['data'][0]['link']
                    return f'Нашел подходящий билет! Заходи по ссылке: {response}'
                except:
                    return 'Не смог найти билеты :('
            else:
                missing_info_list = []
                for key, value in travel_info.items():
                  if value == '-1' or value == -1:
                                missing_info_list.append(key)

                short_dict = self.function_description['parameters']['properties']

                prompt = f"""Задай вопросы, чтобы получить нужную информацию по поводу этих пунктов: "{ {key: short_dict[key] for key in missing_info_list if key in short_dict} }". Напиши после слова Assistant вопросы, вопросов должно быть {len(missing_info_list)}."""


                messages = [
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": prompt}
                ]

                inputs = self.tokenizer.apply_chat_template(
                        messages,
                        return_tensors="pt",
                        return_dict=True,
                    )

                with torch.no_grad():
                    outputs = self.model.generate(
                              **inputs,
                              max_new_tokens=500,
                              temperature=0.1,
                              do_sample=True
                          )

                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
                try:
                    response = response.split("Assistant:")[-1].strip()
                except:
                    response = response.split("Assistant")[-1].strip()
                self.conversations[chat_id].append({"role": "assistant", "content": response})

                return response

In [None]:
class TravelTelegramBot:
    def __init__(self, token):
        self.token = token
        self.assistant = QwenTravelAssistant(model, tokenizer, searcher)
        self.application = Application.builder().token(token).build()

        self.application.add_handler(CommandHandler("start", self.start))
        self.application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, self.handle_message))

    async def start(self, update, context):
        welcome_text = """
        🛫 Добро пожаловать в Travel Bot!

        Я помогу вам найти и забронировать авиабилеты.

        Просто напишите:
        - Откуда вы вылетаете?
        - Куда хотите полететь?
        - Какого числа?
        - Нужен ли обратный билет, если да то когда?
        - Ищете ли вы только прямые рейсы?

        """
        await update.message.reply_text(welcome_text)

    async def handle_message(self, update, context):
        user_message = update.message.text
        chat_id = update.message.chat_id
        response = self.assistant.generate_response(user_message, chat_id)
        await update.message.reply_text(response)

    def run(self):
        print("🤖 Бот с Qwen LLM запущен...")
        self.application.run_polling()

bot = TravelTelegramBot("...")
bot.run()

🤖 Бот с Qwen LLM запущен...
Запросил ответ
-----
ответ получил:
system
Ты — ассистент для бронирования авиабилетов. Ничего не придумывай, если нет явной информации, пиши -1, если надо - задавай вопросы

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "extract_travel_info", "description": "Извлекает информацию о путешествии из текста", "parameters": {"type": "object", "properties": {"destination": {"type": "string", "description": "Город назначения/прибытия (если нет точной информации, то напиши -1)"}, "departure": {"type": "string", "description": "Город отправления/вылета (если нет точной информации, то напиши -1)"}, "date": {"type": "string", "description": "Дата вылета из пункта отправления в СТРОГО формате 2022-01-01, используй 2025 год если год не указан (если нет точной информации, то напиши -1)"}, "return_ticket": {"type": "integer", "description": "Нужен ли об

RuntimeError: Cannot close a running event loop