TELEGRAM_TOKEN и TMDB_API_KEY убраны из общего доступа

In [1]:
import logging
import requests
import threading
import sqlite3
import re
import numpy as np
import pandas as pd
import ast

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from telegram import InlineKeyboardButton, InlineKeyboardMarkup, Update
from telegram.ext import (
    Updater,
    CommandHandler,
    MessageHandler,
    Filters,
    CallbackQueryHandler,
    CallbackContext
)

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)

TELEGRAM_TOKEN = "token1"
TMDB_API_KEY = "token2"

TMDB_BASE_URL = "https://api.themoviedb.org/3"
IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"

# ---------------------------------------------------
# 1. Локальный датасет (tmdb_5000_movies/credits) -> cosine_sim
# ---------------------------------------------------
movies_df = pd.read_csv("tmdb_5000_movies.csv")
credits_df = pd.read_csv("tmdb_5000_credits.csv")
credits_df.columns = ['id','title','cast','crew']
movies_df = movies_df.merge(credits_df, on='id')
movies_df = movies_df[['id','original_title','overview','genres','cast','keywords','crew']]
movies_df.dropna(inplace=True)

def convert(obj):
    L = []
    for i in ast.literal_eval(obj):
        L.append(i['name'])
    return L

movies_df['genres'] = movies_df['genres'].apply(convert)
movies_df['keywords'] = movies_df['keywords'].apply(convert)

def convert3(obj):
    L = []
    counter = 0
    for i in ast.literal_eval(obj):
        if counter < 3:
            L.append(i['name'])
            counter += 1
        else:
            break
    return L

movies_df['cast'] = movies_df['cast'].apply(convert3)

def fetch_director(obj):
    for i in ast.literal_eval(obj):
        if i['job'] == 'Director':
            return [i['name']]
    return []

movies_df['crew'] = movies_df['crew'].apply(fetch_director)

def clean_data(x):
    if isinstance(x, list):
        return [str.lower(i.replace(" ", "")) for i in x]
    else:
        if isinstance(x, str):
            return str.lower(x.replace(" ", ""))
        else:
            return ''

for feat in ['genres','keywords','cast','crew']:
    movies_df[feat] = movies_df[feat].apply(clean_data)

def create_soup(row):
    return ' '.join(row['keywords']) + ' ' + \
           ' '.join(row['cast']) + ' ' + \
           ' '.join(row['crew']) + ' ' + \
           ' '.join(row['genres'])

movies_df['soup'] = movies_df.apply(create_soup, axis=1)

count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(movies_df['soup'])
cosine_sim = cosine_similarity(count_matrix, count_matrix)

movies_df = movies_df.reset_index(drop=True)
indices = pd.Series(movies_df.index, index=movies_df['original_title'].str.lower())

def get_recommendations(original_title):
    title_lower = original_title.lower()
    if title_lower not in indices:
        return []
    idx = indices[title_lower]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Берём 14, т.к. [1:15] => это 14 фильмов
    sim_scores = sim_scores[1:15]
    movie_indices = [s[0] for s in sim_scores]
    return movies_df.iloc[movie_indices]

# ---------------------------------------------------
# 2. Перевод из ру названия в англ
# ---------------------------------------------------
def find_original_by_russian(rus_title):
    url = f"{TMDB_BASE_URL}/search/movie"
    params = {
        "api_key": TMDB_API_KEY,
        "language": "ru-RU",
        "query": rus_title
    }
    resp = requests.get(url, params=params)
    data = resp.json()
    results = data.get("results", [])
    if not results:
        return None
    return results[0].get("original_title", None)

# ---------------------------------------------------
# 3. Инициализация БД
# ---------------------------------------------------
def init_db():
    conn = sqlite3.connect("database.db")
    c = conn.cursor()
    c.execute('''
        CREATE TABLE IF NOT EXISTS user_movies (
            user_id INTEGER,
            movie_id INTEGER
        )
    ''')
    conn.commit()
    conn.close()

def add_watched_movie(user_id, movie_id):
    conn = sqlite3.connect("database.db")
    c = conn.cursor()
    c.execute('SELECT 1 FROM user_movies WHERE user_id=? AND movie_id=?',
              (user_id, movie_id))
    row = c.fetchone()
    if not row:
        c.execute('INSERT INTO user_movies (user_id, movie_id) VALUES (?, ?)',
                  (user_id, movie_id))
        conn.commit()
    conn.close()

def user_has_watched_movie(user_id, movie_id):
    conn = sqlite3.connect("database.db")
    c = conn.cursor()
    c.execute('SELECT 1 FROM user_movies WHERE user_id=? AND movie_id=?',
              (user_id, movie_id))
    row = c.fetchone()
    conn.close()
    return row is not None

def get_user_movies(user_id):
    conn = sqlite3.connect("database.db")
    c = conn.cursor()
    c.execute('SELECT movie_id FROM user_movies WHERE user_id=?', (user_id,))
    rows = c.fetchall()
    conn.close()
    return [r[0] for r in rows]

def clear_user_movies(user_id):
    conn = sqlite3.connect("database.db")
    c = conn.cursor()
    c.execute('DELETE FROM user_movies WHERE user_id=?', (user_id,))
    conn.commit()
    conn.close()

# ---------------------------------------------------
# Вспомогательные функции для поиска схожих фильмов
# ---------------------------------------------------
def send_movie_info_ru(update, info):
    ru_title = info['title']
    overview = info['overview']
    poster_path = info['poster_path']
    tmdb_id = info['id']
    rel_date = info.get('release_date') or "N/A"
    rating = info.get('vote_average', 0.0)

    text = (
        f"<b>{ru_title}</b>\n"
        f"Дата выхода: {rel_date}\n"
        f"Рейтинг: {rating:.1f}/10\n\n"
        f"{overview}"
    )

    watch_button = InlineKeyboardButton("Я посмотрел", callback_data=f"watched_{tmdb_id}")
    reply_markup = InlineKeyboardMarkup([[watch_button]])

    if poster_path:
        poster_url = f"{IMAGE_BASE_URL}{poster_path}"
        update.message.reply_photo(
            photo=poster_url,
            caption=text,
            parse_mode="HTML",
            reply_markup=reply_markup
        )
    else:
        update.message.reply_text(text, parse_mode="HTML", reply_markup=reply_markup)

def get_movie_details_tmdb(movie_id):
    url = f"{TMDB_BASE_URL}/movie/{movie_id}"
    params = {
        "api_key": TMDB_API_KEY,
        "language": "ru-RU"
    }
    resp = requests.get(url, params=params)
    if resp.status_code != 200:
        return None
    data = resp.json()
    return data

# ---------------------------------------------------
# 4. /similar_title -- основной блок
# ---------------------------------------------------
def similar_title_command(update, context):
    user_id = update.effective_user.id
    rus_title = " ".join(context.args).strip()
    if not rus_title:
        update.message.reply_text("Введите название фильма на русском. Пример: /similar_title Человек паук")
        return

    orig_title = find_original_by_russian(rus_title)
    if not orig_title:
        update.message.reply_text(f"Фильм '{rus_title}' не найден в TMDB.")
        return

    rec_titles = get_recommendations(orig_title) 
    print(rec_titles)
    if rec_titles.empty:
        update.message.reply_text(f"Не найдены похожие фильмы к '{orig_title}'.")
        return

    final_ids = []
    for index, row in rec_titles.iterrows():
        local_id = row['id']
        if not user_has_watched_movie(user_id, local_id):
            final_ids.append(local_id)

    if not final_ids:
        update.message.reply_text("Все похожие фильмы уже просмотрены или ничего не найдено.")
        return

    context.user_data["similar_list"] = final_ids
    context.user_data["similar_index"] = 0
    update.message.reply_text(f"Похожие на '{rus_title}' (orig: {orig_title}):")
    show_more_similar(update, context)

def show_more_similar(update, context):
    sim_ids = context.user_data.get("similar_list", [])
    idx = context.user_data.get("similar_index", 0)

    if idx >= len(sim_ids):
        update.message.reply_text("Больше фильмов нет.")
        return

    end_idx = idx + 3
    batch = sim_ids[idx:end_idx]

    for mid in batch:
        info = get_russian_info_by_id(mid)
        if info:
            send_movie_info_ru(update, info)
        else:
            update.message.reply_text(f"[ID={mid}] Нет данных.")

    context.user_data["similar_index"] = end_idx

    if end_idx < len(sim_ids):
        kb = [[InlineKeyboardButton("Показать ещё", callback_data="sim_more")]]
        rm = InlineKeyboardMarkup(kb)
        update.message.reply_text("Показать ещё?", reply_markup=rm)
    else:
        update.message.reply_text("Это все похожие фильмы.")

def get_russian_info_by_id(movie_id):
    url = f"{TMDB_BASE_URL}/movie/{movie_id}"
    params = {
        "api_key": TMDB_API_KEY,
        "language": "ru-RU"
    }
    resp = requests.get(url, params=params)
    if resp.status_code != 200:
        return None
    data = resp.json()
    return {
        "title": data.get("title",""),
        "overview": data.get("overview",""),
        "poster_path": data.get("poster_path"),
        "id": data.get("id", 0),
        "release_date": data.get("release_date","N/A"),
        "vote_average": data.get("vote_average",0.0)
    }

# ---------------------------------------------------
# 5. /search_title -- основной блок
# ---------------------------------------------------
def search_title_command(update, context):
    user_id = update.effective_user.id
    user_title = " ".join(context.args)
    if not user_title:
        update.message.reply_text("Введите название фильма. Пример: /search_title Аватар")
        return

    try:
        all_movies = get_movies_by_title(user_title)
        filtered_movies = [m for m in all_movies if not user_has_watched_movie(user_id, m['id'])]

        if filtered_movies:
            for m in filtered_movies[:5]:
                send_movie_info(update, m)
        else:
            if all_movies:
                update.message.reply_text("Все найденные фильмы уже были вами просмотрены!")
            else:
                update.message.reply_text(f"По названию '{user_title}' ничего не найдено.")
    except Exception as e:
        logger.error(f"Ошибка при поиске по названию: {e}")
        update.message.reply_text("Произошла ошибка при поиске. Попробуйте ещё раз.")

# ---------------------------------------------------
# 6. /search_genre -- основной блок
# ---------------------------------------------------
def search_genre_command(update, context):
    user_id = update.effective_user.id
    user_args = context.args

    if not user_args:
        update.message.reply_text("Введите жанр. Пример: /search_genre комедия 2015-2017")
        return

    possible_year_arg = user_args[-1]
    years_range = None

    if re.match(r"^\d{4}-\d{4}$", possible_year_arg):
        start_year_str, end_year_str = possible_year_arg.split("-")
        years_range = (start_year_str, end_year_str)
        user_genre_parts = user_args[:-1]
    elif re.match(r"^\d{4}$", possible_year_arg):
        start_year_str = possible_year_arg
        end_year_str = possible_year_arg
        years_range = (start_year_str, end_year_str)
        user_genre_parts = user_args[:-1]
    else:
        user_genre_parts = user_args

    user_genre = " ".join(user_genre_parts).strip()
    if not user_genre:
        update.message.reply_text("Пожалуйста, укажите жанр. Пример: /search_genre комедия 2020")
        return

    try:
        all_movies = get_movies_by_genre(user_genre, years_range=years_range)
        filtered_movies = [m for m in all_movies if not user_has_watched_movie(user_id, m['id'])]

        if filtered_movies:
            context.user_data["movies"] = filtered_movies
            context.user_data["next_index"] = 0
            show_more_movies(update, context)
        else:
            if all_movies:
                update.message.reply_text("Все найденные фильмы по этому жанру вы уже посмотрели!")
            else:
                update.message.reply_text(f"По жанру '{user_genre}' ничего не найдено (с учётом года).")
    except Exception as e:
        logger.error(f"Ошибка при поиске по жанру: {e}")
        update.message.reply_text("Произошла ошибка при поиске. Попробуйте ещё раз.")

def show_more_movies(update, context):
    movies = context.user_data.get("movies", [])
    next_index = context.user_data.get("next_index", 0)

    if next_index >= len(movies):
        update.message.reply_text("Больше фильмов нет по этому запросу.")
        return

    end_index = next_index + 3
    for m in movies[next_index:end_index]:
        send_movie_info(update, m)

    context.user_data["next_index"] = end_index

    if end_index < len(movies):
        keyboard = [[InlineKeyboardButton("Показать ещё", callback_data="show_more")]]
        reply_markup = InlineKeyboardMarkup(keyboard)
        update.message.reply_text(
            "Показать ещё фильмы?",
            reply_markup=reply_markup
        )
    else:
        update.message.reply_text("Это все фильмы по данному запросу.")

# ---------------------------------------------------
# 7. CALLBACK
# ---------------------------------------------------
def handle_callback(update, context):
    query = update.callback_query
    query.answer()
    data = query.data

    if data == "sim_more":
        temp_update = TempUpdate(query.message)
        show_more_similar(temp_update, context)

    elif data == "show_more":
        temp_update = TempUpdate(query.message)
        show_more_movies(temp_update, context)

    elif data.startswith("watched_"):
        try:
            mid_str = data.split("_", 1)[1]
            mid = int(mid_str)
            user_id = query.from_user.id
            add_watched_movie(user_id, mid)
            query.message.reply_text("Отлично, записал, что вы посмотрели этот фильм!")
        except Exception as ex:
            logger.error(f"Ошибка при watched_: {ex}")

class TempUpdate:
    def __init__(self, msg):
        self.message = msg

# ---------------------------------------------------
# 8. Команды start, my_watched, clear_watched
# ---------------------------------------------------
def start_command(update, context):
    update.message.reply_text(
        "Привет! Я бот для поиска фильмов.\n\n"
        "Команды:\n"
        "/search_title <название> — поиск фильмов по названию.\n"
        "/search_genre <жанр> [год или диапазон годов] — поиск по жанру.\n"
        "/similar_title <название> — найти фильмы, похожие на указанный.\n"
        "/my_watched — показать все просмотренные фильмы.\n"
        "/clear_watched — очистить список просмотренных.\n\n"
        "Например:\n"
        "/search_title Аватар\n"
        "/search_genre комедия 2019-2020\n"
        "/similar_title Человек-паук"
    )

def my_watched_command(update, context):
    user_id = update.effective_user.id
    watched_ids = get_user_movies(user_id)
    if not watched_ids:
        update.message.reply_text("Нет просмотренных фильмов.")
        return

    lines = []
    for mid in watched_ids:
        movie_data = get_movie_details_tmdb(mid)
        if movie_data:
            title_ru = movie_data.get("title","Без названия")
            rel_date = movie_data.get("release_date","N/A")
            year = rel_date[:4] if rel_date else "N/A"
            line = f"• {title_ru} ({year})"
            lines.append(line)
        else:
            lines.append(f"• [ID={mid}] Данные не найдены")

    full_text = "Ваши просмотренные фильмы:\n\n" + "\n".join(lines)
    update.message.reply_text(full_text)

def clear_watched_command(update, context):
    user_id = update.effective_user.id
    clear_user_movies(user_id)
    update.message.reply_text("Список просмотренных фильмов очищен!")

# ---------------------------------------------------
# 9. Дополнительные функции к поиску по жанру и названию
# ---------------------------------------------------
def get_movies_by_title(title):
    url = f"{TMDB_BASE_URL}/search/movie"
    params = {
        "api_key": TMDB_API_KEY,
        "language": "ru-RU",
        "query": title
    }
    response = requests.get(url, params=params)
    data = response.json()
    return data.get("results", [])

def get_movies_by_genre(genre_name, years_range=None):
    genre_id = find_genre_id(genre_name)
    if not genre_id:
        return []

    params = {
        "api_key": TMDB_API_KEY,
        "language": "ru-RU",
        "sort_by": "popularity.desc",
        "with_genres": genre_id,
    }

    if years_range:
        start_year_str, end_year_str = years_range
        params["primary_release_date.gte"] = f"{start_year_str}-01-01"
        params["primary_release_date.lte"] = f"{end_year_str}-12-31"

    url = f"{TMDB_BASE_URL}/discover/movie"
    response = requests.get(url, params=params)
    data = response.json()
    return data.get("results", [])

def find_genre_id(genre_name):
    genres = get_genres()
    lower_name = genre_name.lower()
    for g in genres:
        if lower_name in g["name"].lower():
            return g["id"]
    return None

def get_genres():
    url = f"{TMDB_BASE_URL}/genre/movie/list"
    params = {
        "api_key": TMDB_API_KEY,
        "language": "ru-RU"
    }
    response = requests.get(url, params=params)
    data = response.json()
    return data.get("genres", [])

def get_movie_details(movie_id):
    url = f"{TMDB_BASE_URL}/movie/{movie_id}"
    params = {
        "api_key": TMDB_API_KEY,
        "language": "ru-RU"
    }
    resp = requests.get(url, params=params)
    if resp.status_code == 200:
        return resp.json()
    else:
        return None

def send_movie_info(update, movie):
    title = movie.get("title", "Без названия")
    release_date = movie.get("release_date", "N/A")
    overview = movie.get("overview", "Нет описания :(")
    poster_path = movie.get("poster_path")
    vote_average = movie.get("vote_average") or 0.0
    movie_id = movie.get("id")

    caption_text = (
        f"<b>{title}</b>\n"
        f"Дата выхода: {release_date}\n"
        f"Рейтинг: {vote_average:.1f}/10\n\n"
        f"{overview}"
    )

    watch_button = InlineKeyboardButton("Я посмотрел", callback_data=f"watched_{movie_id}")
    keyboard = [[watch_button]]
    reply_markup = InlineKeyboardMarkup(keyboard)

    if poster_path:
        poster_url = f"{IMAGE_BASE_URL}{poster_path}"
        update.message.reply_photo(
            photo=poster_url,
            caption=caption_text,
            parse_mode="HTML",
            reply_markup=reply_markup
        )
    else:
        update.message.reply_text(
            caption_text,
            parse_mode="HTML",
            reply_markup=reply_markup
        )

def send_watched_movie_info(update, movie):
    title = movie.get("title", "Без названия")
    release_date = movie.get("release_date", "N/A")
    text = f"— {title} ({release_date})"
    update.message.reply_text(text)
    
# ------------------------------------------------------------------------------
# 10. Обработчик «всех прочих» сообщений
# ------------------------------------------------------------------------------
def handle_text(update, context):
    update.message.reply_text(
        "Используйте команды:\n"
        "/search_title <название фильма>\n"
        "/search_genre <жанр> [год или диапазон годов]\n"
        "/similar_title <название> — фильмы, похожие на...\n"
        "/my_watched — все просмотренные фильмы\n"
        "/clear_watched — очистить список просмотренных\n\n"
        "Пример: /search_genre боевик 2015-2017\n"
        "Пример: /search_title Аватар\n"
        "Пример: /similar_title Человек-паук"
    )

# ---------------------------------------------------
# 11. Запуск бота
# ---------------------------------------------------
def run_bot():
    init_db()
    updater = Updater(TELEGRAM_TOKEN, use_context=True)
    dp = updater.dispatcher

    dp.add_handler(CommandHandler("start", start_command))
    dp.add_handler(CommandHandler("search_title", search_title_command))
    dp.add_handler(CommandHandler("search_genre", search_genre_command))
    dp.add_handler(CommandHandler("similar_title", similar_title_command))
    dp.add_handler(CommandHandler("my_watched", my_watched_command))
    dp.add_handler(CommandHandler("clear_watched", clear_watched_command))

    dp.add_handler(CallbackQueryHandler(handle_callback))

    dp.add_handler(MessageHandler(Filters.text & ~Filters.command, handle_text))

    updater.start_polling()
    print("Бот запущен")

In [2]:
import threading

bot_thread = threading.Thread(target=run_bot, daemon=True)
bot_thread.start()

2025-01-22 19:20:51,906 - apscheduler.scheduler - INFO - Scheduler started


Бот запущен
         id                       original_title  \
79    10138                           Iron Man 2   
31    68721                           Iron Man 3   
16    24428                         The Avengers   
182  102899                              Ant-Man   
26   271110           Captain America: Civil War   
7     99861              Avengers: Age of Ultron   
85   100402  Captain America: The Winter Soldier   
169    1771   Captain America: The First Avenger   
511   36657                                X-Men   
174    1724                  The Incredible Hulk   
126   76338                 Thor: The Dark World   
203   36658                                   X2   
33    36668                X-Men: The Last Stand   
94   118340              Guardians of the Galaxy   

                                              overview  \
79   With the world now aware of his dual life as t...   
31   When Tony Stark's world is torn apart by a for...   
16   When an unexpected enemy eme