In [3]:
import os
# Отключает все логи TensorFlow (0 - все, 1 - предупреждения, 2 - ошибки, 3 - критичные ошибки)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense, Dropout, BatchNormalization
from genre_recommender import preprocess_popularity, recommend_top_movies_by_genres, show_recommendations, get_all_genres
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import euclidean_distances, cosine_distances
from scipy.sparse import csr_matrix
import implicit
from implicit.als import AlternatingLeastSquares
from sklearn.preprocessing import LabelEncoder, normalize
from sklearn.decomposition import NMF, TruncatedSVD
from scipy.sparse import coo_matrix
from tqdm import tqdm
import json
import os
import math
import glob
import dill
import matplotlib.pyplot as plt
import hdbscan
from sklearn.model_selection import train_test_split
from keras.optimizers import Adam
from keras.losses import MeanSquaredError
import psutil
import time
import tracemalloc
import multiprocessing as mp
from sklearn.cluster import DBSCAN
import traceback
import sys
import gc
from sklearn.cluster import SpectralClustering
from multiprocessing import Process, Queue
from sklearn.mixture import GaussianMixture
import joblib
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from implicit.nearest_neighbours import bm25_weight
from datetime import datetime

## Загрузка данных (т.к вектора были уже получены в предыдущих заданиях)

In [5]:
movies = pd.read_csv('content/films/movies.csv')
ratings = pd.read_csv('content/films/ratings.csv')
tags = pd.read_csv('content/films/tags.csv')
genome_tags = pd.read_csv('content/films/genome-tags.csv')
genome_scores = pd.read_csv('content/films/genome-scores.csv')
importance_df = pd.DataFrame(columns=['movieId', 'importance_score'])

# Проверим размеры
print("movies:", movies.shape)
print("ratings:", ratings.shape)
print("tags:", tags.shape)
print("genome_tags:", genome_tags.shape)
print("genome_scores:", genome_scores.shape)

movies: (86537, 3)
ratings: (33832162, 4)
tags: (2328315, 4)
genome_tags: (1128, 2)
genome_scores: (18472128, 3)


In [6]:
# Преобразуем genome_scores в широкую таблицу: movieId × tagId
movie_tag_matrix = genome_scores.pivot(index='movieId', columns='tagId', values='relevance').fillna(0)

# Добавим к movie_tag_matrix названия тегов
tag_id_to_name = genome_tags.set_index('tagId')['tag']
movie_tag_matrix.columns = movie_tag_matrix.columns.map(tag_id_to_name)

In [7]:
X_content = movie_tag_matrix.values
scaler = MinMaxScaler()
X_content_scaled = scaler.fit_transform(X_content)

In [162]:
X_train_c, X_test_c = train_test_split(X_content_scaled, test_size=0.1, random_state=42)

In [163]:
# Autoencoder для movie_tag_matrix

input_dim = X_content_scaled.shape[1]
latent_dim = 64

# Архитектура автоэнкодера
input_layer = Input(shape=(input_dim,))
encoded = Dense(256, activation='relu')(input_layer)
encoded = Dense(128, activation='relu')(encoded)
latent = Dense(latent_dim, activation='relu')(encoded)

decoded = Dense(128, activation='relu')(latent)
decoded = Dense(256, activation='relu')(decoded)
output_layer = Dense(input_dim, activation='sigmoid')(decoded)

autoencoder_content = Model(input_layer, output_layer)
encoder_content = Model(input_layer, latent)

autoencoder_content.compile(optimizer='adam', loss='mse')
autoencoder_content.fit(X_train_c, X_train_c, 
                        validation_data=(X_test_c, X_test_c), 
                        epochs=30, batch_size=64, verbose=1)

# Получаем movie_content_vectors
movie_content_vector = encoder_content.predict(X_content_scaled)

Epoch 1/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: 0.0461 - val_loss: 0.0123
Epoch 2/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 0.0111 - val_loss: 0.0091
Epoch 3/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0085 - val_loss: 0.0075
Epoch 4/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0073 - val_loss: 0.0067
Epoch 5/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0066 - val_loss: 0.0062
Epoch 6/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0061 - val_loss: 0.0060
Epoch 7/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0057 - val_loss: 0.0056
Epoch 8/30
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 0.0055 - val_loss: 0.0054
Epoch 9/30
[1m231/231[0m [32m━━━━━

In [4]:
data = np.load('content/films/clasters/movie_content_vector.npz')
movie_content_vector = data['vectors']
movie_ids = data['movie_ids']


In [5]:
user_encoder = LabelEncoder()
user_encoder.fit(ratings['userId'])
joblib.dump(user_encoder, 'user_encoder.pkl')

item_encoder = LabelEncoder()
item_encoder.fit(ratings['movieId'])
joblib.dump(item_encoder, 'item_encoder.pkl')

# Применение энкодеров
user_encoder = joblib.load('user_encoder.pkl')
item_encoder = joblib.load('item_encoder.pkl')

ratings['user_idx'] = user_encoder.transform(ratings['userId'])
ratings['item_idx'] = item_encoder.transform(ratings['movieId'])

In [6]:
print(f"Создаем разреженную матрицу ...")

ratings_coo = coo_matrix(
    (ratings['rating'].astype(float), (ratings['user_idx'], ratings['item_idx']))
)

ratings_csr = ratings_coo.tocsr()  # CSR формат

Создаем разреженную матрицу ...


In [7]:
ratings_csr.shape

(330975, 83239)

In [8]:
# Применяем TruncatedSVD для понижения размерности
svd = TruncatedSVD(n_components=64, random_state=42)
user_vector = svd.fit_transform(ratings_csr)  # Размерность: (n_users × 64)

item_matrix = svd.components_.T  # Размерность: (n_items × 64)

print(f"Размерность user_matrix: {user_vector.shape}")
print(f"Размерность item_matrix: {item_matrix.shape}")


Размерность user_matrix: (330975, 64)
Размерность item_matrix: (83239, 64)


## Паттерн 1. Ансамбль ансамблей

Суть: Использование несколих разных методов на каждом из векторов отдельно(контентном и коллаборативном)ю Затем асамблирование результатов методов внутри каждого вектора. А затем асамблирование этих векторных ансамблей

In [9]:
movie_vectors_reduced = movie_content_vector

In [10]:
# Функция для получения 10 самых похожих фильмов
def get_similar_movies(movie_id, movie_vectors_reduced, top_n=30):
    cosine_sim = cosine_similarity(movie_vectors_reduced)  # Косинусное сходство
    movie_index = movie_id - 1  # Индексы начинаются с 0, а movie_id - с 1
    sim_scores = list(enumerate(cosine_sim[movie_index]))  # Список сходства
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)  # Сортируем
    return sim_scores[1:top_n+1]  # Топ-N схожих фильмов


In [11]:
def cluster_movies(movie_vectors_reduced, n_clusters=100):
    clustering_model = AgglomerativeClustering(n_clusters=n_clusters)
    cluster_labels = clustering_model.fit_predict(movie_vectors_reduced)
    return cluster_labels

In [12]:
knn_model = NearestNeighbors(n_neighbors=51, metric='cosine')  # 50 + 1, потому что первый будет сам фильм
knn_model.fit(movie_vectors_reduced)

In [13]:
def get_similar_movies_knn(movie_id, movie_vectors, top_n=30):
    # Получаем индекс фильма
    movie_index = movies[movies['movieId'] == movie_id].index[0]

    # Находим ближайшие фильмы
    distances, indices = knn_model.kneighbors([movie_vectors[movie_index]], n_neighbors=top_n+1)

    # Возвращаем список (индекс, 1 - расстояние) — кроме самого себя
    similar_movies = []
    for i in range(1, len(indices[0])):  # пропускаем 0-й элемент (сам фильм)
        idx = indices[0][i]
        similarity_score = 1 - distances[0][i]  # чем меньше расстояние, тем выше сходство
        similar_movies.append((idx, similarity_score))
    
    return similar_movies

In [14]:
def recommend_movies_with_strict_genres_filter(movie_ids, top_n=10, internal_top_n=30, mode='custom'):
    all_recommendations = []

    for movie_id in movie_ids:
        original_movie_row = movies[movies['movieId'] == movie_id]
        if original_movie_row.empty:
            continue
        
        original_movie_title = original_movie_row['title'].values[0]
        original_movie_genres = original_movie_row['genres'].values[0]
        original_genres_set = set(original_movie_genres.split('|'))

        print(f"Получаем рекомендации для фильма с ID: {movie_id} (Название: {original_movie_title}) (Жанры: {original_movie_genres})")

        if mode == 'knn':
            similar_movies = get_similar_movies_knn(movie_id, movie_vectors_reduced, top_n=internal_top_n)
        else:
            similar_movies = get_similar_movies(movie_id, movie_vectors_reduced, top_n=internal_top_n)

        filtered_similar_movies = []
        for idx, score in similar_movies:
            movie_row = movies.iloc[idx]
            movie_title = movie_row['title']
            movie_genres = movie_row['genres']

            if pd.isna(movie_title) or pd.isna(movie_genres):
                continue

            movie_genres_set = set(movie_genres.split('|'))

            if not original_genres_set.intersection(movie_genres_set):
                score = 0

            filtered_similar_movies.append({
                'movie_id': movies.iloc[idx]['movieId'],
                'title': movie_title,
                'genres': movie_genres,
                'similarity_score': score
            })

        # Применяем адаптивные веса
        n = len(filtered_similar_movies)
        if n > 0:
            weights = np.linspace(20, 1, n).round()
            for i, rec in enumerate(filtered_similar_movies):
                rec['weight'] = rec['similarity_score'] * weights[i]

        all_recommendations.extend(filtered_similar_movies)

    recommendations_df = pd.DataFrame(all_recommendations)
    if recommendations_df.empty:
        return pd.DataFrame(columns=['movie_id', 'title', 'genres', 'weight'])

    final_scores = recommendations_df.groupby('movie_id').agg({'weight': 'sum'}).reset_index()
    final_recommendations_df = final_scores.sort_values('weight', ascending=False).head(top_n)

    final_recommendations_df = final_recommendations_df.merge(
        movies[['movieId', 'title', 'genres']],
        left_on='movie_id',
        right_on='movieId',
        how='left'
    ).dropna(subset=['title', 'genres'])

    return final_recommendations_df[['movie_id', 'title', 'genres', 'weight']]

In [17]:
def recommend_movies_by_cluster_filtered(movie_ids, movie_vectors_reduced, movies, cluster_labels, top_n=10):
    all_recommendations = []

    for movie_id in movie_ids:
        try:
            movie_idx = movies[movies['movieId'] == movie_id].index[0]
            movie_title = movies.iloc[movie_idx]['title']
            movie_genres = set(movies.iloc[movie_idx]['genres'].split('|'))

            cluster_label = cluster_labels[movie_idx]
            same_cluster_indices = np.where(cluster_labels == cluster_label)[0]
            same_cluster_indices = same_cluster_indices[same_cluster_indices != movie_idx]

            if len(same_cluster_indices) == 0:
                continue

            distances = euclidean_distances(
                movie_vectors_reduced[movie_idx].reshape(1, -1),
                movie_vectors_reduced[same_cluster_indices]
            )[0]
            similarities = 1 / (1 + distances)

            for idx, score in zip(same_cluster_indices, similarities):
                title = movies.iloc[idx]['title']
                genres = movies.iloc[idx]['genres']
                genres_set = set(genres.split('|'))

                if pd.isna(title) or pd.isna(genres):
                    continue

                if not movie_genres.intersection(genres_set):
                    score = 0

                all_recommendations.append({
                    'movie_id': movies.iloc[idx]['movieId'],
                    'title': title,
                    'genres': genres,
                    'similarity_score': score
                })
        except IndexError:
            continue

    all_recommendations_df = pd.DataFrame(all_recommendations)
    if all_recommendations_df.empty:
        return all_recommendations_df

    all_recommendations_df = all_recommendations_df.groupby(['movie_id', 'title', 'genres']).agg({'similarity_score': 'sum'}).reset_index()
    all_recommendations_df = all_recommendations_df.sort_values('similarity_score', ascending=False).head(top_n)

    # Присваиваем адаптивные веса
    n = len(all_recommendations_df)
    weights = np.linspace(20, 1, n).round()
    all_recommendations_df['weight'] = weights

    return all_recommendations_df

In [18]:
def ensemble_recommendation(movie_ids_to_check, top_n=30):
    # Получаем рекомендации от KNN с фильтрацией по жанрам
    rec_knn = recommend_movies_with_strict_genres_filter(
        movie_ids=movie_ids_to_check,
        top_n=top_n,
        internal_top_n=50,
        mode='knn'  # для KNN
    )

    # Получаем рекомендации с использованием простого косинусного сходства
    rec_cosine = recommend_movies_with_strict_genres_filter(
        movie_ids=movie_ids_to_check,
        top_n=top_n,
        internal_top_n=50,
        mode='custom'  # обычное косинусное сходство
    )

    cluster_labels = cluster_movies(movie_vectors_reduced, n_clusters=100)
    
    # Получаем рекомендации от кластеризации
    rec_cluster = recommend_movies_by_cluster_filtered(
        movie_ids=movie_ids_to_check,
        movie_vectors_reduced=movie_content_vector,
        movies=movies,
        cluster_labels=cluster_labels,
        top_n=top_n
    )

    # Объединяем все рекомендации
    all_recs = pd.concat([rec_knn, rec_cosine, rec_cluster], ignore_index=True)

    # Суммируем веса по movie_id
    combined_scores = all_recs.groupby('movie_id').agg({'weight': 'sum'}).reset_index()

    # Добавляем названия и жанры
    combined_scores = combined_scores.merge(
        movies[['movieId', 'title', 'genres']],
        left_on='movie_id',
        right_on='movieId',
        how='left'
    ).dropna(subset=['title', 'genres'])

    combined_scores['final_score'] = combined_scores['weight']
    # Возвращаем топ-N рекомендаций
    return combined_scores.sort_values('final_score', ascending=False).head(top_n)[['movie_id', 'title', 'genres', 'final_score']]

In [137]:
# Рекомендации для списка фильмов
movie_ids_to_check = [1, 10, 100]
recommendation = ensemble_recommendation(movie_ids_to_check, top_n=30)

Получаем рекомендации для фильма с ID: 1 (Название: Toy Story (1995)) (Жанры: Adventure|Animation|Children|Comedy|Fantasy)
Получаем рекомендации для фильма с ID: 10 (Название: GoldenEye (1995)) (Жанры: Action|Adventure|Thriller)
Получаем рекомендации для фильма с ID: 100 (Название: City Hall (1996)) (Жанры: Drama|Thriller)
Получаем рекомендации для фильма с ID: 1 (Название: Toy Story (1995)) (Жанры: Adventure|Animation|Children|Comedy|Fantasy)
Получаем рекомендации для фильма с ID: 10 (Название: GoldenEye (1995)) (Жанры: Action|Adventure|Thriller)
Получаем рекомендации для фильма с ID: 100 (Название: City Hall (1996)) (Жанры: Drama|Thriller)


In [138]:
recommendation

Unnamed: 0,movie_id,title,genres,final_score
18,2881,Double Jeopardy (1999),Action|Crime|Drama|Thriller,59.506148
27,4533,"Return of the Living Dead, The (1985)",Comedy|Horror|Sci-Fi,57.478096
12,1591,Spawn (1997),Action|Adventure|Sci-Fi|Thriller,56.488427
16,2794,European Vacation (aka National Lampoon's Euro...,Adventure|Comedy|Romance,54.483886
22,3696,Night of the Creeps (1986),Comedy|Horror|Sci-Fi|Thriller,50.336165
37,7370,A Foreign Affair (2003),Comedy|Drama|Romance,47.939191
35,6695,Jeepers Creepers 2 (2003),Horror|Thriller,45.390136
33,5378,Star Wars: Episode II - Attack of the Clones (...,Action|Adventure|Sci-Fi|IMAX,44.385234
49,59143,Super High Me (2007),Comedy|Documentary,38.909281
42,26750,Quigley Down Under (1990),Adventure|Drama|Western,38.365734


In [131]:
# Сохраняем DataFrame
with open('content/films/clasters/recommendation.dill', 'wb') as f:
    dill.dump(recommendation, f)

In [27]:
als_model = AlternatingLeastSquares(factors=64, regularization=0.1, iterations=15)
als_model.fit(bm25_weight(ratings_csr.T).T)  # bm25 для нормализации


  check_blas_config()


  0%|          | 0/15 [00:00<?, ?it/s]

In [28]:
def recommend_by_als(user_id, top_n=30):
    # Преобразуем user_id в индекс
    user_idx = user_encoder.transform([user_id])[0]

    # Получаем строку взаимодействий для этого пользователя
    user_interaction = ratings_csr[user_idx]

    # Рекомендации с использованием ALS
    ids, scores = als_model.recommend(
        userid=user_idx,
        user_items=user_interaction,  # Передаем строку взаимодействий
        N=top_n
    )

    # Формируем список рекомендаций
    recommendations = []
    for item_idx, score in zip(ids, scores):
        movie_id = item_encoder.inverse_transform([item_idx])[0]  # Обратное преобразование item_id
        recommendations.append({'movieId': movie_id, 'score': score})

    all_recommendations_df = pd.DataFrame(recommendations)
    all_recommendations_df['weight'] = np.linspace(20, 1, len(all_recommendations_df)).round()

    return all_recommendations_df

In [20]:
knn_model = NearestNeighbors(n_neighbors=51, metric='cosine')  # 50 + 1, потому что первый будет сам фильм
knn_model.fit(movie_vectors_reduced)

In [21]:
def recommend_by_knn(user_id, top_n=30):
    user_idx = user_encoder.transform([user_id])[0]
    user_vec = user_vector[user_idx].reshape(1, -1)

    # Находим ближайших пользователей
    distances, indices = knn_model.kneighbors(user_vec)

    # Собираем фильмы, которые понравились соседям
    neighbor_ids = indices.flatten()[1:]  # исключаем самого пользователя
    neighbor_ratings = ratings[ratings['user_idx'].isin(neighbor_ids)]

    # Агрегируем по среднему рейтингу
    movie_scores = (
        neighbor_ratings.groupby('movieId')['rating']
        .mean()
        .sort_values(ascending=False)
        .head(top_n)
        .reset_index()
        .rename(columns={'rating': 'score'})
    )

    all_recommendations_df = movie_scores.copy()
    all_recommendations_df['weight'] = np.linspace(20, 1, len(all_recommendations_df)).round()

    return all_recommendations_df

In [22]:
def recommend_by_svd(user_id, top_n=30):
    user_idx = user_encoder.transform([user_id])[0]
    user_vec = user_vector[user_idx].reshape(1, -1)  # (1, 64)

    # Вычисляем косинусное сходство с векторами всех фильмов
    similarities = cosine_similarity(user_vec, item_matrix).flatten()

    # Получаем топ-N рекомендаций (по индексам)
    top_indices = similarities.argsort()[::-1][:top_n]

    recommendations = []
    for item_idx in top_indices:
        movie_id = item_encoder.inverse_transform([item_idx])[0]
        score = similarities[item_idx]
        recommendations.append({'movieId': movie_id, 'score': score})

    all_recommendations_df = pd.DataFrame(recommendations)
    all_recommendations_df['weight'] = np.linspace(20, 1, len(all_recommendations_df)).round()

    return all_recommendations_df

In [33]:
def ensemble_user_based(user_id, top_n=30):
    # Получаем рекомендации для каждого метода
    rec_svd = recommend_by_svd(user_id, top_n)
    rec_als = recommend_by_als(user_id, top_n)
    rec_knn = recommend_by_knn(user_id, top_n)

    # Убедимся, что столбец movie_id есть в каждом DataFrame
    rec_svd = rec_svd.rename(columns={'movieId': 'movie_id'})
    rec_als = rec_als.rename(columns={'movieId': 'movie_id'})
    rec_knn = rec_knn.rename(columns={'movieId': 'movie_id'})

    # Объединяем все рекомендации в один DataFrame
    all_recs = pd.concat([rec_svd, rec_als, rec_knn], ignore_index=True)

    # Агрегируем по movie_id, рассчитывая среднее значение для оценок и суммируя веса
    final_recs = (
        all_recs.groupby('movie_id')
        .agg({'score': 'mean', 'weight': 'sum'})
        .reset_index()
    )

    # Расчитываем итоговый рейтинг (score * weight)
    final_recs['final_score'] = final_recs['score'] * final_recs['weight']

    # Добавляем названия и жанры фильмов
    final_recs = final_recs.merge(
        movies[['movieId', 'title', 'genres']], 
        left_on='movie_id', 
        right_on='movieId', 
        how='left'
    ).dropna(subset=['title', 'genres'])  # Удаляем строки без названия или жанра

    # Сортируем по финальному рейтингу и возвращаем топ-N рекомендаций
    final_recs = final_recs.sort_values('final_score', ascending=False).head(top_n)

    # Возвращаем в нужном формате
    return final_recs[['movie_id', 'title', 'genres', 'final_score']]


In [34]:
recommendation_user = ensemble_user_based(user_id =1)

In [35]:
recommendation_user

Unnamed: 0,movie_id,title,genres,final_score
11,1982,Halloween (1978),Horror,100.0
17,2736,Brighton Beach Memoirs (1986),Comedy,95.0
18,3034,Robin Hood (1973),Adventure|Animation|Children|Comedy|Musical,95.0
13,2085,101 Dalmatians (One Hundred and One Dalmatians...,Adventure|Animation|Children,90.0
22,3916,Remember the Titans (2000),Drama,85.0
14,2087,Peter Pan (1953),Animation|Children|Fantasy|Musical,85.0
34,5971,My Neighbor Totoro (Tonari no Totoro) (1988),Animation|Children|Drama|Fantasy,80.0
20,3163,Topsy-Turvy (1999),Comedy|Drama|Musical,75.0
72,105355,Blue Is the Warmest Color (La vie d'Adèle) (2013),Drama|Romance,75.0
3,661,James and the Giant Peach (1996),Adventure|Animation|Children|Fantasy|Musical,70.0


In [152]:
def final_ensemble(user_df, content_df, top_n=30, user_weight=0.6, content_weight=0.4):
    # Приводим к одному названию столбца
    user_df = user_df.rename(columns={'movieId': 'movie_id'})

    # Нормализуем final_score от 0 до 1
    user_df['normalized_score'] = (user_df['final_score'] - user_df['final_score'].min()) / (user_df['final_score'].max() - user_df['final_score'].min())
    content_df['normalized_score'] = (content_df['final_score'] - content_df['final_score'].min()) / (content_df['final_score'].max() - content_df['final_score'].min())

    # Применяем веса
    user_df['weighted_score'] = user_df['normalized_score'] * user_weight
    content_df['weighted_score'] = content_df['normalized_score'] * content_weight

    # Объединяем
    combined = pd.concat([user_df[['movie_id', 'title', 'genres', 'weighted_score']],
                          content_df[['movie_id', 'title', 'genres', 'weighted_score']]],
                         ignore_index=True)

    # Агрегируем по movie_id
    final = (
        combined.groupby('movie_id')
        .agg({'weighted_score': 'sum'})
        .reset_index()
    )

    # Добавляем инфу о фильмах
    final = final.merge(
        combined[['movie_id', 'title', 'genres']].drop_duplicates('movie_id'),
        on='movie_id',
        how='left'
    ).dropna(subset=['title', 'genres'])

    # Возвращаем топ-N
    return final.sort_values('weighted_score', ascending=False).head(top_n)[['movie_id', 'title', 'genres', 'weighted_score']]

In [154]:
final_ensemble = final_ensemble(recommendation_user, recommendation, top_n=15)
final_ensemble

Unnamed: 0,movie_id,title,genres,weighted_score
15,1982,Halloween (1978),Horror,0.6
20,2736,Brighton Beach Memoirs (1986),Comedy,0.566881
23,3034,Robin Hood (1973),Adventure|Animation|Children|Comedy|Musical,0.566881
16,2085,101 Dalmatians (One Hundred and One Dalmatians...,Adventure|Animation|Children,0.533761
27,3916,Remember the Titans (2000),Drama,0.500642
17,2087,Peter Pan (1953),Animation|Children|Fantasy|Musical,0.500642
36,5971,My Neighbor Totoro (Tonari no Totoro) (1988),Animation|Children|Drama|Fantasy,0.467523
25,3163,Topsy-Turvy (1999),Comedy|Drama|Musical,0.434404
58,105355,Blue Is the Warmest Color (La vie d'Adèle) (2013),Drama|Romance,0.434404
4,661,James and the Giant Peach (1996),Adventure|Animation|Children|Fantasy|Musical,0.401284


## Паттерн 2. Простое объединение контентных и коллаборативных признаков.

Суть паттерна :
Мы объединяем признаки из movie_content_vector (контент) и item_matrix (коллаборативные, например, из SVD/ALS) в единый вектор для каждого фильма. После этого мы можем использовать любые методы (например, косинусное сходство, KNN, кластеризацию) уже на этом объединённом векторе.

In [175]:
# movie_tag_matrix.index — это movieId'ы, соответствующие этому вектору
movie_ids_with_tags = movie_tag_matrix.index.to_numpy()

# Получаем индексы этих movieId'ов в item_matrix с помощью item_encoder
item_indices = item_encoder.transform(movie_ids_with_tags)

# Отбираем из item_matrix только нужные строки
filtered_item_matrix = item_matrix[item_indices]

# Проверим:
print("Размерность movie_content_vector:", movie_content_vector.shape)
print("Размерность filtered_item_matrix:", filtered_item_matrix.shape)

# Объединяем по признакам (горизонтально)
hybrid_movie_vector = np.hstack([
    movie_content_vector,
    filtered_item_matrix
])
print("Размерность объединённого вектора:", hybrid_movie_vector.shape)

Размерность movie_content_vector: (16376, 64)
Размерность filtered_item_matrix: (16376, 64)
Размерность объединённого вектора: (16376, 128)


In [168]:
print("movie_content_vector:", movie_content_vector.shape)
print("item_matrix:", item_matrix.shape)
print("hybrid_movie_vector:", hybrid_movie_vector.shape)

movie_content_vector: (16376, 64)
item_matrix: (83239, 64)
hybrid_movie_vector: (3, 128)


In [176]:
movie_vectors_reduced = hybrid_movie_vector

In [190]:
movie_id_to_index = {
    movie_id: idx for idx, movie_id in enumerate(movies['movieId'].values)
}

In [201]:
knn_model_fut = NearestNeighbors(n_neighbors=51, metric='cosine')  # 50 + 1, потому что первый будет сам фильм
knn_model_fut.fit(hybrid_movie_vector)

In [202]:
def get_similar_movies_knn_fut(movie_id, hybrid_movie_vector, movie_id_to_index, top_n=30):
    # Получаем индекс фильма в reduced-гибридной матрице
    if movie_id not in movie_id_to_index:
        return []

    movie_index = movie_id_to_index[movie_id]

    # Находим ближайшие фильмы
    distances, indices = knn_model_fut.kneighbors([hybrid_movie_vector[movie_index]], n_neighbors=top_n + 1)

    similar_movies = []
    for i in range(1, len(indices[0])):  # Пропускаем самого себя
        idx = indices[0][i]
        score = 1 - distances[0][i]
        similar_movies.append((idx, score))

    return similar_movies

In [203]:
def recommend_movies_with_strict_genres_filter_fut(movie_ids, hybrid_movie_vector, movie_id_to_index, top_n=10, internal_top_n=30, mode='custom'):
    all_recommendations = []

    for movie_id in movie_ids:
        original_movie_row = movies[movies['movieId'] == movie_id]
        if original_movie_row.empty or movie_id not in movie_id_to_index:
            continue

        original_movie_title = original_movie_row['title'].values[0]
        original_movie_genres = original_movie_row['genres'].values[0]
        original_genres_set = set(original_movie_genres.split('|'))

        print(f"Получаем рекомендации для фильма ID {movie_id} - {original_movie_title}")

        if mode == 'knn':
            similar_movies = get_similar_movies_knn_fut(movie_id=movie_id,
                                        hybrid_movie_vector=hybrid_movie_vector,
                                        movie_id_to_index=movie_id_to_index,
                                        top_n=internal_top_n)
        else:
            movie_index = movie_id_to_index[movie_id]
            query_vector = hybrid_movie_vector[movie_index].reshape(1, -1)
            cosine_sim = cosine_similarity(query_vector, hybrid_movie_vector)[0]
            similar_indices = cosine_sim.argsort()[::-1][1:internal_top_n+1]
            similar_movies = [(idx, cosine_sim[idx]) for idx in similar_indices]

        for idx, score in similar_movies:
            row = movies.iloc[idx]
            if pd.isna(row['title']) or pd.isna(row['genres']):
                continue

            genre_set = set(row['genres'].split('|'))
            if not original_genres_set.intersection(genre_set):
                score = 0

            all_recommendations.append({
                'movie_id': row['movieId'],
                'title': row['title'],
                'genres': row['genres'],
                'similarity_score': score
            })

    if not all_recommendations:
        return pd.DataFrame(columns=['movie_id', 'title', 'genres', 'weight'])

    df = pd.DataFrame(all_recommendations)
    df = df.groupby(['movie_id', 'title', 'genres']).agg({'similarity_score': 'sum'}).reset_index()
    df = df.sort_values('similarity_score', ascending=False)

    weights = np.linspace(20, 1, len(df)).round()
    df['weight'] = df['similarity_score'] * weights

    return df[['movie_id', 'title', 'genres', 'weight']]

In [204]:
def recommend_movies_by_cluster_filtered_fut(movie_ids, hybrid_movie_vector, movie_id_to_index, cluster_labels, movies, top_n=10):
    all_recommendations = []

    for movie_id in movie_ids:
        if movie_id not in movie_id_to_index:
            continue

        movie_idx = movie_id_to_index[movie_id]
        movie_title = movies[movies['movieId'] == movie_id]['title'].values[0]
        movie_genres = set(movies[movies['movieId'] == movie_id]['genres'].values[0].split('|'))

        cluster_label = cluster_labels[movie_idx]
        same_cluster_indices = np.where(cluster_labels == cluster_label)[0]
        same_cluster_indices = same_cluster_indices[same_cluster_indices != movie_idx]

        if len(same_cluster_indices) == 0:
            continue

        distances = euclidean_distances(
            hybrid_movie_vector[movie_idx].reshape(1, -1),
            hybrid_movie_vector[same_cluster_indices]
        )[0]
        similarities = 1 / (1 + distances)

        for idx, score in zip(same_cluster_indices, similarities):
            row = movies.iloc[idx]
            if pd.isna(row['title']) or pd.isna(row['genres']):
                continue

            genres_set = set(row['genres'].split('|'))
            if not movie_genres.intersection(genres_set):
                score = 0

            all_recommendations.append({
                'movie_id': row['movieId'],
                'title': row['title'],
                'genres': row['genres'],
                'similarity_score': score
            })

    df = pd.DataFrame(all_recommendations)
    if df.empty:
        return df

    df = df.groupby(['movie_id', 'title', 'genres']).agg({'similarity_score': 'sum'}).reset_index()
    df = df.sort_values('similarity_score', ascending=False).head(top_n)
    df['weight'] = np.linspace(20, 1, len(df)).round()

    return df

In [205]:
def ensemble_recommendation_hybrid(movie_ids_to_check, hybrid_movie_vector, movie_id_to_index, movies, top_n=30):
    # Кластеризация фильмов
    cluster_labels = cluster_movies(hybrid_movie_vector, n_clusters=100)

    # Рекомендации KNN
    rec_knn = recommend_movies_with_strict_genres_filter_fut(
        movie_ids=movie_ids_to_check,
        hybrid_movie_vector=hybrid_movie_vector,
        movie_id_to_index=movie_id_to_index,
        top_n=top_n,  # передаем top_n
        internal_top_n=50,  # внутреннее ограничение
        mode='knn'
    )

    # Рекомендации по косинусному сходству
    rec_cosine = recommend_movies_with_strict_genres_filter_fut(
        movie_ids=movie_ids_to_check,
        hybrid_movie_vector=hybrid_movie_vector,
        movie_id_to_index=movie_id_to_index,
        top_n=top_n,  # передаем top_n
        internal_top_n=50,  # внутреннее ограничение
        mode='custom'
    )

    # Рекомендации по кластеризации
    rec_cluster = recommend_movies_by_cluster_filtered_fut(
        movie_ids=movie_ids_to_check,
        hybrid_movie_vector=hybrid_movie_vector,
        movie_id_to_index=movie_id_to_index,
        cluster_labels=cluster_labels,
        movies=movies,
        top_n=top_n  # передаем top_n
    )

    # Объединяем все рекомендации
    all_recs = pd.concat([rec_knn, rec_cosine, rec_cluster], ignore_index=True)
    combined_scores = all_recs.groupby('movie_id').agg({'weight': 'sum'}).reset_index()

    # Мерджим с фильмами для получения названия и жанров
    combined_scores = combined_scores.merge(
        movies[['movieId', 'title', 'genres']],
        left_on='movie_id',
        right_on='movieId',
        how='left'
    ).dropna(subset=['title', 'genres'])

    # Считаем итоговый балл для каждого фильма
    combined_scores['final_score'] = combined_scores['weight']

    # Сортируем по итоговому баллу и возвращаем топ-N
    return combined_scores.sort_values('final_score', ascending=False).head(top_n)[['movie_id', 'title', 'genres', 'final_score']]

In [206]:
# Рекомендации для списка фильмов
movie_ids_to_check = [1, 10, 100]
user_id = 1

final_recs = ensemble_recommendation_hybrid(
    movie_ids_to_check=movie_ids_to_check,
    hybrid_movie_vector=hybrid_movie_vector,
    movie_id_to_index=movie_id_to_index,
    movies=movies,
    top_n=30
)

Получаем рекомендации для фильма ID 1 - Toy Story (1995)
Получаем рекомендации для фильма ID 10 - GoldenEye (1995)
Получаем рекомендации для фильма ID 100 - City Hall (1996)
Получаем рекомендации для фильма ID 1 - Toy Story (1995)
Получаем рекомендации для фильма ID 10 - GoldenEye (1995)
Получаем рекомендации для фильма ID 100 - City Hall (1996)


In [207]:
final_recs

Unnamed: 0,movie_id,title,genres,final_score
22,1591,Spawn (1997),Action|Adventure|Sci-Fi|Thriller,59.815871
41,2794,European Vacation (aka National Lampoon's Euro...,Adventure|Comedy|Romance,58.580706
46,2881,Double Jeopardy (1999),Action|Crime|Drama|Thriller,58.557576
66,4533,"Return of the Living Dead, The (1985)",Comedy|Horror|Sci-Fi,56.512709
72,5378,Star Wars: Episode II - Attack of the Clones (...,Action|Adventure|Sci-Fi|IMAX,55.44673
57,3696,Night of the Creeps (1986),Comedy|Horror|Sci-Fi|Thriller,54.487742
81,6695,Jeepers Creepers 2 (2003),Horror|Thriller,53.182717
100,26750,Quigley Down Under (1990),Adventure|Drama|Western,52.27544
92,8658,Zandalee (1991),Drama|Thriller,51.927947
33,2237,Without Limits (1998),Drama,49.969122


In [209]:
def final_final_ensemble(df1=None, df2=None, df3=None,
                   weight1=0.4, weight2=0.4, weight3=0.2,
                   top_n=30):
    import pandas as pd

    dfs = []
    weights = []

    # Упаковываем все датафреймы и соответствующие веса
    if df1 is not None:
        df1 = df1.rename(columns={'movieId': 'movie_id'})
        dfs.append(df1)
        weights.append(weight1)
    if df2 is not None:
        df2 = df2.rename(columns={'movieId': 'movie_id'})
        dfs.append(df2)
        weights.append(weight2)
    if df3 is not None:
        df3 = df3.rename(columns={'movieId': 'movie_id'})
        dfs.append(df3)
        weights.append(weight3)

    combined_frames = []

    for df, weight in zip(dfs, weights):
        # Нормализация
        min_score = df['final_score'].min()
        max_score = df['final_score'].max()
        df['normalized_score'] = (df['final_score'] - min_score) / (max_score - min_score + 1e-8)  # добавим epsilon, чтобы избежать деления на 0
        df['weighted_score'] = df['normalized_score'] * weight

        combined_frames.append(df[['movie_id', 'title', 'genres', 'weighted_score']])

    # Объединяем все источники
    combined = pd.concat(combined_frames, ignore_index=True)

    # Агрегация по фильму
    final = (
        combined.groupby('movie_id')
        .agg({'weighted_score': 'sum'})
        .reset_index()
    )

    # Присоединяем title и genres
    final = final.merge(
        combined[['movie_id', 'title', 'genres']].drop_duplicates('movie_id'),
        on='movie_id',
        how='left'
    ).dropna(subset=['title', 'genres'])

    # Сортируем и возвращаем топ-N
    return final.sort_values('weighted_score', ascending=False).head(top_n)[['movie_id', 'title', 'genres', 'weighted_score']]

In [212]:
final_df = final_final_ensemble(
    df1=recommendation_user,      # паттерн 2
    df2=recommendation,           # паттерн 3 (контент)
    df3=final_recs,               # паттерн 1 (гибрид)
    weight1=0.4,
    weight2=0.3,
    weight3=0.3,
    top_n=15
)

In [213]:
final_df

Unnamed: 0,movie_id,title,genres,weighted_score
25,2881,Double Jeopardy (1999),Action|Crime|Drama|Thriller,0.586041
17,1591,Spawn (1997),Action|Adventure|Sci-Fi|Thriller,0.578835
24,2794,European Vacation (aka National Lampoon's Euro...,Adventure|Comedy|Romance,0.551073
33,4533,"Return of the Living Dead, The (1985)",Comedy|Horror|Sci-Fi,0.549132
30,3696,Night of the Creeps (1986),Comedy|Horror|Sci-Fi|Thriller,0.476577
38,5378,Star Wars: Episode II - Attack of the Clones (...,Action|Adventure|Sci-Fi|IMAX,0.445478
42,6695,Jeepers Creepers 2 (2003),Horror|Thriller,0.42741
18,1982,Halloween (1978),Horror,0.4
47,7370,A Foreign Affair (2003),Comedy|Drama|Romance,0.397677
26,3034,Robin Hood (1973),Adventure|Animation|Children|Comedy|Musical,0.37792


In [24]:
def update_importance_scores(movie_ids_to_promote=None, movie_ids_to_block=None):
    """
    Обновляет importance_df на основе списков фильмов для продвижения и блокировки.
    """
    global importance_df

    if 'importance_df' not in globals():
        importance_df = pd.DataFrame(columns=['movieId', 'importance_score'])

    promote_df = pd.DataFrame({
        'movieId': movie_ids_to_promote or [],
        'importance_score': 1
    })

    block_df = pd.DataFrame({
        'movieId': movie_ids_to_block or [],
        'importance_score': -1
    })

    updated = pd.concat([importance_df, promote_df, block_df], ignore_index=True)
    importance_df = (
        updated.drop_duplicates('movieId', keep='last')  # Сохраняем последнюю установку
    )

In [45]:
importance_df

Unnamed: 0,movieId,importance_score
0,42,1
1,99,1
2,1,-1
3,2,-1
4,4427,-1
5,7586,-1


In [62]:
def ensemble_user_based_with_koef(user_id, top_n=30):
    # Получаем рекомендации для каждого метода
    rec_svd = recommend_by_svd(user_id, top_n)
    rec_als = recommend_by_als(user_id, top_n)
    rec_knn = recommend_by_knn(user_id, top_n)

    # Убедимся, что столбец movie_id есть в каждом DataFrame
    rec_svd = rec_svd.rename(columns={'movieId': 'movie_id'})
    rec_als = rec_als.rename(columns={'movieId': 'movie_id'})
    rec_knn = rec_knn.rename(columns={'movieId': 'movie_id'})

    # Объединяем все рекомендации в один DataFrame
    all_recs = pd.concat([rec_svd, rec_als, rec_knn], ignore_index=True)

    # Агрегируем по movie_id, рассчитывая среднее значение для оценок и суммируя веса
    final_recs = (
        all_recs.groupby('movie_id')
        .agg({'score': 'mean', 'weight': 'sum'})
        .reset_index()
    )

    # Расчитываем итоговый рейтинг (score * weight)
    final_recs['final_score'] = final_recs['score'] * final_recs['weight']

    # Добавляем названия и жанры фильмов
    final_recs = final_recs.merge(
        movies[['movieId', 'title', 'genres']], 
        left_on='movie_id', 
        right_on='movieId', 
        how='left'
    ).dropna(subset=['title', 'genres'])  # Удаляем строки без названия или жанра


    # Получаем список фильмов, которые нужно заблокировать
    drop_film = importance_df[importance_df['importance_score'] == -1]['movieId']

    # Преобразуем drop_film в числовой тип, если это необходимо
    drop_film = drop_film.astype(int)  # Если movie_id в final_recs - int
    
    # Удаляем фильмы, которые имеют importance_score == -1
    new_recs = final_recs[~final_recs['movie_id'].isin(drop_film)]

    
    # Сортируем по финальному рейтингу и возвращаем топ-N рекомендаций
    final_recs = new_recs.sort_values('final_score', ascending=False).head(top_n)

    temp_list = importance_df[importance_df['importance_score']==1]
    temp_list = temp_list.rename(columns={'movie_id': 'movieId'})
    
    # Выполняем слияние с DataFrame movies
    temp_list = temp_list.merge(
        movies[['movieId', 'title', 'genres']], 
        on='movieId',  
        how='left'
    ).dropna(subset=['title', 'genres'])  # Удаляем строки без названия или жанра
    
    # Выводим горячие новинки
    print('Горячие новинки')
    print(temp_list[['movieId', 'title', 'genres']])  

    
    # Возвращаем в нужном формате
    return final_recs[['movie_id', 'title', 'genres', 'final_score']]


In [30]:
# # Укажем какие фильмы продвигать и блокировать
# update_importance_scores(
#     movie_ids_to_promote=[42, 99],
#     movie_ids_to_block=[1, 2]
# )


In [63]:

# Получаем рекомендации
result_zn = ensemble_user_based_with_koef(user_id=5)

Горячие новинки
  movieId                                 title              genres
0      42                Dead Presidents (1995)  Action|Crime|Drama
1      99  Heidi Fleiss: Hollywood Madam (1995)         Documentary


In [52]:
result_zn

Unnamed: 0,movie_id,title,genres,final_score
75,34072,"March of the Penguins (Marche de l'empereur, L...",Documentary,95.0
60,7618,Chaplin (1992),Drama,90.0
61,7889,Pat Garrett and Billy the Kid (1973),Western,85.0
62,8014,"Spring, Summer, Fall, Winter... and Spring (Bo...",Drama,85.0
63,8407,"Molly Maguires, The (1970)",Drama,80.0
64,8530,Dear Frankie (2004),Drama|Romance,75.0
65,8582,Manufacturing Consent: Noam Chomsky and the Me...,Documentary|War,75.0
66,8622,Fahrenheit 9/11 (2004),Documentary,70.0
67,8690,Slaughterhouse-Five (1972),Comedy|Drama|Sci-Fi|War,65.0
68,8873,"Motorcycle Diaries, The (Diarios de motociclet...",Adventure|Drama,65.0


In [40]:
update_importance_scores(movie_ids_to_block=[4427, 7586])


In [68]:
all_genres = get_all_genres(movies)

In [69]:
all_genres

['(no genres listed)',
 'Action',
 'Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'IMAX',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western']

In [98]:
temp = genome_tags[genome_tags['tag'].str.contains('relig', case=False, na=False)]
temp

Unnamed: 0,tagId,tag
846,847,religion


In [84]:
temp = genome_tags[genome_tags['tag'].str.contains('world war', case=False, na=False)]
temp

Unnamed: 0,tagId,tag
1119,1120,world war i
1120,1121,world war ii


In [99]:
def time_promotion():

    current_datetime = datetime.now()
    print("Текущая дата и время:", current_datetime)

    if datetime(2025, 4, 14)<current_datetime <datetime(2025, 4, 28):
        pasha = movies[movies['genres'].str.contains('Mystery', case=False, na=False)]

        include_movies = genome_scores[genome_scores['tagId'] == 847]['movieId'].unique()
        filtered_pasha = pasha[pasha['movieId'].isin(include_movies)]
        return filtered_pasha.head(10)
        

    if datetime(2025, 5, 1)<current_datetime <datetime(2025, 5, 11):
        war_drama = movies[movies['genres'].str.contains('War', case=False, na=False) & 
                   movies['genres'].str.contains('Drama', case=False, na=False)]

        include_movies = genome_scores[genome_scores['tagId'] == 1121]['movieId'].unique()
        filtered_war = war_drama[war_drama['movieId'].isin(include_movies)]
        return filtered_war.head(10)


In [100]:
pasha = time_promotion()
pasha

Текущая дата и время: 2025-04-17 21:29:07.925062


Unnamed: 0,movieId,title,genres
40,41,Richard III (1995),Drama|War
72,73,"Misérables, Les (1995)",Drama|War
108,110,Braveheart (1995),Action|Drama|War
149,151,Rob Roy (1995),Action|Drama|Romance|War
153,155,Beyond Rangoon (1995),Adventure|Drama|War
159,161,Crimson Tide (1995),Drama|Thriller|War
212,214,Before the Rain (Pred dozhdot) (1994),Drama|War
263,266,Legends of the Fall (1994),Drama|Romance|War|Western
331,336,"Walking Dead, The (1995)",Drama|War
335,340,"War, The (1994)",Adventure|Drama|War


In [97]:
pasha = time_promotion()
pasha

Текущая дата и время: 2025-04-17 21:10:26.075071


Unnamed: 0,movieId,title,genres
21,22,Copycat (1995),Crime|Drama|Horror|Mystery|Thriller
28,29,"City of Lost Children, The (Cité des enfants p...",Adventure|Drama|Fantasy|Mystery|Sci-Fi
31,32,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery|Sci-Fi|Thriller
46,47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
49,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
58,59,"Confessional, The (Confessionnal, Le) (1995)",Drama|Mystery
101,103,Unforgettable (1996),Mystery|Sci-Fi|Thriller
111,113,Before and After (1996),Drama|Mystery
121,123,Chungking Express (Chung Hing sam lam) (1994),Drama|Mystery|Romance
157,159,Clockers (1995),Crime|Drama|Mystery


In [None]:
 def ensemble_recommendation_koef(movie_ids_to_check, top_n=30):
    # Получаем рекомендации от KNN с фильтрацией по жанрам
    rec_knn = recommend_movies_with_strict_genres_filter(
        movie_ids=movie_ids_to_check,
        top_n=top_n,
        internal_top_n=50,
        mode='knn'  # для KNN
    )

    # Получаем рекомендации с использованием простого косинусного сходства
    rec_cosine = recommend_movies_with_strict_genres_filter(
        movie_ids=movie_ids_to_check,
        top_n=top_n,
        internal_top_n=50,
        mode='custom'  # обычное косинусное сходство
    )

    cluster_labels = cluster_movies(movie_vectors_reduced, n_clusters=100)
    
    # Получаем рекомендации от кластеризации
    rec_cluster = recommend_movies_by_cluster_filtered(
        movie_ids=movie_ids_to_check,
        movie_vectors_reduced=movie_content_vector,
        movies=movies,
        cluster_labels=cluster_labels,
        top_n=top_n
    )

    # Объединяем все рекомендации
    all_recs = pd.concat([rec_knn, rec_cosine, rec_cluster], ignore_index=True)

    # Суммируем веса по movie_id
    combined_scores = all_recs.groupby('movie_id').agg({'weight': 'sum'}).reset_index()

    # Добавляем названия и жанры
    combined_scores = combined_scores.merge(
        movies[['movieId', 'title', 'genres']],
        left_on='movie_id',
        right_on='movieId',
        how='left'
    ).dropna(subset=['title', 'genres'])

     # Получаем список фильмов, которые нужно заблокировать
    drop_film = importance_df[importance_df['importance_score'] == -1]['movieId']

    # Преобразуем drop_film в числовой тип, если это необходимо
    drop_film = drop_film.astype(int)  # Если movie_id в final_recs - int
    
    # Удаляем фильмы, которые имеют importance_score == -1
    combined_scores = combined_scores[~combined_scores['movie_id'].isin(drop_film)]

    combined_scores['final_score'] = combined_scores['weight']
    # Возвращаем топ-N рекомендаций
    return combined_scores.sort_values('final_score', ascending=False).head(top_n)[['movie_id', 'title', 'genres', 'final_score']]

In [None]:

    # Получаем список фильмов, которые нужно заблокировать
    drop_film = importance_df[importance_df['importance_score'] == -1]['movieId']

    # Преобразуем drop_film в числовой тип, если это необходимо
    drop_film = drop_film.astype(int)  # Если movie_id в final_recs - int
    
    # Удаляем фильмы, которые имеют importance_score == -1
    new_recs = final_recs[~final_recs['movie_id'].isin(drop_film)]

    
    # Сортируем по финальному рейтингу и возвращаем топ-N рекомендаций
    final_recs = new_recs.sort_values('final_score', ascending=False).head(top_n)

    temp_list = importance_df[importance_df['importance_score']==1]
    temp_list = temp_list.rename(columns={'movie_id': 'movieId'})
    
    # Выполняем слияние с DataFrame movies
    temp_list = temp_list.merge(
        movies[['movieId', 'title', 'genres']], 
        on='movieId',  
        how='left'
    ).dropna(subset=['title', 'genres'])  # Удаляем строки без названия или жанра
    
    # Выводим горячие новинки
    print('Горячие новинки')
    print(temp_list[['movieId', 'title', 'genres']]) 