In [118]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import MultiLabelBinarizer

In [119]:
movies = pd.DataFrame({
    'movie_id': [1, 2, 3],
    'title': ['Movie A', 'Movie B', 'Movie C'],
    'genres': ['Action|Adventure', 'Adventure|Fantasy', 'Comedy|Drama'],
    'actors': ['Actor X|Actor Y', 'Actor Y|Actor Z', 'Actor X|Actor Z']
})

In [120]:
ratings = pd.DataFrame({
    'user_id': [1, 2, 1, 2, 3],
    'movie_id': [1, 1, 2, 2, 3],
    'rating': [4, 5, 3, 4, 2]
})

In [121]:
user_movie_matrix = ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)

In [122]:
n_components = min(50, user_movie_matrix.shape[1] - 1)  # корректируем количество компонентов
svd = TruncatedSVD(n_components=n_components)
matrix_svd = svd.fit_transform(user_movie_matrix)

In [123]:
movies['features'] = movies['genres'] + '|' + movies['actors']
movies['features_vectorized'] = movies['features'].apply(lambda x: x.split('|'))

In [124]:
mlb = MultiLabelBinarizer()
movie_features_matrix = mlb.fit_transform(movies['features_vectorized'])

In [125]:
cosine_sim = cosine_similarity(movie_features_matrix)

In [126]:
def recommend(user_id, user_movie_matrix, matrix_svd, cosine_sim, movies, top_n=10):
    user_ratings = user_movie_matrix.loc[user_id].values.reshape(1, -1)
    user_pred = np.dot(user_ratings, svd.components_.T)

    user_watched_movies = user_movie_matrix.columns[user_movie_matrix.loc[user_id] > 0]
    movie_indices = [movies[movies['movie_id'] == i].index[0] for i in user_watched_movies]
    similar_movies = cosine_sim[movie_indices].mean(axis=0)

    print("user_pred shape:", user_pred.shape)
    print("similar_movies shape:", similar_movies.shape)

    hybrid_scores = user_pred.flatten() + similar_movies
    hybrid_scores = hybrid_scores.argsort()[::-1]

    recommended_movie_indices = [i for i in hybrid_scores if i not in movie_indices][:top_n]
    recommended_movies = movies.iloc[recommended_movie_indices]
    return recommended_movies

In [127]:
recommendations = recommend(1, user_movie_matrix, matrix_svd, cosine_sim, movies)
print(recommendations)

user_pred shape: (1, 2)
similar_movies shape: (3,)


ValueError: operands could not be broadcast together with shapes (2,) (3,) 