# Загрузка данных и библиотек

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import pandas as pd

In [2]:
# Directory that holds the `ml-latest-small` CSV files (presumably the
# MovieLens small dataset). Change this one constant if the data lives
# somewhere else; the path is relative to the notebook's working directory.
DATA_DIR = 'ml-latest-small'

# One DataFrame per source file.
movies = pd.read_csv(f'{DATA_DIR}/movies.csv')
ratings = pd.read_csv(f'{DATA_DIR}/ratings.csv')
tags = pd.read_csv(f'{DATA_DIR}/tags.csv')

In [3]:
# Preview the movies table (columns: movieId, title, genres); pandas shows
# only the first and last rows of a long frame.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [4]:
# Switch the two key columns of `ratings` to snake_case names, then preview
# the result. Renaming is a no-op for labels that are already absent, so the
# cell can safely be run more than once.
ratings = ratings.rename(
    {'userId': 'user_id', 'movieId': 'movie_id'},
    axis='columns',
)
ratings

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [5]:
# Preview the tags table (columns: userId, movieId, tag, timestamp).
tags

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200
...,...,...,...,...
3678,606,7382,for katie,1171234019
3679,606,7936,austere,1173392334
3680,610,3265,gun fu,1493843984
3681,610,3265,heroic bloodshed,1493843978


## Объединение

In [6]:
# Build one free-text "metadata" string per movie: its genres followed by all
# tags attached to that movie.

# '|' is the regex alternation operator. regex=False makes the literal
# replacement explicit instead of relying on the default of `regex`, which
# differs between pandas versions.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

# Missing tags become empty strings so that ' '.join never meets a NaN.
tags['tag'] = tags['tag'].fillna('')
tags_merged = tags.groupby('movieId')['tag'].agg(' '.join).reset_index()

# Drop columns left over from a previous run of this cell. Without this a
# second run would produce tag_x / tag_y in the merge and the 'tag' lookups
# below would raise KeyError.
movies = movies.drop(columns=['tag', 'metadata'], errors='ignore')
movies = movies.merge(tags_merged, on='movieId', how='left')

# The left join leaves NaN for movies that have no tags at all.
movies['tag'] = movies['tag'].fillna('')
movies['metadata'] = movies['genres'] + ' ' + movies['tag']

## Создание двухуровневой архитектуры

#### Первый уровень: фильтрация кандидатов по жанрам (сортировка по `movieId`; рейтинги пока не используются)


In [7]:
def get_candidate_movies(movie_title, n_candidates=100):
    """First-stage filter: select movies whose genres contain the query's genres.

    The genre string of the movie titled ``movie_title`` is looked up in the
    global ``movies`` frame, and every movie whose ``genres`` value contains
    that exact string as a substring is kept.

    NOTE: candidates are ordered by descending ``movieId``; no rating or
    popularity information is used here.

    Args:
        movie_title: Exact value of the ``title`` column of the query movie.
            If several rows share the title, the first one is used.
        n_candidates: Maximum number of candidate rows to return.

    Returns:
        A DataFrame slice of ``movies`` with at most ``n_candidates`` rows.

    Raises:
        IndexError: If no movie has the given title.
    """
    query_genres = movies.loc[movies['title'] == movie_title, 'genres']
    if query_genres.empty:
        raise IndexError(f"movie title not found: {movie_title!r}")
    movie_genre = query_genres.iloc[0]

    # regex=False: the genre string is data, not a pattern. Values such as
    # "(no genres listed)" or a '|'-separated list would otherwise be
    # interpreted as regex syntax. na=False: rows with missing genres are
    # simply not candidates instead of breaking the boolean mask.
    shares_genres = movies['genres'].str.contains(movie_genre, regex=False, na=False)

    return (
        movies[shares_genres]
        .sort_values(by='movieId', ascending=False)
        .head(n_candidates)
    )

#### Второй уровень: Применение более точного метода (Nearest Neighbors) среди кандидатов



In [8]:
# Turn every movie's metadata string into a TF-IDF vector; scikit-learn's
# built-in English stop-word list is removed first.
tfidf = TfidfVectorizer(stop_words='english')
# One row per row of `movies`, in the same order.
tfidf_matrix = tfidf.fit_transform(movies['metadata'])

In [9]:
# Exhaustive (brute-force) cosine-distance neighbour search over the TF-IDF
# vectors, using all available cores. `fit` returns the estimator itself, so
# it can be chained onto the constructor.
nn_model = NearestNeighbors(
    n_neighbors=100,
    metric='cosine',
    algorithm='brute',
    n_jobs=-1,
).fit(tfidf_matrix)
nn_model

In [10]:
def get_hybrid_recommendations(user_id, movie_title, n_candidates=100,
                               n_neighbors=100, exclude_query=False):
    """Two-stage recommendation: genre candidates re-ranked by TF-IDF similarity.

    Stage one (``get_candidate_movies``) selects up to ``n_candidates`` movies.
    Stage two asks the global ``nn_model`` for the nearest neighbours of the
    query movie in ``tfidf_matrix`` and keeps, in neighbour order, only those
    neighbours that are also stage-one candidates.

    Args:
        user_id: Currently unused; the result does not depend on it. Kept so
            existing callers keep working.
        movie_title: Exact value of the ``title`` column of the query movie.
            If several rows share the title, the first one is used.
        n_candidates: Size of the stage-one candidate pool and upper bound on
            the number of titles returned.
        n_neighbors: How many nearest neighbours to request in stage two. It
            is capped at the number of rows in ``tfidf_matrix`` because
            ``kneighbors`` rejects larger values.
        exclude_query: If True, the query movie itself is left out of the
            result. The default (False) keeps it, as before.

    Returns:
        A list of movie titles, most similar first.

    Raises:
        IndexError: If no movie has the given title.
    """
    candidates = get_candidate_movies(movie_title, n_candidates)

    title_mask = (movies['title'] == movie_title).to_numpy()
    if not title_mask.any():
        raise IndexError(f"movie title not found: {movie_title!r}")
    # Row *position* of the first match. tfidf_matrix is addressed by
    # position, not by index label.
    query_pos = int(title_mask.argmax())

    # kneighbors() reports row positions, while candidates.index holds index
    # labels of `movies`. Convert labels to positions so the membership test
    # is correct for any index, not only the default 0..n-1 one.
    candidate_positions = set(
        movies.index.isin(candidates.index).nonzero()[0].tolist()
    )

    k = min(n_neighbors, tfidf_matrix.shape[0])
    _, indices = nn_model.kneighbors(tfidf_matrix[query_pos], n_neighbors=k)

    titles = movies['title']
    similar_movies = [
        titles.iloc[pos]
        for pos in indices.flatten().tolist()
        if pos in candidate_positions
        and not (exclude_query and pos == query_pos)
    ]
    return similar_movies[:n_candidates]

In [11]:
# Demo query: the user the report is addressed to and the seed movie.
user_id, movie_title = 1, 'Toy Story (1995)'
recommended_movies = get_hybrid_recommendations(
    user_id=user_id,
    movie_title=movie_title,
    n_candidates=100,
)


In [12]:
# Print a header line followed by one recommended title per line, in the
# order returned by the recommender.
print(f"Рекомендации для пользователя {user_id} по фильму '{movie_title}':")
for movie in recommended_movies:
    print(movie)

Рекомендации для пользователя 1 по фильму 'Toy Story (1995)':
Toy Story (1995)
Toy Story 2 (1999)
The Good Dinosaur (2015)
Antz (1998)
Adventures of Rocky and Bullwinkle, The (2000)
Tale of Despereaux, The (2008)
Asterix and the Vikings (Astérix et les Vikings) (2006)
Shrek the Third (2007)
Monsters, Inc. (2001)
Emperor's New Groove, The (2000)
Wild, The (2006)
Moana (2016)
Turbo (2013)
TMNT (Teenage Mutant Ninja Turtles) (2007)
Twelve Tasks of Asterix, The (Les douze travaux d'Astérix) (1976)
Gnomeo & Juliet (2011)
Puss in Boots (Nagagutsu o haita neko) (1969)
Valiant (2005)
Home (2015)
