In [9]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

In [10]:
movies = pd.read_csv("/kaggle/input/ml-10m/ml-10M100K/movies.dat", sep="::", engine='python', header=None, names=['movieId', 'title', 'genres'])
ratings = pd.read_csv("/kaggle/input/ml-10m/ml-10M100K/ratings.dat", sep="::", engine='python', header=None, names=['userId', 'movieId', 'rating', 'timestamp'])
tags = pd.read_csv("/kaggle/input/ml-10m/ml-10M100K/tags.dat", sep="::", engine='python', header=None, names=['userId', 'movieId', 'tag', 'timestamp'])

In [11]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [12]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,122,5.0,838985046
1,1,185,5.0,838983525
2,1,231,5.0,838983392
3,1,292,5.0,838983421
4,1,316,5.0,838983392


In [13]:
tags.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,15,4973,excellent!,1215184630
1,20,1747,politics,1188263867
2,20,1747,satire,1188263867
3,20,2424,chick flick 212,1188263835
4,20,2424,hanks,1188263835


In [14]:
tags['tag'] = tags['tag'].replace(np.nan, '', regex=True)
movie_tags = tags.groupby('movieId')['tag'].apply(lambda x: ' '.join(x)).reset_index()
movies_with_tags = pd.merge(movies, movie_tags, on='movieId', how='left')
movies_with_tags['tag'] = movies_with_tags['tag'].fillna('')

In [15]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_with_tags['tag'])
svd_content = TruncatedSVD(n_components=50, random_state=50)
latent_matrix_content = svd_content.fit_transform(tfidf_matrix)

In [16]:
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=50)

In [17]:
model_collaborative = SVD(n_factors=50, biased=True, random_state=50)
model_collaborative.fit(trainset)
predictions = model_collaborative.test(testset)
print('Collaborative Filter RMSE:', rmse(predictions))

RMSE: 0.7977
Collaborative Filter RMSE: 0.7976651425446813


In [18]:
movie_factors = model_collaborative.qi
user_factors = model_collaborative.pu
latent_matrix_collaborative = np.dot(user_factors, movie_factors.T)

In [19]:
min_rows = min(latent_matrix_content.shape[0], latent_matrix_collaborative.shape[0])
latent_matrix_content = latent_matrix_content[:min_rows, :]
latent_matrix_collaborative = latent_matrix_collaborative[:min_rows, :]

In [21]:
hybrid_matrix = np.concatenate([latent_matrix_content, latent_matrix_collaborative], axis=1)
movie_titles = movies['title'].tolist()

In [23]:
def hybrid_recommender(user_id, movie_title):
    movie_idx = movie_titles.index(movie_title)
    
    weight_content = 0.7
    weight_collaborative = 0.3
    hybrid_score = (
        weight_content * latent_matrix_content[user_id, movie_idx] +
        weight_collaborative * latent_matrix_collaborative[user_id, movie_idx]
    )
    
    return hybrid_score

In [24]:
user_id = 1
movie_title = 'Toy Story (1995)'
score = hybrid_recommender(user_id, movie_title)
print(f'Hybrid Score for User {user_id} and Movie "{movie_title}": {score}')

Hybrid Score for User 1 and Movie "Toy Story (1995)": 0.0317605655288499
