In [None]:
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# Load the ratings table; only the userId, movieId and rating columns are used below.
ratings = pd.read_csv(r"cleaned_ratings.csv")

# Tell Surprise the bounds of the rating scale so predictions are clipped to it.
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split reproducible.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Seed the model as well: SVD initialises its factors randomly, so without a
# seed the RMSE (and every downstream score) changes from run to run.
svd_model = SVD(random_state=42)
svd_model.fit(trainset)

predictions = svd_model.test(testset)

# rmse() prints the score itself by default; silence it so the value is reported once.
print("RMSE:", rmse(predictions, verbose=False))

# One row per held-out rating, renamed to match the column names used by the other frames.
svd_preds = pd.DataFrame(predictions)
svd_preds = svd_preds.rename(columns={'uid': 'userId', 'iid': 'movieId', 'est': 'svd_score'})


RMSE: 0.9344
RMSE: 0.9344438323095944


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

movies = pd.read_csv(r"cleaned_movies.csv")

# A missing genre is read as NaN, which TfidfVectorizer rejects as an invalid
# document; treat it as an empty string so such a movie simply matches nothing.
# NOTE(review): the default token pattern only keeps runs of word characters, so a
# hyphenated genre such as "Sci-Fi" would be split into two tokens — confirm the
# format of the cleaned `genres` column.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(movies['genres'].fillna(''))

# Movie-by-movie cosine similarity; row and column order follow the rows of `movies`.
cos_sim = cosine_similarity(tfidf_matrix)

# movieId -> row label of `movies` (labels equal row positions for the default
# index that read_csv produces).
movie_indices = pd.Series(movies.index, index=movies['movieId'])

def get_content_scores(user_id, ratings_df, movies_df, sim_matrix, top_n=10, exclude_rated=False):
    """Rank movies for a user by rating-weighted content similarity.

    Each movie's score is the sum, over the movies the user has rated, of
    ``rating * similarity(rated movie, movie)``.

    Parameters
    ----------
    user_id : value compared against ``ratings_df['userId']``.
    ratings_df : DataFrame with ``userId``, ``movieId`` and ``rating`` columns.
    movies_df : DataFrame with a ``movieId`` column; its row order must match
        the row/column order of ``sim_matrix``.
    sim_matrix : 2-D array of pairwise similarities (one row per movie).
    top_n : number of highest-scoring movies to return.
    exclude_rated : when True, drop movies the user has already rated from the
        result. Defaults to False, which keeps them.

    Returns
    -------
    pandas.Series of scores indexed by movieId, sorted in descending order and
    truncated to ``top_n``. Empty (float64) when the user has no ratings for
    movies present in ``movies_df``.
    """
    user_ratings = ratings_df.loc[ratings_df['userId'] == user_id, ['movieId', 'rating']]

    # Build the movieId -> row-position lookup from the frame that was passed in,
    # rather than from a notebook global, and use positions (not index labels)
    # because sim_matrix is addressed positionally.
    positions = pd.Series(range(len(movies_df)), index=movies_df['movieId'].to_numpy())
    # A repeated movieId would make the lookup ambiguous; keep its first row.
    positions = positions[~positions.index.duplicated(keep='first')]

    # Ratings for movies missing from the catalogue cannot contribute.
    known = user_ratings[user_ratings['movieId'].isin(positions.index)]
    if known.empty:
        return pd.Series(dtype='float64')

    rows = positions.loc[known['movieId'].to_numpy()].to_numpy()
    weights = known['rating'].to_numpy(dtype='float64')

    # One weighted sum over the rated movies' similarity rows replaces the
    # per-rating Series construction and re-alignment.
    totals = weights @ sim_matrix[rows]
    scores = pd.Series(totals, index=movies_df['movieId'])

    if exclude_rated:
        scores = scores[~scores.index.isin(user_ratings['movieId'])]

    return scores.sort_values(ascending=False).head(top_n)

user_id = int(input("Enter your user ID: "))

# Movies this user has already rated; they are excluded from the recommendations.
already_rated = ratings.loc[ratings['userId'] == user_id, 'movieId']
if already_rated.empty:
    raise ValueError(f"No ratings found for user {user_id}; cannot build recommendations.")

# Score the whole catalogue by content similarity, drop what the user has already
# rated, and keep the 100 strongest unseen candidates.
content_scores = get_content_scores(user_id, ratings, movies, cos_sim, top_n=len(movies))
content_df = pd.DataFrame(content_scores).reset_index()
content_df.columns = ['movieId', 'content_score']
content_df = content_df[~content_df['movieId'].isin(already_rated)].head(100)

# Ask the trained SVD model for a rating estimate for every candidate. Joining
# against `svd_preds` instead would keep only movies from the held-out test split,
# i.e. movies the user has already rated, and usually leaves almost no rows.
hybrid_df = content_df.assign(
    svd_score=[svd_model.predict(user_id, int(movie_id)).est for movie_id in content_df['movieId']]
)

# Put both signals on a common 0-1 scale before blending them.
scaler = MinMaxScaler()
hybrid_df[['content_score', 'svd_score']] = scaler.fit_transform(hybrid_df[['content_score', 'svd_score']])

# alpha weights the content score; (1 - alpha) weights the SVD estimate.
alpha = 0.5
hybrid_df['hybrid_score'] = alpha * hybrid_df['content_score'] + (1 - alpha) * hybrid_df['svd_score']

recommended_movies = pd.merge(hybrid_df.sort_values(by='hybrid_score', ascending=False), movies, on='movieId')
print(recommended_movies[['title', 'hybrid_score']].head(10))


Enter your user ID:  4


                      title  hybrid_score
0  Conspiracy Theory (1997)           0.0
