In [None]:
pip install surprise



In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from surprise import Dataset, Reader, SVD, KNNBasic, accuracy
from surprise.model_selection import train_test_split, GridSearchCV

# Load the datasets
# Both CSVs are read from an absolute path under /content/drive (presumably a
# Google Colab Drive mount — adjust when running elsewhere).
ratings = pd.read_csv('/content/drive/MyDrive/MovieLensDataset/rating.csv')
movies = pd.read_csv('/content/drive/MyDrive/MovieLensDataset/movie.csv')

# Prepare the data for Surprise (Collaborative Filtering)
# Only the user, item and rating columns are handed to Surprise, in that order.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
# Hold out 20% of the ratings with a fixed seed for reproducibility.
# NOTE(review): `testset` is not used anywhere in the visible code, so the
# fitted model is never evaluated here.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Hyperparameter tuning for SVD
# Search space for a 3-fold grid search (3 * 3 * 3 * 3 = 81 combinations).
param_grid_svd = {
    'n_factors': [50, 100, 150],
    'n_epochs': [20, 30, 40],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.02, 0.05, 0.1]
}
# The grid search below is disabled, so `param_grid_svd` is currently unused
# and the model is trained with Surprise's default SVD hyperparameters.
#gs_svd = GridSearchCV(SVD, param_grid_svd, measures=['rmse'], cv=3)
#gs_svd.fit(data)

#best_svd = gs_svd.best_estimator['rmse']
#best_svd.fit(trainset)

# Default-parameter SVD fitted on the training split only; this `svd` object
# is what get_collaborative_recommendations queries.
svd = SVD()
svd.fit(trainset)

# Preprocess the data for content-based filtering
# Missing genres become empty strings so the vectorizer receives only text.
movies['genres'] = movies['genres'].fillna('')
# Genres are the only content feature at the moment.
movies['combined_features'] = movies['genres']

# Feature extraction using TF-IDF
# NOTE(review): the default tokenizer splits on non-word characters, so a
# hyphenated genre such as "Sci-Fi" presumably becomes two tokens — confirm
# that this is intended.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined_features'])

# Compute the cosine similarity matrix
# TfidfVectorizer L2-normalises rows by default, so the plain dot product
# computed by linear_kernel equals the cosine similarity.
# NOTE(review): the result has one row and one column per movie, so memory
# grows quadratically with the catalogue size — verify it fits in RAM.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get collaborative filtering recommendations
def get_collaborative_recommendations(user_id, n=10, *, exclude_rated=False):
    """Return the ``n`` movies with the highest SVD-predicted rating for a user.

    Every distinct ``movieId`` in ``ratings`` is scored with the module-level
    ``svd`` model and the best ``n`` are looked up in ``movies``.

    Args:
        user_id: Raw user id, of the same type as ``ratings['userId']``.
        n: Maximum number of movies to return.
        exclude_rated: When true, movies the user has already rated are not
            scored. Defaults to ``False``, which scores every movie.

    Returns:
        The matching rows of ``movies``, ordered from highest to lowest
        predicted rating. Ids that are absent from ``movies`` are dropped,
        so fewer than ``n`` rows may be returned.
    """
    # ``tolist()`` yields native Python scalars, i.e. the same raw ids that
    # Surprise stored when the trainset was built from the DataFrame.
    candidate_ids = ratings['movieId'].unique().tolist()
    if exclude_rated:
        already_rated = set(
            ratings.loc[ratings['userId'] == user_id, 'movieId'].tolist()
        )
        candidate_ids = [
            movie_id for movie_id in candidate_ids
            if movie_id not in already_rated
        ]

    # Raw ids must be passed unchanged: converting them to ``str`` makes every
    # item unknown to the model, so the estimates stop being item-specific.
    predictions = [svd.predict(user_id, movie_id) for movie_id in candidate_ids]
    top_predictions = sorted(predictions, key=lambda pred: pred.est, reverse=True)[:n]

    # Remember each id's rank so the returned rows keep the predicted order
    # instead of the arbitrary row order of ``movies``.
    rank_by_id = {pred.iid: rank for rank, pred in enumerate(top_predictions)}
    picked = movies[movies['movieId'].isin(list(rank_by_id))]
    order = picked['movieId'].map(rank_by_id).to_numpy().argsort(kind='stable')
    return picked.iloc[order]

# Function to get content-based recommendations
def get_content_based_recommendations(title, n=10):
    """Return the ``n`` movies most similar to ``title`` by TF-IDF similarity.

    Similarities are read from the module-level ``cosine_sim`` matrix, whose
    rows and columns follow the row order of ``movies``.

    Args:
        title: Exact value of the ``title`` column identifying the query movie.
            If several rows share the title, the first one is used.
        n: Maximum number of movies to return.

    Returns:
        Rows of ``movies`` ordered from most to least similar. The query movie
        itself is never included.

    Raises:
        IndexError: If no row of ``movies`` has the given title.
    """
    # Work with row positions, not index labels: ``cosine_sim`` and ``iloc``
    # are positional, so this stays correct even without a default index.
    matches = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"No movie titled {title!r} found in movies")
    query_pos = int(matches[0])

    # Drop the query movie explicitly. Skipping the first sorted entry is not
    # reliable, because other movies can tie with it at the maximum score.
    ranked = sorted(
        (
            (pos, score)
            for pos, score in enumerate(cosine_sim[query_pos])
            if pos != query_pos
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )[:n]
    return movies.iloc[[pos for pos, _ in ranked]]

# Function to combine recommendations
def get_hybrid_recommendations(user_id, title, n=10):
    """Blend collaborative and content-based recommendations into one list.

    Args:
        user_id: User id passed to ``get_collaborative_recommendations``.
        title: Movie title passed to ``get_content_based_recommendations``.
        n: Number of rows requested from each recommender and the maximum
            number of rows returned.

    Returns:
        A DataFrame of at most ``n`` distinct movie rows that alternates
        between the two sources, starting with the collaborative list.
    """
    collaborative_recs = get_collaborative_recommendations(user_id, n)
    content_recs = get_content_based_recommendations(title, n)

    # Combine both recommendations
    stacked = pd.concat([collaborative_recs, content_recs])
    n_collab = len(collaborative_recs)

    # Interleave the two lists row by row. Truncating the plain concatenation
    # would return only collaborative rows whenever that list already holds
    # ``n`` entries, so the content-based results would never be shown.
    interleaved = sorted(
        range(len(stacked)),
        key=lambda pos: (pos, 0) if pos < n_collab else (pos - n_collab, 1),
    )
    hybrid_recs = stacked.iloc[interleaved].drop_duplicates().head(n)
    return hybrid_recs

# Example usage
# Demo inputs. The title has to equal a value of the `title` column exactly,
# because the content-based lookup compares titles with `==`.
user_id = 1
title = 'Toy Story (1995)'

# `n` is left at its default of 10, which is why the header says "Top 10".
print(f"Top 10 hybrid recommendations for User ID {user_id} and movie '{title}':")
print(get_hybrid_recommendations(user_id, title))