In [None]:
!pip install surprise

In [3]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [5]:
# Load the dataset
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

In [6]:
ratings.head(2)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247


In [7]:
movies.head(2)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy


In [8]:
# Collaborative Filtering using Surprise library
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7a8f62c6c670>

In [10]:
from surprise import KNNBasic

# Use KNNBasic algorithm
algo = KNNBasic()
algo.fit(trainset)

def collaborative_filtering_recommendations(movie_title, top_n=5):
    movie_id = movies[movies['title'] == movie_title]['movieId'].values[0]

    # Get the inner id of the movie
    movie_inner_id = algo.trainset.to_inner_iid(movie_id)

    # Get top-N similar movies
    movie_neighbors = algo.get_neighbors(movie_inner_id, k=top_n)

    # Convert inner ids of the movies back to raw ids
    movie_neighbors = (algo.trainset.to_raw_iid(inner_id) for inner_id in movie_neighbors)

    # Convert movie IDs to movie titles
    collab_recommendation_titles = [movies[movies['movieId'] == movie]['title'].values[0] for movie in movie_neighbors]

    return collab_recommendation_titles

Computing the msd similarity matrix...
Done computing similarity matrix.


In [11]:
movie_title = "Toy Story (1995)"
collab_rating = collaborative_filtering_recommendations(movie_title)
collab_rating

['Psycho (1960)',
 'Groundhog Day (1993)',
 'Wolf of Wall Street, The (2013)',
 '2012 (2009)',
 'Dunston Checks In (1996)']

In [12]:
# Content-Based Filtering using TF-IDF
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies['genres'].fillna(''))

In [13]:
# Function for Content-Based Filtering Recommendations
def content_based_recommendations(movie_title):
    tfidf_index = movies[movies['title'] == movie_title].index[0]
    cosine_similarities = linear_kernel(tfidf_matrix[tfidf_index], tfidf_matrix).flatten()
    content_indices = cosine_similarities.argsort()[::-1][1:]  # Exclude the current movie
    return [(movies.iloc[idx]['title'], cosine_similarities[idx]) for idx in content_indices]

In [14]:
# Example: Get recommendations for a specific movie
movie_title = "Toy Story (1995)"
content_ratings = content_based_recommendations(movie_title)

print("\nContent-Based Recommendations:")
for movie, similarity in content_ratings[:5]:
    print(f"{movie} - Similarity: {similarity:.2f}")


Content-Based Recommendations:
Emperor's New Groove, The (2000) - Similarity: 1.00
Antz (1998) - Similarity: 1.00
Adventures of Rocky and Bullwinkle, The (2000) - Similarity: 1.00
Wild, The (2006) - Similarity: 1.00
Asterix and the Vikings (Astérix et les Vikings) (2006) - Similarity: 1.00
