In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# Read the two CSV inputs (one movie catalogue, one ratings log)
ratings = pd.read_csv('/content/ratings.csv')
movies = pd.read_csv('/content/movies.csv')

# Attach each rating to its movie record via the shared 'movieId' key
data = ratings.merge(movies, on='movieId')
print(data)

Create a popularity-based recommender system at the genre level. The user supplies a genre (g), a minimum number of ratings per movie (t), and the number of recommendations (N). The system should return the top N most popular movies within genre g, ordered by rating in descending order, where each movie has at least t reviews.

In [None]:
def genre_based_popular_recommendation(data, genre, min_reviews, top_n):
    """Return the best-rated movies of one genre that have enough reviews.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per rating, with at least the columns ``title``, ``genres``
        and ``rating``. If a ``movieId`` column is present it is used to keep
        distinct movies that happen to share a title apart.
    genre : str
        Genre to look for. Matched literally (not as a regular expression)
        and case-insensitively as a substring of the ``genres`` column.
    min_reviews : int
        Minimum number of ratings a movie needs to be eligible.
    top_n : int
        Maximum number of movies to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``avg_rating`` and ``num_reviews``, sorted by
        ``avg_rating`` descending, with a 1-based rank as the index.
    """
    # regex=False: the genre is user input, so characters such as '(' or '+'
    # must be taken literally instead of being interpreted as a pattern.
    in_genre = data['genres'].str.contains(genre, case=False, na=False, regex=False)
    genre_data = data[in_genre]

    # Group per movie, not per title: two different movies can carry the same
    # title, and pooling their ratings would inflate the review count.
    group_keys = ['movieId', 'title'] if 'movieId' in genre_data.columns else ['title']
    popularity_data = (
        genre_data.groupby(group_keys)
        .agg(avg_rating=('rating', 'mean'), num_reviews=('rating', 'count'))
        .reset_index()
    )

    # Filter by the minimum reviews threshold
    filtered_data = popularity_data[popularity_data['num_reviews'] >= min_reviews]

    # Best average first; ties are broken by review count and then title so
    # that the ranking is deterministic.
    sorted_data = filtered_data.sort_values(
        by=['avg_rating', 'num_reviews', 'title'],
        ascending=[False, False, True],
    )

    # Keep the original output columns and expose a 1-based rank as the index
    top_movies = sorted_data.head(top_n)[['title', 'avg_rating', 'num_reviews']]
    top_movies = top_movies.reset_index(drop=True)
    top_movies.index += 1

    return top_movies

# Demo: the five best-rated comedies that have at least 100 ratings each
genre, min_reviews, top_n = 'Comedy', 100, 5

recommendations = genre_based_popular_recommendation(
    data, genre=genre, min_reviews=min_reviews, top_n=top_n
)
print(recommendations)


Create a content-based recommender system that recommends the top N movies whose genres are most similar to those of a given movie (m).

In [6]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Build TF-IDF features from the 'genres' column, one feature per genre label.
# The labels are pipe-separated, so every run of non-'|' characters is taken as
# a single token. The default word tokenizer would split hyphenated labels
# such as "Sci-Fi" into two unrelated features, and English stop-word removal
# would strip "no" from "(no genres listed)"; neither is wanted here.
tfidf = TfidfVectorizer(token_pattern=r'[^|]+')
# Missing genre strings become empty documents instead of raising.
tfidf_matrix = tfidf.fit_transform(movies['genres'].fillna(''))

# Pairwise cosine similarity between all movies; row/column i corresponds to
# the i-th row of `movies`.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def content_based_recommendation(movie_title, top_n=5):
    """Recommend the movies whose genres are most similar to a given movie.

    Relies on the module-level ``movies`` DataFrame and the ``cosine_sim``
    matrix computed from it, where row ``i`` of ``cosine_sim`` belongs to the
    ``i``-th row of ``movies``.

    Parameters
    ----------
    movie_title : str
        Title to look up; compared case-insensitively against
        ``movies['title']``. If several rows match, the first one is used.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or None
        The ``title`` and ``genres`` of the most similar movies, most similar
        first, with a fresh 0-based index. ``None`` (after printing a
        message) when the title is not in the dataset.
    """
    # Boolean mask over row positions, so the lookup does not depend on the
    # index labels of `movies`.
    title_matches = (movies['title'].str.lower() == movie_title.lower()).to_numpy()

    if not title_matches.any():
        print(f"Movie '{movie_title}' not found in the dataset.")
        return None

    # argmax on a boolean array yields the position of the first True
    idx = int(title_matches.argmax())

    # Rank every *other* movie by similarity. The query movie is removed by
    # position rather than by assuming it sorts first: movies with an
    # identical genre string tie at the maximum score, and a stable sort
    # keeps earlier rows ahead of the query movie, so dropping "the first
    # result" would discard a real match and recommend the movie to itself.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Positions of the top_n most similar movies
    movie_indices = [pos for pos, _ in ranked[:max(top_n, 0)]]

    # Return the top N most similar movies as a DataFrame
    recommended_movies = movies.iloc[movie_indices][['title', 'genres']]
    return recommended_movies.reset_index(drop=True)

# Demo: five movies whose genres are closest to Toy Story's
movie_title, top_n = 'Toy Story (1995)', 5
recommendations = content_based_recommendation(movie_title, top_n=top_n)
print(recommendations)


                                            title  \
0                                     Antz (1998)   
1                              Toy Story 2 (1999)   
2  Adventures of Rocky and Bullwinkle, The (2000)   
3                Emperor's New Groove, The (2000)   
4                           Monsters, Inc. (2001)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1  Adventure|Animation|Children|Comedy|Fantasy  
2  Adventure|Animation|Children|Comedy|Fantasy  
3  Adventure|Animation|Children|Comedy|Fantasy  
4  Adventure|Animation|Children|Comedy|Fantasy  


Create a collaborative-filtering recommender system that recommends the top N movies for a target user “u” based on the “K” most similar users.

In [8]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# Users-by-movies rating table: one row per userId, one column per movieId.
# Movies a user has not rated are stored as 0.
user_movie_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

# Sparse (CSR) copy of the table, used as input for the similarity computation
user_movie_sparse = csr_matrix(user_movie_matrix.to_numpy())

# User-to-user cosine similarity over the rating vectors
user_similarity = cosine_similarity(user_movie_sparse)

# Label both axes with the user ids so similarities can be looked up by userId
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)

def collaborative_recommendation(user_id, top_n=5, k=10):
    """Recommend movies to a user from the ratings of their most similar users.

    Relies on the module-level ``user_similarity_df`` (user-by-user
    similarity), ``user_movie_matrix`` (user-by-movie ratings, with 0 meaning
    "not rated") and ``movies`` (for titles).

    Parameters
    ----------
    user_id : hashable
        Label of the target user in ``user_similarity_df``.
    top_n : int, default 5
        Maximum number of movies to return.
    k : int, default 10
        Number of most similar users whose ratings are averaged.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` and ``Average Rating from Similar Users``, best
        score first, with a fresh 0-based index.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in the similarity table.
    """
    if user_id not in user_similarity_df.columns:
        raise KeyError(f"User {user_id!r} not found in the ratings data.")

    # Remove the target user by label before ranking. Skipping "the first
    # entry" is not reliable: another user with the same similarity score can
    # sort ahead, leaving the target among their own neighbours.
    neighbour_scores = user_similarity_df[user_id].drop(labels=user_id, errors='ignore')
    similar_users = neighbour_scores.sort_values(ascending=False).iloc[:max(k, 0)].index

    # Ratings given by the k nearest neighbours
    similar_users_ratings = user_movie_matrix.loc[similar_users]

    # Mean score per movie across the neighbours.
    # NOTE: unrated movies are stored as 0 and therefore count towards the
    # mean, so this is an average over all k neighbours, not only over those
    # who rated the movie.
    movie_recommendations = similar_users_ratings.mean(axis=0)

    # Keep only movies the target user has not rated yet (stored as 0)
    user_rated_movies = user_movie_matrix.loc[user_id]
    unrated_movies = movie_recommendations[user_rated_movies == 0]

    # Sort the recommendations by score in descending order and get the top N
    top_recommendations = unrated_movies.sort_values(ascending=False).head(top_n)

    # Look up titles by movieId; copy so the new column is written to an
    # independent frame rather than to a slice of `movies`.
    recommended_movies = (
        movies.set_index('movieId').loc[top_recommendations.index, ['title']].copy()
    )
    recommended_movies['Average Rating from Similar Users'] = top_recommendations.values

    return recommended_movies.reset_index(drop=True)

# Demo: five recommendations for user 1, drawn from the 100 most similar users
user_id, top_n, k = 1, 5, 100

recommendations = collaborative_recommendation(user_id, top_n=top_n, k=k)
print(recommendations)


                                       title  \
0  Indiana Jones and the Last Crusade (1989)   
1                           Toy Story (1995)   
2                            Die Hard (1988)   
3                              Aliens (1986)   
4                             Memento (2000)   

   Average Rating from Similar Users  
0                              2.570  
1                              2.420  
2                              2.410  
3                              2.355  
4                              2.320  
