In [None]:
import pandas as pd

# Locations of the two input CSV files (movie metadata and user ratings).
movies_path = '/content/movies.csv'
ratings_path = '/content/ratings.csv'

# Read both tables in one pass: movies first, then ratings.
movies_df, ratings_df = (
    pd.read_csv(csv_path) for csv_path in (movies_path, ratings_path)
)

# Preview the first rows of each table as the cell's output.
(movies_df.head(), ratings_df.head())


(   movieId                               title  \
 0        1                    Toy Story (1995)   
 1        2                      Jumanji (1995)   
 2        3             Grumpier Old Men (1995)   
 3        4            Waiting to Exhale (1995)   
 4        5  Father of the Bride Part II (1995)   
 
                                         genres  
 0  Adventure|Animation|Children|Comedy|Fantasy  
 1                   Adventure|Children|Fantasy  
 2                               Comedy|Romance  
 3                         Comedy|Drama|Romance  
 4                                       Comedy  ,
    userId  movieId  rating   timestamp
 0       1       16     4.0  1217897793
 1       1       24     1.5  1217895807
 2       1       32     4.0  1217896246
 3       1       47     4.0  1217896556
 4       1       50     4.0  1217896523)

In [None]:

# Number of distinct users and distinct movies that appear in the ratings table.
unique_users = ratings_df['userId'].nunique()
unique_movies = ratings_df['movieId'].nunique()

# Summary statistics (count, mean, std, quartiles, min/max) of the rating values.
rating_stats = ratings_df['rating'].describe()

# Flatten the pipe-separated genre strings ONCE and reuse the result for both
# the distinct-genre set and the per-genre counts. Rows with a missing genre
# string are dropped first so they cannot break the split.
# NOTE(review): every distinct label is counted as a genre, including any
# placeholder label the dataset may use for "no genre" - confirm that is intended.
genre_labels = movies_df['genres'].dropna().str.split('|').explode()

all_genres = set(genre_labels)
unique_genres = len(all_genres)

# Rebuilt from a plain list so the resulting Series carries no index name,
# which keeps the displayed output in the same shape as before.
genre_counts = pd.Series(genre_labels.tolist()).value_counts()

unique_users, unique_movies, rating_stats, unique_genres, genre_counts.head(10)


(668,
 10325,
 count    105339.000000
 mean          3.516850
 std           1.044872
 min           0.500000
 25%           3.000000
 50%           3.500000
 75%           4.000000
 max           5.000000
 Name: rating, dtype: float64,
 20,
 Drama        5220
 Comedy       3515
 Thriller     2187
 Romance      1788
 Action       1737
 Crime        1440
 Adventure    1164
 Horror       1001
 Sci-Fi        860
 Mystery       675
 Name: count, dtype: int64)

In [None]:
def popularity_based_recommender(movies_df, ratings_df, genre, min_reviews, num_recommendations):
    """Recommend the best-rated movies of a genre that have enough reviews.

    Args:
        movies_df: DataFrame with at least 'movieId', 'title' and 'genres'
            columns.
        ratings_df: DataFrame with at least 'movieId' and 'rating' columns.
        genre: Text looked up in the 'genres' column. The match is a
            case-insensitive, literal substring match.
        min_reviews: Minimum number of ratings a movie needs to be eligible.
        num_recommendations: Maximum number of rows to return.

    Returns:
        DataFrame with columns 'title', 'avg_rating' and 'num_reviews', sorted
        by average rating (highest first); movies with the same average are
        ordered by number of reviews (most first). Empty when nothing
        qualifies.
    """
    # regex=False: the genre is plain text, not a pattern. Without it, regex
    # metacharacters in the argument ("(", ".", "+", ...) either raise or
    # silently match unrelated genres.
    genre_mask = movies_df['genres'].str.contains(genre, case=False, na=False, regex=False)
    genre_movies = movies_df.loc[genre_mask, ['movieId', 'title']]

    # Inner join: movies without any rating drop out here.
    genre_ratings = pd.merge(genre_movies, ratings_df, on='movieId')

    popularity_df = genre_ratings.groupby(['movieId', 'title']).agg(
        avg_rating=('rating', 'mean'),
        num_reviews=('rating', 'count')
    ).reset_index()

    popular_movies = popularity_df[popularity_df['num_reviews'] >= min_reviews]

    # Secondary key makes the order deterministic when averages tie and
    # favours the movie backed by more ratings.
    top_movies = popular_movies.sort_values(
        by=['avg_rating', 'num_reviews'], ascending=[False, False]
    ).head(num_recommendations)

    return top_movies[['title', 'avg_rating', 'num_reviews']]

# Example query: up to five comedies with at least 100 ratings each.
genre, min_reviews, num_recommendations = "Comedy", 100, 5
print(
    popularity_based_recommender(
        movies_df=movies_df,
        ratings_df=ratings_df,
        genre=genre,
        min_reviews=min_reviews,
        num_recommendations=num_recommendations,
    )
)


                                      title  avg_rating  num_reviews
326  Monty Python and the Holy Grail (1975)    4.301948          154
193                            Fargo (1996)    4.271144          201
337              Princess Bride, The (1987)    4.163743          171
90                      Pulp Fiction (1994)    4.160000          325
110                     Forrest Gump (1994)    4.138264          311


In [None]:
def content_based_recommender(movies_df, movie_title, num_recommendations):
    """Recommend movies whose genres are most similar to a given movie.

    The reference movie is the first row whose title contains `movie_title`
    (case-insensitive, literal substring). Every other movie is scored by the
    Jaccard similarity between its set of genre labels and the reference
    movie's set; movies sharing no genre are discarded.

    Args:
        movies_df: DataFrame with at least 'title' and 'genres' columns, where
            'genres' holds pipe-separated genre labels.
        movie_title: Full or partial title of the reference movie.
        num_recommendations: Maximum number of rows to return.

    Returns:
        DataFrame with a single 'title' column, most similar first (ties keep
        their order in `movies_df`). If no title matches, the string
        "No movie found with title <movie_title>." is returned instead.
    """
    # regex=False: titles routinely contain "(" and ")" (release year), which
    # a regex search would interpret as a group, so a full title would never
    # match itself.
    title_mask = movies_df['title'].str.contains(movie_title, case=False, na=False, regex=False)
    target_movie = movies_df[title_mask]
    if target_movie.empty:
        return f"No movie found with title {movie_title}."

    target_row = target_movie.iloc[0]
    raw_target_genres = target_row['genres']
    # A missing genre value yields an empty set, i.e. no recommendations.
    target_genres = set(raw_target_genres.split('|')) if isinstance(raw_target_genres, str) else set()

    def _genre_similarity(genres):
        """Jaccard similarity between one movie's genre labels and the target's."""
        candidate_genres = set(genres.split('|'))
        union = target_genres | candidate_genres
        return len(target_genres & candidate_genres) / len(union) if union else 0.0

    # Exclude the reference movie itself (by title, as before).
    candidates = movies_df[movies_df['title'] != target_row['title']]
    scores = candidates['genres'].fillna('').map(_genre_similarity).astype(float).to_numpy()

    has_overlap = scores > 0
    candidates = candidates[has_overlap]

    # Stable sort on the negated score = descending similarity, ties in file
    # order. Positional indexing keeps this correct even if the frame's index
    # has duplicate labels.
    ranking = (-scores[has_overlap]).argsort(kind='stable')

    return candidates.iloc[ranking[:num_recommendations]][['title']]

# Example query: up to five movies that resemble "Toy Story" by genre.
movie_title, num_recommendations = "Toy Story", 5
print(
    content_based_recommender(
        movies_df=movies_df,
        movie_title=movie_title,
        num_recommendations=num_recommendations,
    )
)


                                title
1                      Jumanji (1995)
2             Grumpier Old Men (1995)
3            Waiting to Exhale (1995)
4  Father of the Bride Part II (1995)
6                      Sabrina (1995)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def collaborative_based_recommender(ratings_df, user_id, num_recommendations, k_similar_users):
    """Recommend unseen movies that the most similar users rated highly.

    Builds a user x movie rating matrix (unrated cells filled with 0), finds
    the `k_similar_users` users whose rating vectors have the highest cosine
    similarity to `user_id`, and scores each movie by its mean rating across
    those neighbours. Because unrated cells count as 0, a movie rated by few
    neighbours scores lower than one rated equally by many.

    Args:
        ratings_df: DataFrame with 'userId', 'movieId' and 'rating' columns,
            one row per (user, movie) pair.
        user_id: The user to recommend for.
        num_recommendations: Maximum number of movie ids to return.
        k_similar_users: Number of nearest neighbours to average over.

    Returns:
        List of movieId values, best score first. Movies the user already
        rated, and movies none of the neighbours rated, are never returned.

    Raises:
        KeyError: If `user_id` does not appear in `ratings_df`.
    """
    user_movie_ratings = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    if user_id not in user_movie_ratings.index:
        raise KeyError(f"user_id {user_id!r} not found in ratings_df")

    target_ratings = user_movie_ratings.loc[user_id]

    # Only the target user's row of the similarity matrix is needed, so
    # compare that one row against everyone instead of building all N x N pairs.
    similarity_row = cosine_similarity(
        user_movie_ratings.loc[[user_id]].to_numpy(),
        user_movie_ratings.to_numpy(),
    )[0]
    similarities = pd.Series(similarity_row, index=user_movie_ratings.index)

    # Remove the user by label rather than assuming they sort first: another
    # user with an identical (or proportional) rating vector also has
    # similarity 1.0 and could otherwise push the user into the neighbour set.
    neighbor_ranking = similarities.drop(index=user_id).sort_values(ascending=False, kind='stable')
    similar_users = neighbor_ranking.index[:max(k_similar_users, 0)]

    neighbor_scores = user_movie_ratings.loc[similar_users].mean(axis=0)
    # A score of 0 (or NaN when there are no neighbours) means no neighbour
    # rated the movie, so there is no evidence to recommend it.
    neighbor_scores = neighbor_scores[neighbor_scores > 0]

    watched_movies = target_ratings[target_ratings > 0].index
    recommended_movies = neighbor_scores[~neighbor_scores.index.isin(watched_movies)]
    recommended_movies = recommended_movies.sort_values(ascending=False, kind='stable')

    return recommended_movies.head(num_recommendations).index.tolist()

# Example query: five movie ids for user 1, based on the 100 most similar users.
user_id, num_recommendations, k_similar_users = 1, 5, 100
print(
    collaborative_based_recommender(
        ratings_df=ratings_df,
        user_id=user_id,
        num_recommendations=num_recommendations,
        k_similar_users=k_similar_users,
    )
)


[1291, 1, 1036, 1200, 4226]
