In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Load datasets
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [2]:
# Exploreing  Movie dataset
movies.head(5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
# Exploreing   Rating dataset
ratings.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [6]:
# Movies Data set Information
print(movies.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10329 entries, 0 to 10328
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  10329 non-null  int64 
 1   title    10329 non-null  object
 2   genres   10329 non-null  object
dtypes: int64(1), object(2)
memory usage: 242.2+ KB
None


In [7]:
# Rating Data set Information
print(ratings.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105339 entries, 0 to 105338
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     105339 non-null  int64  
 1   movieId    105339 non-null  int64  
 2   rating     105339 non-null  float64
 3   timestamp  105339 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.2 MB
None


In [8]:
#Finding the unique users and movies

print("Unique Users:", ratings['userId'].nunique())
print("Unique Movies:", movies['movieId'].nunique())


Unique Users: 668
Unique Movies: 10329


In [11]:
# Average rating and total movies per genre
ratings['rating'].describe()
#spliting the movie genres
movies['genres'] = movies['genres'].str.split('|')


0        [Adventure, Animation, Children, Comedy, Fantasy]
1                           [Adventure, Children, Fantasy]
2                                        [Comedy, Romance]
3                                 [Comedy, Drama, Romance]
4                                                 [Comedy]
                               ...                        
10324                        [Animation, Children, Comedy]
10325                                             [Comedy]
10326                                             [Comedy]
10327                                              [Drama]
10328                                 [(no genres listed)]
Name: genres, Length: 10329, dtype: object

In [12]:
# Extract genres and count unique genres
all_genres = set(i for genres in movies['genres'] for i in genres)
print("Unique genres:", all_genres)

Unique genres: {'Western', 'Horror', 'War', '(no genres listed)', 'Musical', 'Crime', 'Animation', 'IMAX', 'Sci-Fi', 'Comedy', 'Thriller', 'Drama', 'Film-Noir', 'Action', 'Children', 'Documentary', 'Fantasy', 'Mystery', 'Adventure', 'Romance'}


In [59]:
# 1.Creating a popularity-based Recommender system at a genre level
#That suggests top N movies based on popularity within a given genre, filtered by minimum reviews.

def Popularity_Based_Recommender_System(genre, min_reviews_threshold, num_recommendations):
    # Filter movies by genre
    genre_movies = movies[movies['genres'].apply(lambda x: genre in x)]
    
    genre_ratings = ratings[ratings['movieId'].isin(genre_movies['movieId'])]
    
    # Aggregate ratings by movieId
    movie_stats = genre_ratings.groupby('movieId').agg({'rating': ['mean', 'count']})
    movie_stats.columns = ['Average_Movie_Rating', 'Num_Reviews']
    movie_stats = movie_stats[movie_stats['Num_Reviews'] >= min_reviews_threshold]
    
    # Merge with movies data
    movie_stats = movie_stats.merge(movies, on='movieId')
    
    # Sort and get top N recommendations
    recommendations = movie_stats.sort_values(by='Average_Movie_Rating', ascending=False).head(num_recommendations)
    
    return  recommendations[['title', 'Average_Movie_Rating', 'Num_Reviews']]

# Example 
print("Popularity Based Recommender System")
print("\n",Popularity_Based_Recommender_System('Comedy', 100, 5))



Popularity Based Recommender System

                                      title  Average_Movie_Rating  Num_Reviews
25  monty python and the holy grail (1975)              4.301948          154
19                            fargo (1996)              4.271144          201
26              princess bride, the (1987)              4.163743          171
6                      pulp fiction (1994)              4.160000          325
9                      forrest gump (1994)              4.138264          311


In [None]:
Create a DataFrame for the recommendations
    recommended_movies = movies.iloc[movie_indices][['title', 'genres', 'average_rating', 'num_reviews']]
    recommended_movies.reset_index(drop=True, inplace=True)
    recommended_movies.columns = ['Movie Title', 'Genres', 'Average Rating', 'Number of Reviews']
    
    return recommended_movies

In [57]:
#2.CreateING a content-based recommender system.
# That suggests top N movies based on similar movie genres.

def content_based_recommender(movie_title, num_recommendations):
    # Vectorize movie genres
    movies['genres_str'] = movies['genres'].apply(lambda x: ' '.join(x))
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(movies['genres_str'])
    
    # Compute similarity matrix
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    
    # Standardize movie titles for comparison
    movies['title'] = movies['title'].str.lower()
    movie_title = movie_title.lower()
    
    # Check if the movie exists in the dataset
    if movie_title not in movies['title'].values:
        return f"Movie '{movie_title}' not found in the dataset."
    
    # Get movie index
    movie_idx = movies[movies['title'] == movie_title].index[0]
    
    # Get similarity scores
    sim_scores = list(enumerate(cosine_sim[movie_idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:num_recommendations+1]
    
    # Get movie indices
    movie_indices = [i[0] for i in sim_scores]
    
    if not movie_indices:
        return "No similar movies found."
    
    m=movies['title'].iloc[movie_indices]
    
    
    return m

print("Content Based Recomendation System.")
print("\n",content_based_recommender('Toy Story (1995)', 5))


Content Based Recomendation System.

 1815                                       antz (1998)
2496                                toy story 2 (1999)
2967    adventures of rocky and bullwinkle, the (2000)
3166                  emperor's new groove, the (2000)
3811                             monsters, inc. (2001)
Name: title, dtype: object


In [60]:
# 3.Creating a Collabrative based recommendation System.
#That Implement a collaborative filtering system that recommends top N movies based on similar users.

def collaborative_based_recommender(user_id, num_recommendations, num_similar_users):
    # Create user-movie matrix
    user_movie_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')
    
    # Normalize ratings
    user_movie_matrix = user_movie_matrix.subtract(user_movie_matrix.mean(axis=1), axis='rows')
    
    # Compute cosine similarity between users
    user_similarity = cosine_similarity(user_movie_matrix.fillna(0))
    user_sim_df = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)
    
    # Get similar users
    similar_users = user_sim_df[user_id].sort_values(ascending=False)[1:num_similar_users+1].index
    
    # Aggregate ratings of similar users
    similar_users_ratings = user_movie_matrix.loc[similar_users].mean(axis=0)
    user_rated_movies = user_movie_matrix.loc[user_id].dropna().index
    recommendations = similar_users_ratings.drop(user_rated_movies).sort_values(ascending=False).head(num_recommendations)
    k=movies[movies['movieId'].isin(recommendations.index)]['title']
    return k

# Example usage
print("Collaborative based recomendation system")
print("\n",collaborative_based_recommender(1, 5, 100))





Collaborative based recomendation system

 356           in the mouth of madness (1995)
2123    thing from another world, the (1951)
2292             barefoot in the park (1967)
2612                   bad lieutenant (1992)
7227                 plague dogs, the (1982)
Name: title, dtype: object


In [63]:
#creatin GUI interface using python libraries ipwidgets
import ipywidgets as widgets
from IPython.display import display

def interactive_popularity_recommender(genre, min_reviews, num_recommendations):
    recommendations = Popularity_Based_Recommender_System(genre, min_reviews, num_recommendations)
    display(recommendations)

genre_widget = widgets.Dropdown(options=list(all_genres), description='Genre:')
min_reviews_widget = widgets.IntSlider(value=100, min=1, max=500, step=1, description='Min Reviews:')
num_recommendations_widget = widgets.IntSlider(value=5, min=1, max=20, step=1, description='Recommendations:')
recommend_button = widgets.Button(description='Recommend')

recommend_button.on_click(lambda x: interactive_popularity_recommender(genre_widget.value, min_reviews_widget.value, num_recommendations_widget.value))

display(genre_widget, min_reviews_widget, num_recommendations_widget, recommend_button)


Dropdown(description='Genre:', options=('Western', 'Horror', 'War', '(no genres listed)', 'Musical', 'Crime', …

IntSlider(value=100, description='Min Reviews:', max=500, min=1)

IntSlider(value=5, description='Recommendations:', max=20, min=1)

Button(description='Recommend', style=ButtonStyle())

Unnamed: 0,title,Average_Movie_Rating,Num_Reviews
1,"silence of the lambs, the (1991)",4.194828,290
2,aliens (1986),4.146497,157
6,"sixth sense, the (1999)",4.13089,191
3,alien (1979),4.064103,156
4,"shining, the (1980)",4.004464,112
