In [1]:
import pandas as pd

Step 1:Load Dataset

In [2]:
# Load datasets
movies = pd.read_csv('Movies.csv')
ratings = pd.read_csv('Ratings.csv')

Step 2: Exploratory Data Analysis (EDA)
Understanding Distribution of Features

In [5]:
# Display basic info about the datasets
print("Movies Dataset:")
print(movies.info())

Movies Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10329 entries, 0 to 10328
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  10329 non-null  int64 
 1   title    10329 non-null  object
 2   genres   10329 non-null  object
dtypes: int64(1), object(2)
memory usage: 242.2+ KB
None


In [6]:
print("\nRatings Dataset:")
print(ratings.info())


Ratings Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105339 entries, 0 to 105338
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     105339 non-null  int64  
 1   movieId    105339 non-null  int64  
 2   rating     105339 non-null  float64
 3   timestamp  105339 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.2 MB
None


Finding Unique Users and Movies

In [8]:
unique_users = ratings['userId'].nunique()
unique_movies = ratings['movieId'].nunique()

print(f"\nUnique Users: {unique_users}")
print(f"Unique Movies: {unique_movies}")



Unique Users: 668
Unique Movies: 10325


Average Rating and Total Movies at Genre Level

In [9]:
# Merge datasets
movie_ratings = pd.merge(ratings, movies, on='movieId')

In [10]:
# Average rating at genre level
genre_avg_rating = movie_ratings.groupby('genres')['rating'].mean()

In [11]:
# Total movies at genre level
genre_total_movies = movie_ratings.groupby('genres')['movieId'].nunique()

print("\nAverage Rating at Genre Level:")
print(genre_avg_rating)

print("\nTotal Movies at Genre Level:")
print(genre_total_movies)


Average Rating at Genre Level:
genres
(no genres listed)                     3.071429
Action                                 2.836406
Action|Adventure                       3.739804
Action|Adventure|Animation             4.125000
Action|Adventure|Animation|Children    3.550000
                                         ...   
Sci-Fi|Thriller|IMAX                   3.500000
Thriller                               3.473430
Thriller|War                           3.500000
War                                    3.613636
Western                                3.500000
Name: rating, Length: 938, dtype: float64

Total Movies at Genre Level:
genres
(no genres listed)                       7
Action                                  48
Action|Adventure                        32
Action|Adventure|Animation               3
Action|Adventure|Animation|Children      1
                                      ... 
Sci-Fi|Thriller|IMAX                     1
Thriller                               106
Thriller|W

Unique Genres Considered

In [12]:
unique_genres = movies['genres'].str.split('|').explode().unique()
print("\nUnique Genres Considered:")
print(unique_genres)



Unique Genres Considered:
['Adventure' 'Animation' 'Children' 'Comedy' 'Fantasy' 'Romance' 'Drama'
 'Action' 'Crime' 'Thriller' 'Horror' 'Mystery' 'Sci-Fi' 'IMAX' 'War'
 'Musical' 'Documentary' 'Western' 'Film-Noir' '(no genres listed)']


Step 3: Design the 3 Different Recommendation Modules

Popularity-Based Recommender System

In [21]:
def popularity_recommender(genre, min_reviews, num_recommendations):
    # Filter movies based on genre and minimum reviews threshold
    genre_movies = movie_ratings[movie_ratings['genres'].str.contains(genre, case=False)]
    genre_movies = genre_movies.groupby('title').filter(lambda x: len(x) >= min_reviews)
    
    # Calculate average rating and number of reviews
    genre_stats = genre_movies.groupby('title').agg({'rating': 'mean', 'movieId': 'count'})
    genre_stats = genre_stats.rename(columns={'rating': 'AverageMovieRating', 'movieId': 'NumReviews'})
    
    # Sort by average rating in descending order
    genre_stats = genre_stats.sort_values(by=['AverageMovieRating'], ascending=False)
    
    # Display top N recommendations
    top_recommendations = genre_stats.head(num_recommendations)
    print(top_recommendations)
    
# Example usage
popularity_recommender('Comedy', 100, 5)


                                        AverageMovieRating  NumReviews
title                                                                 
Monty Python and the Holy Grail (1975)            4.301948         154
Fargo (1996)                                      4.271144         201
Princess Bride, The (1987)                        4.163743         171
Pulp Fiction (1994)                               4.160000         325
Forrest Gump (1994)                               4.138264         311


Content-Based Recommender System

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


In [25]:
def content_based_recommender(movie_title, num_recommendations):
    # Check if the movie exists in the dataset
    if movie_title not in movies['title'].values:
        print(f"Movie '{movie_title}' not found in the dataset.")
        return

    # TF-IDF vectorizer
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')

    # Compute TF-IDF matrix
    tfidf_matrix = tfidf_vectorizer.fit_transform(movies['genres'])

    # Compute cosine similarity
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Get index of the movie
    movie_index = movies[movies['title'] == movie_title].index

    # Get similarity scores
    sim_scores = list(enumerate(cosine_sim[movie_index][0]))

    # Sort movies based on similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Display top N recommendations
    top_recommendations = sim_scores[1:num_recommendations + 1]
    recommended_movies = [movies['title'].iloc[i[0]] for i in top_recommendations]
    print(recommended_movies)

# Example usage
content_based_recommender('Toy Story', 5)


Movie 'Toy Story' not found in the dataset.


Collaborative-Based Recommender System

In [26]:
from sklearn.metrics.pairwise import cosine_similarity


In [27]:
def collaborative_recommender(user_id, num_recommendations, k_similar_users):
    # Pivot ratings table
    user_movie_ratings = ratings.pivot_table(index='userId', columns='movieId', values='rating')
    
    # Fill NaN values with 0
    user_movie_ratings = user_movie_ratings.fillna(0)
    
    # Get user ratings
    user_ratings = user_movie_ratings.loc[user_id].values.reshape(1, -1)
    
    # Compute cosine similarity
    similarities = cosine_similarity(user_movie_ratings.values, user_ratings)
    
    # Get indices of k most similar users
    similar_users = similarities.flatten().argsort()[-k_similar_users-1:-1][::-1]
    
    # Get movies rated by similar users
    recommended_movies = user_movie_ratings.iloc[similar_users].sum().sort_values(ascending=False)
    
    # Exclude movies already rated by the target user
    recommended_movies = recommended_movies[user_movie_ratings.loc[user_id] == 0]
    
    # Display top N recommendations
    top_recommendations = recommended_movies.head(num_recommendations)
    print(top_recommendations)

# Example usage
collaborative_recommender(1, 5, 100)

movieId
1291    257.0
1       242.0
1036    241.0
1200    235.5
4226    232.0
dtype: float64


In [28]:
!pip install ipywidgets






Additional/Optional: Create a GUI interface using Python libraries (ipywidgetsetc.) to play around with there commendation module


In [29]:
import ipywidgets as widgets
from IPython.display import display


In [30]:
# Function to handle button click
def on_button_click(b):
    movie_title = movie_title_input.value
    num_recommendations = int(num_recommendations_input.value)
    
    # Call the content-based recommender function
    content_based_recommender(movie_title, num_recommendations)

In [31]:
# Create input widgets
movie_title_input = widgets.Text(value='Toy Story', description='Movie Title:')
num_recommendations_input = widgets.IntText(value=5, description='Num Recommendations:')
submit_button = widgets.Button(description='Submit')
submit_button.on_click(on_button_click)


In [32]:
# Display the widgets
display(movie_title_input, num_recommendations_input, submit_button)

Text(value='Toy Story', description='Movie Title:')

IntText(value=5, description='Num Recommendations:')

Button(description='Submit', style=ButtonStyle())

['Four Rooms (1995)', 'Ace Ventura: When Nature Calls (1995)', 'Bio-Dome (1996)', 'Friday (1995)', 'Black Sheep (1996)']
['Four Rooms (1995)', 'Ace Ventura: When Nature Calls (1995)', 'Bio-Dome (1996)', 'Friday (1995)', 'Black Sheep (1996)']
['Four Rooms (1995)', 'Ace Ventura: When Nature Calls (1995)', 'Bio-Dome (1996)', 'Friday (1995)', 'Black Sheep (1996)']
['Four Rooms (1995)', 'Ace Ventura: When Nature Calls (1995)', 'Bio-Dome (1996)', 'Friday (1995)', 'Black Sheep (1996)']
['American President, The (1995)', 'Mighty Aphrodite (1995)', 'Postman, The (Postino, Il) (1994)', 'Beautiful Girls (1996)']
['American President, The (1995)', 'Mighty Aphrodite (1995)', 'Postman, The (Postino, Il) (1994)', 'Beautiful Girls (1996)']
