In [1]:
# importing libraries
import pandas as pd
import numpy as np

In [2]:
# load ratings data
ratings = pd.read_csv("ratings.csv")

In [3]:
# drop timestamp (not needed)
ratings = ratings.drop(columns=['timestamp'])

In [4]:
# load movies data
movies = pd.read_csv("movies.csv")

In [8]:
# merge ratings with movies
data = pd.merge(ratings, movies)

In [9]:
# select required columns
data = data[['userId', 'movieId', 'rating', 'title','genres']]

In [14]:
data.head(3)

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1,4.0,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]"
1,1,3,4.0,Grumpier Old Men (1995),"[Comedy, Romance]"
2,1,6,4.0,Heat (1995),"[Action, Crime, Thriller]"


In [11]:
data["genres"] = data["genres"].str.split("|")
data_exploded = data.explode("genres")

In [23]:
data_exploded = data_exploded[data_exploded["genres"] != "(no genres listed)"]

In [24]:
data_exploded.head(10)

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1,4.0,Toy Story (1995),Adventure
0,1,1,4.0,Toy Story (1995),Animation
0,1,1,4.0,Toy Story (1995),Children
0,1,1,4.0,Toy Story (1995),Comedy
0,1,1,4.0,Toy Story (1995),Fantasy
1,1,3,4.0,Grumpier Old Men (1995),Comedy
1,1,3,4.0,Grumpier Old Men (1995),Romance
2,1,6,4.0,Heat (1995),Action
2,1,6,4.0,Heat (1995),Crime
2,1,6,4.0,Heat (1995),Thriller


In [25]:
# Group The Data
grouped_data = data_exploded.groupby(["genres", "movieId", "title"])

In [26]:
#Calculate Average Rating and Count
popularity = grouped_data["rating"].agg(avg_rating="mean",rating_count="count")

In [27]:
#Reset Index 
#After groupby, the result is hard to read. So we convert it back to a normal table.
popularity = popularity.reset_index()

In [28]:
popularity.head()

Unnamed: 0,genres,movieId,title,avg_rating,rating_count
0,Action,6,Heat (1995),3.946078,102
1,Action,9,Sudden Death (1995),3.125,16
2,Action,10,GoldenEye (1995),3.496212,132
3,Action,15,Cutthroat Island (1995),3.0,13
4,Action,20,Money Train (1995),2.5,15


In [30]:
popularity_sorted = popularity.sort_values(
    by=["genres", "rating_count", "avg_rating"],
    ascending=[True, False, False]
)

top_5_per_genre = popularity_sorted.groupby("genres").head(5)
top_5_per_genre

Unnamed: 0,genres,movieId,title,avg_rating,rating_count
310,Action,2571,"Matrix, The (1999)",4.192446,278
36,Action,260,Star Wars: Episode IV - A New Hope (1977),4.231076,251
71,Action,480,Jurassic Park (1993),3.750000,238
16,Action,110,Braveheart (1995),4.031646,237
90,Action,589,Terminator 2: Judgment Day (1991),3.970982,224
...,...,...,...,...,...
21856,Western,590,Dances with Wolves (1990),3.835366,164
21874,Western,2012,Back to the Future Part III (1990),3.369318,88
21849,Western,368,Maverick (1994),3.500000,74
21863,Western,1201,"Good, the Bad and the Ugly, The (Buono, il bru...",4.145833,72


In [31]:
def get_top_5_by_genre(genre_name):
    # Filter only the given genre
    genre_data = popularity[popularity["genres"] == genre_name]
    
    # Sort by popularity (rating count first, then avg rating)
    genre_data_sorted = genre_data.sort_values(
        by=["rating_count", "avg_rating"],
        ascending=False
    )
    
    # Select top 5 movies
    top_5 = genre_data_sorted.head(5)
    
    return top_5


In [32]:
get_top_5_by_genre("Comedy")

Unnamed: 0,genres,movieId,title,avg_rating,rating_count
4479,Comedy,356,Forrest Gump (1994),4.164134,329
4458,Comedy,296,Pulp Fiction (1994),4.197068,307
4364,Comedy,1,Toy Story (1995),3.92093,215
4558,Comedy,588,Aladdin (1992),3.79235,183
4561,Comedy,608,Fargo (1996),4.116022,181


In [33]:
get_top_5_by_genre("Action")

Unnamed: 0,genres,movieId,title,avg_rating,rating_count
310,Action,2571,"Matrix, The (1999)",4.192446,278
36,Action,260,Star Wars: Episode IV - A New Hope (1977),4.231076,251
71,Action,480,Jurassic Park (1993),3.75,238
16,Action,110,Braveheart (1995),4.031646,237
90,Action,589,Terminator 2: Judgment Day (1991),3.970982,224


In [34]:
genresNames = popularity["genres"].unique()
genresNames

array(['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western'], dtype=object)

In [35]:
get_top_5_by_genre("Fantasy")

Unnamed: 0,genres,movieId,title,avg_rating,rating_count
14100,Fantasy,1,Toy Story (1995),3.92093,215
14328,Fantasy,4993,"Lord of the Rings: The Fellowship of the Ring,...",4.106061,198
14357,Fantasy,5952,"Lord of the Rings: The Two Towers, The (2002)",4.021277,188
14397,Fantasy,7153,"Lord of the Rings: The Return of the King, The...",4.118919,185
14303,Fantasy,4306,Shrek (2001),3.867647,170
