In [6]:
import pandas as pd

In [7]:
# Locations of the dataset files read by the cells below.
# data_folder already ends with a slash, so file names are appended directly.
data_folder = "ml-100k/"
u_data_path = f"{data_folder}u.data"
u_item_path = f"{data_folder}u.item"
u_genre_path = f"{data_folder}u.genre"

In [8]:
# Read the tab-separated ratings file. Column names are supplied here rather
# than taken from the file, and only the columns at positions 1 and 2
# (movie_id and rating) are kept.
df_ratings = pd.read_csv(
    u_data_path,
    sep="\t",
    header=None,
    names=["user_id", "movie_id", "rating", "timestamp"],
    usecols=[1, 2],
)

In [9]:
# Read the pipe-separated genre list: one (genre, genre_id) pair per row,
# with genre_id used as the index of the resulting frame.
df_genres = pd.read_csv(
    u_genre_path,
    sep="|",
    header=None,
    names=["genre", "genre_id"],
    index_col="genre_id",
)

In [10]:
# Read the pipe-separated movie file. The first five columns are named
# explicitly; the remaining columns are named after the genres, in the order
# they appear in df_genres. "empty" and "imdb_url" are discarded right away.
df_movies = pd.read_csv(
    u_item_path,
    sep="|",
    header=None,
    names=["movie_id", "title", "release_date", "empty", "imdb_url", *df_genres["genre"]],
    encoding="latin-1",
).drop(columns=["empty", "imdb_url"])

In [11]:
# Calculate popularity: the number of ratings each movie received.
df_popularity = df_ratings.groupby("movie_id").size().reset_index(name="num_ratings")

# Attach the counts to the movie table.
# - Any "num_ratings" column left over from a previous run of this cell is
#   dropped first, so re-running the cell does not create
#   num_ratings_x / num_ratings_y columns.
# - Only "num_ratings" is filled with 0 (movies that were never rated).
#   A frame-wide fillna(0) would also replace missing values in unrelated
#   columns (e.g. release_date) with 0.
# - The count is cast back to int because the left merge turns it into
#   float whenever a movie has no ratings.
df_movies = (
    df_movies
    .drop(columns="num_ratings", errors="ignore")
    .merge(df_popularity, on="movie_id", how="left")
    .assign(num_ratings=lambda movies: movies["num_ratings"].fillna(0).astype(int))
)

In [12]:
# For every genre except "unknown", keep the five movies flagged with that
# genre that have the highest num_ratings. Maps genre name -> DataFrame
# with the columns "title" and "num_ratings".
top_movies = {
    genre_name: (
        df_movies
        .loc[df_movies[genre_name] == 1, ["title", "num_ratings"]]
        .nlargest(5, "num_ratings")
    )
    for genre_name in df_genres["genre"]
    if genre_name != "unknown"
}

In [13]:
# Print each genre's table under a heading. The table is followed by a
# space and a blank line, which separates consecutive genres in the output.
for genre_name, genre_top in top_movies.items():
    print(f"Top 5 movies for genre: {genre_name}")
    print(genre_top, end=" \n\n")

Top 5 movies for genre: Action
                              title  num_ratings
49                 Star Wars (1977)          583
180       Return of the Jedi (1983)          507
299            Air Force One (1997)          431
120   Independence Day (ID4) (1996)          429
173  Raiders of the Lost Ark (1981)          420 

Top 5 movies for genre: Adventure
                               title  num_ratings
49                  Star Wars (1977)          583
180        Return of the Jedi (1983)          507
173   Raiders of the Lost Ark (1981)          420
116                 Rock, The (1996)          378
171  Empire Strikes Back, The (1980)          367 

Top 5 movies for genre: Animation
                           title  num_ratings
0               Toy Story (1995)          452
70         Lion King, The (1994)          220
94                Aladdin (1992)          219
587  Beauty and the Beast (1991)          202
431              Fantasia (1940)          174 

Top 5 movies for genre: C