In [None]:
import pandas as pd

# Load the MovieLens 20M ratings and movie tables from the local data directory.
ratings = pd.read_csv("row_data/MovieLens 20M Dataset/rating.csv")
movies = pd.read_csv("row_data/MovieLens 20M Dataset/movie.csv")

# Attach title/genres to every rating row. Each rating is expected to match at
# most one movie row, so validate="many_to_one" makes pandas raise a MergeError
# if movie.csv contains a duplicated movieId, instead of silently multiplying
# rating rows (which would inflate every count derived from this frame).
ratings_movies = pd.merge(
    ratings,
    movies,
    on="movieId",
    how="inner",
    validate="many_to_one",
)

# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids mixed-type columns.
metadata = pd.read_csv("row_data/The Movies Dataset/movies_metadata.csv", low_memory=False)

In [8]:
# Display the merged ratings + movie-info frame as this cell's output.
ratings_movies

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,2,3.5,2005-04-02 23:53:47,Jumanji (1995),Adventure|Children|Fantasy
1,1,29,3.5,2005-04-02 23:31:16,"City of Lost Children, The (Cité des enfants p...",Adventure|Drama|Fantasy|Mystery|Sci-Fi
2,1,32,3.5,2005-04-02 23:33:39,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery|Sci-Fi|Thriller
3,1,47,3.5,2005-04-02 23:32:07,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,3.5,2005-04-02 23:29:40,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
...,...,...,...,...,...,...
20000258,138493,68954,4.5,2009-11-13 15:42:00,Up (2009),Adventure|Animation|Children|Drama
20000259,138493,69526,4.5,2009-12-03 18:31:48,Transformers: Revenge of the Fallen (2009),Action|Adventure|Sci-Fi|IMAX
20000260,138493,69644,3.0,2009-12-07 18:10:57,Ice Age: Dawn of the Dinosaurs (2009),Action|Adventure|Animation|Children|Comedy|Rom...
20000261,138493,70286,5.0,2009-11-13 15:42:24,District 9 (2009),Mystery|Sci-Fi|Thriller


In [None]:
# 1. Movies with the most ratings
# Count the rating rows for every (movieId, title) pair.
# Note on reset_index(): without it, .size() leaves a Series rather than a
# DataFrame with a named "rating_count" column.
rating_counts = (
    ratings_movies
    .groupby(["movieId", "title"])
    .size()
    .reset_index(name="rating_count")
)

# Keep the ten most-rated movies, largest count first.
rating_counts.sort_values("rating_count", ascending=False).head(10)

Unnamed: 0,movieId,title,rating_count
293,296,Pulp Fiction (1994),67310
352,356,Forrest Gump (1994),66172
315,318,"Shawshank Redemption, The (1994)",63366
587,593,"Silence of the Lambs, The (1991)",63299
476,480,Jurassic Park (1993),59715
257,260,Star Wars: Episode IV - A New Hope (1977),54502
108,110,Braveheart (1995),53769
583,589,Terminator 2: Judgment Day (1991),52244
2486,2571,"Matrix, The (1999)",51334
523,527,Schindler's List (1993),50054


In [None]:
# 2. Which genre appears most often
# Split the pipe-delimited "genres" string into a list, then emit one row per
# (rating, genre) pair. assign() builds a new frame, so ratings_movies itself
# is left unmodified.
df_genre = (
    ratings_movies
    .assign(genre=ratings_movies["genres"].str.split("|"))
    .explode("genre")
)

# Number of exploded rows per genre, most frequent first.
genre_sizes = df_genre.groupby("genre").size()
genre_count_df = (
    genre_sizes
    .reset_index(name="genre_count")
    .sort_values("genre_count", ascending=False)
)
genre_count_df

Unnamed: 0,genre,genre_count
8,Drama,8857853
5,Comedy,7502234
1,Action,5614208
17,Thriller,5313506
2,Adventure,4380351
15,Romance,3802002
6,Crime,3298335
16,Sci-Fi,3150141
9,Fantasy,2111403
4,Children,1669249


In [None]:
# 3. Filter first, then compute the mean rating based on genre (highest first)
# Drop rows whose genre is missing or is the "(no genres listed)" placeholder.
genre_col = df_genre["genre"]
has_real_genre = genre_col.notna() & genre_col.ne("(no genres listed)")
df_without_blank = df_genre[has_real_genre]

# Named aggregation produces the mean and the count in one pass.
# A single-statistic chain ending in
#   .reset_index(name='mean').sort_values('mean', ascending=False)
# would also work, but it can only compute one statistic at a time.
per_genre = df_without_blank.groupby("genre")
r = (
    per_genre
    .agg(mean=("rating", "mean"), count=("rating", "count"))
    .sort_values("mean", ascending=False)
)
r

Unnamed: 0_level_0,mean,count
genre,Unnamed: 1_level_1,Unnamed: 2_level_1
Film-Noir,3.965381,216689
War,3.809531,1048618
Documentary,3.739718,244619
Crime,3.674528,3298335
Drama,3.674296,8857853
Mystery,3.663509,1557282
IMAX,3.655946,492366
Animation,3.617494,1140476
Western,3.570498,423714
Musical,3.558091,870915


In [None]:
# 4. Movies with the highest average rating
# A minimum-count threshold (e.g. at least 50 ratings) is recommended so that
# a movie with only 1-2 ratings cannot jump to first place.
MIN_CNT = 50

# Mean rating and number of ratings for every (movieId, title) pair.
grouped = ratings_movies.groupby(["movieId", "title"])
movie_stats = grouped.agg(
    rating_mean=("rating", "mean"),
    rating_cnt=("rating", "count"),
).reset_index()

# Keep only movies that meet the threshold, then rank by mean rating with the
# rating count as the tie-breaker (both descending).
enough_ratings = movie_stats["rating_cnt"].ge(MIN_CNT)
top_by_mean = (
    movie_stats.loc[enough_ratings]
    .sort_values(["rating_mean", "rating_cnt"], ascending=False)
    .head(10)
)

top_by_mean

In [5]:
# Display the frame read from movies_metadata.csv as this cell's output.
metadata

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45461,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n...",http://www.imdb.com/title/tt6209470/,439050,tt6209470,fa,رگ خواب,Rising and falling between a man and woman.,...,,0.0,90.0,"[{'iso_639_1': 'fa', 'name': 'فارسی'}]",Released,Rising and falling between a man and woman,Subdue,False,4.0,1.0
45462,False,,0,"[{'id': 18, 'name': 'Drama'}]",,111109,tt2028550,tl,Siglo ng Pagluluwal,An artist struggles to finish his work while a...,...,2011-11-17,0.0,360.0,"[{'iso_639_1': 'tl', 'name': ''}]",Released,,Century of Birthing,False,9.0,3.0
45463,False,,0,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",,67758,tt0303758,en,Betrayal,"When one of her hits goes wrong, a professiona...",...,2003-08-01,0.0,90.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A deadly game of wits.,Betrayal,False,3.8,6.0
45464,False,,0,[],,227506,tt0008536,en,Satana likuyushchiy,"In a small town live two brothers, one a minis...",...,1917-10-21,0.0,87.0,[],Released,,Satan Triumphant,False,0.0,0.0


In [6]:
# Display the frame read from movie.csv (movieId, title, genres) as this cell's output.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)
