In [1]:
import pandas as pd

# Read the first three tab-separated fields of every record in u.data
# and label them with the names listed in r_cols.
r_cols = ["user_id", "movie_id", "rating"]
ratings = pd.read_csv(
    "ml-100k/u.data",
    sep="\t",
    names=r_cols,
    usecols=range(3),
)
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,0,50,5
1,0,172,5
2,0,133,1
3,196,242,3
4,186,302,3


In [2]:
# Read the first two pipe-separated fields of every record in u.item
# (decoded as ISO-8859-1) and label them with the names listed in m_cols.
m_cols = ["movie_id", "title"]
movies = pd.read_csv(
    "ml-100k/u.item",
    sep="|",
    names=m_cols,
    encoding="iso-8859-1",
    usecols=range(2),
)
movies.head()

Unnamed: 0,movie_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [3]:
# Attach each movie's title to its ratings by joining the two tables on their shared movie_id column.

In [4]:
# No join key is given, so pandas joins on the columns the two frames share.
# The result replaces `ratings`, which now also carries each movie's title.
ratings = movies.merge(ratings)
ratings.head()

Unnamed: 0,movie_id,title,user_id,rating
0,1,Toy Story (1995),308,4
1,1,Toy Story (1995),287,5
2,1,Toy Story (1995),148,4
3,1,Toy Story (1995),280,4
4,1,Toy Story (1995),66,3


In [5]:
# Number of distinct movie titles and number of distinct users in the merged table.
ratings["title"].nunique(), ratings["user_id"].nunique()

(1664, 944)

In [6]:
import numpy as np

In [7]:
# Summarise the ratings of every title. The aggregations are passed by name:
# handing NumPy callables (np.size, np.sum, np.mean) to agg() is deprecated in
# recent pandas releases and emits a FutureWarning, while the string names
# produce the same ("rating", "size" / "sum" / "mean") columns.
movie_grouped = ratings.groupby("title").agg({"rating": ["size", "sum", "mean"]})
# size --> number of ratings the title received
# sum  --> total of all rating values given to the title
# mean --> average rating of the title

In [8]:
# Display the per-title summary: rating count (size), rating total (sum) and average rating (mean).
movie_grouped

Unnamed: 0_level_0,rating,rating,rating
Unnamed: 0_level_1,size,sum,mean
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
'Til There Was You (1997),9,21,2.333333
1-900 (1994),5,13,2.600000
101 Dalmatians (1996),109,317,2.908257
12 Angry Men (1957),125,543,4.344000
187 (1997),41,124,3.024390
...,...,...,...
Young Guns II (1990),44,122,2.772727
"Young Poisoner's Handbook, The (1995)",41,137,3.341463
Zeus and Roxanne (1997),6,13,2.166667
unknown,9,31,3.444444


### <font color='red'>How can I recommend the best movie?</font>
### Picking the highest average rating is tempting, but a movie rated by a single person who gave it 5 stars would then top the list.
### Therefore, we also need to take into account how many people rated each movie.

In [9]:
# Naive attempt: order the titles by average rating alone, highest first.
# This is not a usable "best movie" list, because a title with a single
# 5-star rating ends up at the very top.
popular_movies = movie_grouped.sort_values(by=("rating", "mean"), ascending=False)

In [10]:
# Display the titles ordered by average rating (highest first).
popular_movies

Unnamed: 0_level_0,rating,rating,rating
Unnamed: 0_level_1,size,sum,mean
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
They Made Me a Criminal (1939),1,5,5.0
Marlene Dietrich: Shadow and Light (1996),1,5,5.0
"Saint of Fort Washington, The (1993)",2,10,5.0
Someone Else's America (1995),1,5,5.0
Star Kid (1997),3,15,5.0
...,...,...,...
"Eye of Vichy, The (Oeil de Vichy, L') (1993)",1,1,1.0
King of New York (1990),1,1,1.0
Touki Bouki (Journey of the Hyena) (1973),1,1,1.0
"Bloody Child, The (1996)",1,1,1.0


In [11]:
# Grand total of the rating values over all titles.
grouped_sum = movie_grouped[("rating", "sum")].sum()
grouped_sum

352997

In [12]:
# Each title's share, in percent, of the grand total of rating values.
popular_movies["percentage"] = (popular_movies[("rating", "sum")] / grouped_sum) * 100

In [13]:
# Preview the first five rows, now including the percentage column.
popular_movies.head(n=5)

Unnamed: 0_level_0,rating,rating,rating,percentage
Unnamed: 0_level_1,size,sum,mean,Unnamed: 4_level_1
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
They Made Me a Criminal (1939),1,5,5.0,0.001416
Marlene Dietrich: Shadow and Light (1996),1,5,5.0,0.001416
"Saint of Fort Washington, The (1993)",2,10,5.0,0.002833
Someone Else's America (1995),1,5,5.0,0.001416
Star Kid (1997),3,15,5.0,0.004249


In [14]:
# Preview the titles ordered by their share of the rating total, largest first
# (display only; popular_movies itself is not reassigned here).
popular_movies.sort_values(by="percentage", ascending=False)

Unnamed: 0_level_0,rating,rating,rating,percentage
Unnamed: 0_level_1,size,sum,mean,Unnamed: 4_level_1
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Star Wars (1977),584,2546,4.359589,0.721253
Fargo (1996),508,2111,4.155512,0.598022
Return of the Jedi (1983),507,2032,4.007890,0.575642
Contact (1997),509,1936,3.803536,0.548447
Raiders of the Lost Ark (1981),420,1786,4.252381,0.505953
...,...,...,...,...
Getting Away With Murder (1996),1,1,1.000000,0.000283
The Courtyard (1995),1,1,1.000000,0.000283
"Promise, The (Versprechen, Das) (1994)",1,1,1.000000,0.000283
JLG/JLG - autoportrait de décembre (1994),1,1,1.000000,0.000283


In [15]:
# Keep the ordering by share of the rating total (largest first) for the following cells.
popular_movies = popular_movies.sort_values(by="percentage", ascending=False)

In [16]:
# Add a Rank column: 1 marks the largest percentage, and titles with equal
# percentages all receive the smallest rank of their tied group.
popular_movies["Rank"] = popular_movies["percentage"].rank(method="min", ascending=False)

In [17]:
popular_movies

Unnamed: 0_level_0,rating,rating,rating,percentage,Rank
Unnamed: 0_level_1,size,sum,mean,Unnamed: 4_level_1,Unnamed: 5_level_1
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Star Wars (1977),584,2546,4.359589,0.721253,1.0
Fargo (1996),508,2111,4.155512,0.598022,2.0
Return of the Jedi (1983),507,2032,4.007890,0.575642,3.0
Contact (1997),509,1936,3.803536,0.548447,4.0
Raiders of the Lost Ark (1981),420,1786,4.252381,0.505953,5.0
...,...,...,...,...,...
Getting Away With Murder (1996),1,1,1.000000,0.000283,1608.0
The Courtyard (1995),1,1,1.000000,0.000283,1608.0
"Promise, The (Versprechen, Das) (1994)",1,1,1.000000,0.000283,1608.0
JLG/JLG - autoportrait de décembre (1994),1,1,1.000000,0.000283,1608.0


# <font color='red'>If a person has watched Star Wars (rank 1), I can recommend Fargo, the next movie in the ranking.</font>