In [80]:
# Simple recommenders: basic systems that recommend the top items based on
# a certain metric or score.

# Considering the movie rating as the metric for recommending the top movies:
# on its own, the rating does not take the popularity of a movie into consideration.
# Thus, a movie with a rating of 9 from 5 voters will be considered 'better' than a movie with a rating of
# 8.8 from 10,000 voters, i.e. this metric favors movies with a small number of voters and skewed
# or extremely high ratings. As the number of voters increases, the rating of a movie regularizes
# and approaches a value that reflects the movie's quality.

# Weighted rating: to avoid the above shortcomings, we'll use the weighted rating formula as the metric/score, which is
# often used by IMDB. Mathematically, it is represented as follows:
# Weighted Rating (WR) = (v / (v + m)) * R + (m / (v + m)) * C
# where
# v is the number of votes for the movie;
# m is the minimum number of votes required to be listed in the chart;
# R is the average rating of the movie;
# C is the mean vote across the whole report (here: the mean of all ratings in the dataset).



# There is no single right value for m. You can view it as a preliminary negative filter that ignores movies
# which have fewer than a certain number of votes. The selectivity of the filter is up to your discretion.





In [81]:
import pandas as pd 
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [82]:
#Reading movies data from a CSV file in the working directory
#(the preview below shows movieId, title and genres columns)
movies=pd.read_csv("movies.csv")

In [83]:
# Preview the first rows of the movies table to check what was loaded.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [84]:
#Reading ratings data from a CSV file in the working directory
#(the preview below shows userId, movieId, rating and timestamp columns)
ratings=pd.read_csv("ratings.csv")

In [85]:
# Preview the first rows of the ratings table to check what was loaded.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [86]:
# C: the mean of every individual rating in the ratings table
# (the global average that low-vote movies are pulled towards).
c = ratings["rating"].mean()

In [112]:
#v is the number of votes for the movie: the count of rating rows per movieId
v=ratings.groupby('movieId').size()
# NOTE(review): the output saved under this cell is an Int64Index repr rather than
# the head of a Series, and the execution count is out of sequence, so it looks
# stale. Re-run the notebook top to bottom to refresh it.
v.head()


Int64Index([     1,      2,      3,      4,      5,      6,      7,      8,
                 9,     10,
            ...
            193565, 193567, 193571, 193573, 193579, 193581, 193583, 193585,
            193587, 193609],
           dtype='int64', name='movieId', length=9724)

In [88]:
#m is the minimum votes required to be listed in the chart.
#Here m is set to the 90th percentile of the per-movie vote counts in v.
m=v.quantile(0.90)
print(m)

27.0


In [89]:
#R is the average rating of the movie: the mean of `rating` per movieId
r=ratings.groupby('movieId')['rating'].mean()
r.head()

movieId
1    3.920930
2    3.431818
3    3.259615
4    2.357143
5    3.071429
Name: rating, dtype: float64

In [125]:
def weighted_rating(x, averageV=r, countV=v, m=m, C=c):
    """Return the IMDB-style weighted rating for a single movie row.

    WR = (v / (v + m)) * R + (m / (v + m)) * C

    Parameters
    ----------
    x : row-like object exposing a ``movieId`` attribute, e.g. a row passed
        in by ``DataFrame.apply(..., axis=1)``.
    averageV : lookup from movieId to the movie's average rating (R).
    countV : lookup from movieId to the movie's number of votes (v).
    m : minimum-votes constant that weights the global mean.
    C : mean rating across all votes.

    Returns
    -------
    The weighted rating of the movie. A movie that has no entry in
    ``countV`` / ``averageV`` (i.e. no votes at all) gets ``C``: with v = 0
    and m > 0 the formula reduces to C, and returning it directly avoids a
    KeyError that would otherwise abort the whole ``apply``.

    Notes
    -----
    The defaults are bound to the notebook-level ``r``, ``v``, ``m`` and ``c``
    at the moment this cell is executed. Re-run this cell after recomputing
    any of them, or pass the values explicitly.
    """
    movie_id = x.movieId
    try:
        v = countV[movie_id]
        R = averageV[movie_id]
    except KeyError:
        # Unrated movie: no votes to weigh, so fall back to the global mean.
        return C
    # Calculation based on the IMDB formula
    return (v/(v+m) * R) + (m/(m+v) * C)


In [127]:

# Score every movie: weighted_rating is called once per row (axis=1) and the
# results are stored in the 'score' column.
movies['score']=movies.apply(weighted_rating,axis=1)

In [132]:
# Preview the first rows to confirm the 'score' column is present.
movies.head()

Unnamed: 0,movieId,title,genres,score
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.874141
1,2,Jumanji (1995),Adventure|Children|Fantasy,3.445562
2,3,Grumpier Old Men (1995),Comedy|Romance,3.342304
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,3.265942
4,5,Father of the Bride Part II (1995),Comedy,3.224237


In [135]:
# Rank movies from highest to lowest weighted score. The sorted frame is
# rebound to `movies` instead of using inplace=True, which keeps the call
# chainable and follows the pandas recommendation to avoid inplace operations.
movies = movies.sort_values('score', ascending=False)

In [137]:
# Show the 10 highest-scoring movies (movies is sorted by descending score above).
movies.head(10)

Unnamed: 0,movieId,title,genres,score
277,318,"Shawshank Redemption, The (1994)",Crime|Drama,4.356227
659,858,"Godfather, The (1972)",Crime|Drama,4.191973
2226,2959,Fight Club (1999),Action|Crime|Drama|Thriller,4.187927
224,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,4.160223
46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,4.151697
461,527,Schindler's List (1993),Drama|War,4.145919
257,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,4.140844
898,1196,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Sci-Fi,4.13463
1939,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,4.131285
922,1221,"Godfather: Part II, The (1974)",Crime|Drama,4.128475
