# A simple recommender system using popularity (non-personalized)
A simple baseline to start with if we don't have any user data.

In [20]:
import os 
import pandas as pd 

In [21]:
def load_data(data_dir="../data/ml-latest-small"):
    """Load the ratings and movies tables from CSV files.

    Args:
        data_dir: Directory that contains ``ratings.csv`` and ``movies.csv``.
            Defaults to the relative path this notebook has always used, so
            calling ``load_data()`` with no arguments behaves as before.

    Returns:
        A ``(ratings_df, movies_df)`` tuple of pandas DataFrames, in that order.
    """
    ratings_df = pd.read_csv(os.path.join(data_dir, "ratings.csv"))
    movies_df = pd.read_csv(os.path.join(data_dir, "movies.csv"))
    return ratings_df, movies_df

# Read both CSV files once; the cells below work on these two DataFrames.
ratings_df, movies_df = load_data()

In [22]:
# Preview the first rows of the ratings table.
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [23]:
# Preview the first rows of the movies table.
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [46]:
def calculate_popularity(df, ratings_df, damping_factor=10):
    """Return a copy of a movies table with per-movie popularity columns added.

    The damped mean adds ``damping_factor`` pseudo-ratings at the global mean
    rating to every movie::

        damped_mean = (sum_ratings + damping_factor * global_mean)
                      / (num_ratings + damping_factor)

    so a movie with only a handful of ratings is pulled toward the global
    mean instead of being ranked on its raw average.

    Args:
        df: Movies table; must have a ``movieId`` column. It is not modified.
        ratings_df: Ratings table; must have ``movieId`` and ``rating`` columns.
        damping_factor: Number of pseudo-ratings at the global mean to add.

    Returns:
        A new DataFrame with the columns of ``df`` plus ``num_ratings``,
        ``mean_rating``, ``sum_ratings`` and ``damped_mean_rating``. Movies
        whose ``movieId`` does not occur in ``ratings_df`` get NaN in all four.
    """
    # One pass over the ratings yields all three per-movie aggregates.
    stats = ratings_df.groupby("movieId")["rating"].agg(["count", "mean", "sum"])
    global_mean = ratings_df["rating"].mean()
    damped_mean_rating = (stats["sum"] + damping_factor * global_mean) / (
        stats["count"] + damping_factor
    )

    # Look the statistics up through the frame that was passed in (not a
    # notebook-level global) and build a new frame, so the caller's input is
    # left untouched and re-running the cell is idempotent.
    movie_ids = df["movieId"]
    return df.assign(
        num_ratings=movie_ids.map(stats["count"]),
        mean_rating=movie_ids.map(stats["mean"]),
        sum_ratings=movie_ids.map(stats["sum"]),
        damped_mean_rating=movie_ids.map(damped_mean_rating),
    )

In [47]:
# Compute the popularity columns with the default damping factor and rebind movies_df to the result.
movies_df = calculate_popularity(movies_df, ratings_df)

In [48]:
# Top 10 movies ranked by how many ratings they received.
movies_df.sort_values(by="num_ratings", ascending=False).head(10) # Most popular based on the number of ratings

Unnamed: 0,movieId,title,genres,num_ratings,mean_rating,sum_ratings,damped_mean_rating
314,356,Forrest Gump (1994),Comedy|Drama|Romance|War,329.0,4.164134,1370.0,4.144589
277,318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022,1404.0,4.400659
257,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,307.0,4.197068,1288.5,4.175128
510,593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,279.0,4.16129,1161.0,4.138462
1939,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,278.0,4.192446,1165.5,4.168457
224,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,251.0,4.231076,1062.0,4.203125
418,480,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller,238.0,3.75,892.5,3.739982
97,110,Braveheart (1995),Action|Drama|War,237.0,4.031646,955.5,4.010184
507,589,Terminator 2: Judgment Day (1991),Action|Sci-Fi,224.0,3.970982,889.5,3.950921
461,527,Schindler's List (1993),Drama|War,220.0,4.225,929.5,4.193546


In [49]:
# Top 10 movies ranked by raw mean rating; the number of ratings is not taken into account.
movies_df.sort_values(by="mean_rating", ascending=False).head(10) # Most popular based on the mean rating

Unnamed: 0,movieId,title,genres,num_ratings,mean_rating,sum_ratings,damped_mean_rating
7656,88448,Paper Birds (Pájaros de papel) (2010),Comedy|Drama,1.0,5.0,5.0,3.637779
8107,100556,"Act of Killing, The (2012)",Documentary,1.0,5.0,5.0,3.637779
9083,143031,Jump In! (2007),Comedy|Drama|Romance,1.0,5.0,5.0,3.637779
9094,143511,Human (2015),Documentary,1.0,5.0,5.0,3.637779
9096,143559,L.A. Slasher (2015),Comedy|Crime|Fantasy,1.0,5.0,5.0,3.637779
4251,6201,Lady Jane (1986),Drama|Romance,1.0,5.0,5.0,3.637779
8154,102217,Bill Hicks: Revelations (1993),Comedy,1.0,5.0,5.0,3.637779
8148,102084,Justice League: Doom (2012),Action|Animation|Fantasy,1.0,5.0,5.0,3.637779
4246,6192,Open Hearts (Elsker dig for evigt) (2002),Romance,1.0,5.0,5.0,3.637779
9122,145994,Formula of Love (1984),Comedy,1.0,5.0,5.0,3.637779


**Problem:** Movies with a high mean rating based on only one or a few ratings are ranked first.

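**Solution:** Use the damped mean, which adds $k$ pseudo-ratings at the global mean rating $\mu$ to every movie, so that movies with few ratings are pulled toward the global mean:

$$\text{damped mean} = \frac{\sum_{i=1}^{n} r_i + k\,\mu}{n + k}$$

where $r_1, \dots, r_n$ are the movie's ratings and $k$ is the damping factor (10 here).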

In [50]:
# Top 10 movies ranked by the damped mean rating.
movies_df.sort_values(by="damped_mean_rating", ascending=False).head(10)

Unnamed: 0,movieId,title,genres,num_ratings,mean_rating,sum_ratings,damped_mean_rating
277,318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022,1404.0,4.400659
659,858,"Godfather, The (1972)",Crime|Drama,192.0,4.289062,823.5,4.250077
2226,2959,Fight Club (1999),Action|Crime|Drama|Thriller,218.0,4.272936,931.5,4.239103
922,1221,"Godfather: Part II, The (1974)",Crime|Drama,129.0,4.25969,549.5,4.205148
46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,204.0,4.237745,864.5,4.203344
224,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,251.0,4.231076,1062.0,4.203125
602,750,Dr. Strangelove or: How I Learned to Stop Worr...,Comedy|War,97.0,4.268041,414.0,4.196407
914,1213,Goodfellas (1990),Crime|Drama,126.0,4.25,535.5,4.194967
461,527,Schindler's List (1993),Drama|War,220.0,4.225,929.5,4.193546
6710,58559,"Dark Knight, The (2008)",Action|Crime|Drama|IMAX,149.0,4.238255,631.5,4.191922
