# Rating-based simple recommender

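This recommender assumes that ratings are the only factor needed to decide whether a movie should be recommended to a user. It uses the IMDB weighted-rating formula, $WR = \frac{v \cdot R + m \cdot C}{v + m}$ (where $v$ is the movie's vote count, $R$ its mean rating, $m$ the minimum vote count required to be charted, and $C$ the mean rating across all movies), to prepare movie charts for recommendation.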

In [33]:
import pandas as pd
import numpy as np
import dask.dataframe as dd
import dask.array as da
from ast import literal_eval
import h5py
from BaseException.Exception.LookupError import KeyError
from dask_ml.model_selection import train_test_split

In [29]:
# Ratings are loaded as a dask dataframe and indexed by movie id.
data = dd.read_csv('../ratings.csv').set_index('movieId')

# Movie metadata is loaded with pandas; dtypes for these columns are pinned
# explicitly instead of being inferred from the CSV.
metadata = pd.read_csv(
    '../movies_metadata.csv',
    dtype={
        'budget': 'object',
        'id': 'object',
        'popularity': 'object',
        'revenue': 'float64',
        'vote_count': 'float64',
    },
)
data.head()

Unnamed: 0_level_0,userId,rating,timestamp
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,25328,1.0,858444862
1,196279,5.0,1094027044
1,196280,3.5,1115763285
1,192873,2.0,1165268448
1,1236,5.0,852832134


In [30]:
# Preview the first rows of the raw movie metadata.
metadata.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [31]:
# Check the column dtypes that resulted from the explicit `dtype` mapping at load time.
metadata.dtypes

adult                     object
belongs_to_collection     object
budget                    object
genres                    object
homepage                  object
id                        object
imdb_id                   object
original_language         object
original_title            object
overview                  object
popularity                object
poster_path               object
production_companies      object
production_countries      object
release_date              object
revenue                  float64
runtime                  float64
spoken_languages          object
status                    object
tagline                   object
title                     object
video                     object
vote_average             float64
vote_count               float64
dtype: object

### Computing average rating for each movie

In [5]:
# Average rating per movie; .compute() materialises the dask result
# as an in-memory pandas Series indexed by movieId.
mean_ratings = (
    data
    .groupby('movieId')['rating']
    .mean()
    .compute()
)

In [55]:
# Display the per-movie mean ratings computed above.
mean_ratings

movieId
1         3.888157
2         3.236953
3         3.175550
4         2.875713
5         3.079565
            ...   
176267    4.000000
176269    3.500000
176271    5.000000
176273    1.000000
176275    3.000000
Name: rating, Length: 45115, dtype: float64

In [11]:
# C: the mean of the per-movie mean ratings; it is the `C` term of the
# weighted-rating formula used in weighted_rating().
C = mean_ratings.mean()
C

3.061294172517874

In [14]:
# Vote counts with missing values dropped, cast to integers.
vote_count = metadata['vote_count'].dropna().astype('int')

# m: the 95th-percentile vote count, used as the minimum-votes cut-off.
m = vote_count.quantile(0.95)
m

434.0

Since the recommender system is going to be genre-based, the genre values are cleaned up:

In [34]:
def _genre_names(raw):
    """Convert one raw `genres` cell into a plain list of genre names.

    Strings are parsed with ``literal_eval``; anything that is not a list
    after parsing (e.g. a missing value) becomes an empty list. Values that
    are already a list of names are passed through unchanged, so this cell
    can be re-run safely after `metadata['genres']` has been overwritten.
    """
    if isinstance(raw, str):
        raw = literal_eval(raw)
    if not isinstance(raw, list):
        return []
    # Entries are dicts with a 'name' key on the first run and plain
    # strings on any later run.
    return [g['name'] if isinstance(g, dict) else g for g in raw]


metadata['genres'] = metadata['genres'].apply(_genre_names)

In [35]:
# Preview the metadata again to confirm `genres` now holds plain lists of names.
metadata.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [38]:
# Movies eligible for the chart: those whose vote count passes the `m` cut-off.
# Rows with a missing vote_count are excluded automatically because a
# comparison against NaN evaluates to False.
# A single .loc selection followed by .copy() makes chart_movies an
# independent frame, so adding a column to it later in the notebook does not
# raise SettingWithCopyWarning or depend on `metadata`.
chart_movies = metadata.loc[
    metadata['vote_count'] > m - 1,
    ['title', 'vote_count', 'genres', 'popularity', 'id'],
].copy()
chart_movies.head()

Unnamed: 0,title,vote_count,genres,popularity,id
0,Toy Story,5415.0,"[Animation, Comedy, Family]",21.946943,862
1,Jumanji,2413.0,"[Adventure, Fantasy, Family]",17.015539,8844
5,Heat,1886.0,"[Action, Crime, Drama, Thriller]",17.924927,949
9,GoldenEye,1194.0,"[Adventure, Action, Thriller]",14.686036,710
15,Casino,1343.0,"[Drama, Crime]",10.137389,524


In [56]:
def weighted_rating(movie, ratings=None, min_votes=None, prior_mean=None):
    """Return the IMDB-style weighted rating for a single movie row.

    The score is ``(v*R + m*C) / (v + m)`` where ``v`` is the movie's vote
    count, ``R`` its mean rating, ``m`` the minimum-votes cut-off and ``C``
    the overall mean rating.

    Parameters
    ----------
    movie : mapping or pandas.Series
        Must provide ``'vote_count'`` and ``'id'``.
    ratings : pandas.Series, optional
        Mean rating per movie, indexed by integer id. Defaults to the
        notebook-level ``mean_ratings``.
    min_votes : float, optional
        The ``m`` term. Defaults to the notebook-level ``m``.
    prior_mean : float, optional
        The ``C`` term. Defaults to the notebook-level ``C``.

    Returns
    -------
    float
        The weighted rating. When the movie's id is missing from ``ratings``
        or cannot be converted to an integer, ``R`` is taken as 0.
    """
    # Fall back to the notebook globals so existing one-argument calls
    # (e.g. DataFrame.apply(weighted_rating, axis=1)) behave as before.
    if ratings is None:
        ratings = mean_ratings
    if min_votes is None:
        min_votes = m
    if prior_mean is None:
        prior_mean = C

    v = movie['vote_count']
    # NOTE(review): `id` comes from movies_metadata.csv while the ratings are
    # indexed by `movieId` from ratings.csv - confirm both use the same id
    # space (a mapping table may be required).
    try:
        R = ratings.loc[int(movie['id'])]
    except (KeyError, ValueError, TypeError):
        # KeyError: id has no ratings; ValueError/TypeError: id is not numeric
        # or is missing. All are treated as "no rating available".
        R = 0
    return (v*R + min_votes*prior_mean)/(v + min_votes)

In [57]:
# Score every chart candidate row by row with weighted_rating().
chart_movies['Weighted_Rating'] = chart_movies.apply(
    weighted_rating,
    axis='columns',
)

In [58]:
# Rank candidates by weighted rating, best first, and keep the first 250.
top_charts = (
    chart_movies
    .sort_values(by='Weighted_Rating', ascending=False)
    .iloc[:250]
)

In [60]:
# Preview the highest-ranked entries of the chart.
top_charts.head()

Unnamed: 0,title,vote_count,genres,popularity,id,Weighted_Rating
10826,Ice Age: The Meltdown,3034.0,"[Animation, Family, Comedy, Adventure]",16.646029,950,4.013827
2647,The Sixth Sense,3223.0,"[Mystery, Thriller, Drama]",18.449169,745,3.987843
6388,Terminator 3: Rise of the Machines,2177.0,"[Action, Thriller, Science Fiction]",20.818907,296,3.985691
5325,Men in Black II,3188.0,"[Action, Adventure, Comedy, Science Fiction]",16.775716,608,3.980246
23753,Guardians of the Galaxy,10014.0,"[Action, Science Fiction, Adventure]",53.291601,118340,3.961007


In [72]:
# Persist the chart to disk; the DataFrame index is not written.
top_charts.to_csv(
    path_or_buf='../FIRST-top_charts.csv',
    index=False,
)