## Movie recommender system
Dataset: https://grouplens.org/datasets/movielens/

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import Javascript, display
from surprise import SVD

from CFRecommenderSystem.CFMovieSystem import CFMovieSystem
from CFRecommenderSystem.CFData import CFData
from CFRecommenderSystem.CFModel import CFModel

# Folder holding the MovieLens "ml-latest-small" CSV files. The default is
# resolved relative to the working directory; set MOVIELENS_DIR to point
# elsewhere instead of editing a machine-specific absolute path here.
DATA_DIR = Path(os.environ.get('MOVIELENS_DIR', 'ml-latest-small'))

df_movie = pd.read_csv(DATA_DIR / 'movies.csv', encoding="ISO-8859-1")
df_rating = pd.read_csv(DATA_DIR / 'ratings.csv', encoding="ISO-8859-1")
df_link = pd.read_csv(DATA_DIR / 'links.csv', encoding="ISO-8859-1")

# The TMDB API key is read from the environment so the secret is never stored
# in the notebook. When TMDB_API_KEY is unset this is '' (falsy), and the
# recommendation cell's `if my_tmdb_key:` check takes its no-key branch.
my_tmdb_key = os.environ.get('TMDB_API_KEY', '')

In [2]:
# Preview the first three rows of the movie table
df_movie.head(3)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance


In [3]:
# Preview the first three rows of the rating table
df_rating.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224


# Prepare the dataset for training

In [4]:
# Ratings restricted to user / item / rating, with the column names
# (userID, itemID, rating) that are handed to CFData; the row index is
# converted to strings by the rename.
df_data = (
    df_rating[['userId', 'movieId', 'rating']]
    .rename(index=str, columns={'userId': 'userID', 'movieId': 'itemID', 'rating': 'rating'})
)

# Lookup table pairing each item id with its movie title.
df_id_name_table = (
    df_movie[['movieId', 'title']]
    .rename(index=str, columns={'movieId': 'itemID', 'title': 'itemName'})
)

# Model training for collaborative filtering by SVD

In [5]:

# Wrap the ratings frame and the id -> title lookup in the project's CFData container.
data_movie = CFData(
    df_data,
    test_ratio=None,
    df_id_name_table=df_id_name_table,
    rating_scale=(0.5, 5),
)

# Hyperparameters passed to CFModel as keyword arguments alongside the SVD class.
svd_params = {'lr_all': 0.005, 'reg_all': 0.4, 'n_epochs': 30}
model_svd = CFModel(SVD, **svd_params)
model_svd.fit(data_movie.trainset)

# Recommender object used by the later cells, built from the data and the model.
cf_movie_sys = CFMovieSystem(data_movie, model_svd)

# Movie recommendation based on user-selected movie

In [6]:
def get_most_rated_movie(df_movie_in, df_rating_in, n_output):
    """Return the titles of the movies that received the most ratings.

    Parameters
    ----------
    df_movie_in : pandas.DataFrame
        Movie table; must contain 'movieId' and 'title' columns.
    df_rating_in : pandas.DataFrame
        Rating table; must contain 'movieId' and 'rating' columns.
    n_output : int
        Maximum number of titles to return.

    Returns
    -------
    pandas.Index
        Titles ordered from most to least rated, at most ``n_output`` entries.
    """
    # Use the frames that were passed in rather than the notebook globals, so
    # the function works for any movie/rating pair and has no hidden state.
    rating_counts = (
        pd.merge(df_movie_in, df_rating_in, on='movieId', how='inner')
        .groupby('title')['rating']
        .count()
    )
    return rating_counts.sort_values(ascending=False).index[:n_output]
def run_next_cell(ev):
    """Button-click callback that asks the notebook front end to run a following cell.

    Displays a Javascript object that calls
    ``IPython.notebook.execute_cell_range`` with the bounds
    "selected index + 1" and "selected index + 2".

    ev: the argument supplied by the widget's click handler; it is not used here.

    NOTE(review): this relies on the ``IPython.notebook`` JavaScript object,
    presumably only present in the classic Notebook front end - confirm.
    """
    display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, \
                      IPython.notebook.get_selected_index()+2)'))

# Dropdown candidates: the 700 titles with the most ratings (the variable name
# says 100, but 700 is the count actually requested).
movie_list_top_100 = get_most_rated_movie(df_movie, df_rating, 700)

selected_movie_name = widgets.Dropdown(
    options=movie_list_top_100,
    value='Forrest Gump (1994)',
    description='Select a movie:',
)

# Clicking the button invokes run_next_cell.
button_layout = widgets.Layout(width='40%', height='40px')
button = widgets.Button(
    description="Top-10 movies recommended to you",
    layout=button_layout,
)
button.on_click(run_next_cell)

# Last expression of the cell: render the dropdown above the button.
widgets.VBox([selected_movie_name, button])

VBox(children=(Dropdown(description='Select a movie:', options=('Forrest Gump (1994)', 'Shawshank Redemption, …

<IPython.core.display.Javascript object>

In [8]:
# Title currently chosen in the dropdown
movie_name = selected_movie_name.value

# The TMDB-related arguments are only passed when a key is configured;
# otherwise the recommender is called with the movie name and k alone.
tmdb_kwargs = {'tmdb_key': my_tmdb_key, 'df_ml_imdb_id': df_link} if my_tmdb_key else {}
cf_movie_sys.show_recommended_movies(movie_name, k=10, **tmdb_kwargs)

Movie you select is 'Lord of the Rings: The Fellowship of the Ring, The (2001)'


Based on 'Lord of the Rings: The Fellowship of the Ring, The (2001)', we recommend 10 movies below:


['Lord of the Rings: The Return of the King, The (2003)', 'Lord of the Rings: The Two Towers, The (2002)', 'Maverick (1994)', "Puppet Master III: Toulon's Revenge (1991)", 'Company of Wolves, The (1984)', 'Mouse That Roared, The (1959)', 'Dangerous Liaisons (1988)', 'Glass House, The (2001)', 'Phantom Tollbooth, The (1970)', 'Kentucky Fried Movie, The (1977)']
