# Movie Recommendation System

In [1]:
# Third-party
import pandas as pd

# My scripts
from Src.scripts.data.tabular_dataset_handler import TabularDatasetHandler
from Src.scripts.approaches.filters.popularity_rankings import PopularityRanking
from Src.scripts.approaches.filters.content_based_filtering import ContentBasedFiltering
from Src.scripts.approaches.collaborative_filters.SVD_based_CF import SVD_Based_CollaborativeFilter
from Src.scripts.approaches.filters.hybrid_filtering import HybridFiltering

# Remove warnings
import warnings; warnings.simplefilter('ignore')

# Data preparation

## Data loading

In [2]:
# Create the dataset handler; every later cell reads its data through `tdh`.
tdh = TabularDatasetHandler()
# Shape (rows, columns) of the movies DataFrame before preprocessing.
tdh.get_movies_df_deepcopy().shape

(45466, 24)

In [3]:
# Preview the first rows of the raw movies table. At this stage columns such as
# `genres` and `belongs_to_collection` still hold nested dict/list-like values.
tdh.get_movies_df_deepcopy().head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [4]:
# Column dtypes before preprocessing: `id`, `budget` and `popularity` are still
# reported as `object` rather than numeric types.
tdh.get_movies_df_deepcopy().dtypes

adult                     object
belongs_to_collection     object
budget                    object
genres                    object
homepage                  object
id                        object
imdb_id                   object
original_language         object
original_title            object
overview                  object
popularity                object
poster_path               object
production_companies      object
production_countries      object
release_date              object
revenue                  float64
runtime                  float64
spoken_languages          object
status                    object
tagline                   object
title                     object
video                     object
vote_average             float64
vote_count               float64
dtype: object

## Data preprocessing

In [5]:
# Run the handler's preprocessing step (its return value is not used here),
# then re-check the movies shape to see the effect on rows and columns.
tdh.preprocess_datasets()
tdh.get_movies_df_deepcopy().shape

(45463, 25)

In [6]:
# Preview the preprocessed movies table: `genres` and `belongs_to_collection`
# now show plain names and a `year` column is present.
tdh.get_movies_df_deepcopy().head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year
0,False,Toy Story Collection,30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995
2,False,Grumpy Old Men Collection,0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,1995
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,1995
4,False,Father of the Bride Collection,0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,1995


In [7]:
# Column dtypes after preprocessing: `id` is now int32 and `year` has been added;
# `budget` and `popularity` are still reported as `object`.
tdh.get_movies_df_deepcopy().dtypes

adult                     object
belongs_to_collection     object
budget                    object
genres                    object
homepage                  object
id                         int32
imdb_id                   object
original_language         object
original_title            object
overview                  object
popularity                object
poster_path               object
production_companies      object
production_countries      object
release_date              object
revenue                  float64
runtime                  float64
spoken_languages          object
status                    object
tagline                   object
title                     object
video                     object
vote_average             float64
vote_count               float64
year                      object
dtype: object

# Models testing

## Popularity Ranking

In [8]:
# Rank movies with the IMDB weighted-rating formula (the `wr` column of the
# result) and display the first 10 rows.
# NOTE(review): the second argument (50) is presumably the number of movies to
# return — confirm against PopularityRanking.
top_movies = PopularityRanking.top_movies_IMDB_wr_formula(tdh, 50)
top_movies.head(10)

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,wr
10309,19404,Dilwale Dulhania Le Jayenge,1995,661,9,34.457024,"[Comedy, Drama, Romance]",8.585574
15480,27205,Inception,2010,14075,8,29.108149,"[Action, Thriller, Science Fiction, Mystery, A...",7.984042
12481,155,The Dark Knight,2008,12269,8,123.167259,"[Drama, Action, Crime, Thriller]",7.981708
22879,157336,Interstellar,2014,11187,8,32.213481,"[Adventure, Drama, Science Fiction]",7.979952
2843,550,Fight Club,1999,9678,8,63.869599,[Drama],7.976853
4863,120,The Lord of the Rings: The Fellowship of the Ring,2001,8892,8,32.070725,"[Adventure, Fantasy, Action]",7.974825
292,680,Pulp Fiction,1994,8670,8,140.950236,"[Thriller, Crime]",7.974187
314,278,The Shawshank Redemption,1994,8358,8,51.645403,"[Drama, Crime]",7.973232
7000,122,The Lord of the Rings: The Return of the King,2003,8226,8,29.324358,"[Adventure, Fantasy, Action]",7.972807
351,13,Forrest Gump,1994,8147,8,48.307194,"[Comedy, Drama, Romance]",7.972546


In [9]:
# Weighted-rating ranking for a single genre ('Action'); display the first 10 rows.
# NOTE(review): the last argument (50) is presumably the number of movies to
# return — confirm against PopularityRanking.
top_movies_for_genre = PopularityRanking.top_movies_for_genre(tdh, 'Action', 50)
top_movies_for_genre.head(10)

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genre,wr
15480,27205,Inception,2010,14075,8,29.108149,Action,7.955099
12481,155,The Dark Knight,2008,12269,8,123.167259,Action,7.94861
4863,120,The Lord of the Rings: The Fellowship of the Ring,2001,8892,8,32.070725,Action,7.929579
7000,122,The Lord of the Rings: The Return of the King,2003,8226,8,29.324358,Action,7.924031
5814,121,The Lord of the Rings: The Two Towers,2002,7641,8,29.423537,Action,7.918382
256,11,Star Wars,1977,6778,8,42.149697,Action,7.908327
1154,1891,The Empire Strikes Back,1980,5998,8,19.470959,Action,7.896841
4135,111,Scarface,1983,3017,8,11.299673,Action,7.802046
9430,670,Oldboy,2003,2000,8,10.616859,Action,7.711649
1910,346,Seven Samurai,1954,892,8,15.01777,Action,7.426145


## Content-based filtering

In [10]:
# Shape (rows, columns) of the smaller movies table exposed by the handler.
# NOTE(review): presumably the subset the content-based recommenders work on — confirm.
tdh.get_small_movies_df_deepcopy().shape

(9099, 25)

In [11]:
# Movies similar to 'The Dark Knight' according to the description-based
# recommender, with improved=False; display the first 10 rows.
# NOTE(review): 25 is presumably the number of similar movies requested — confirm.
top_similar_movies = ContentBasedFiltering.description_based_recommender(tdh, 'The Dark Knight', 25, improved=False)
top_similar_movies.head(10)

Unnamed: 0,id,title,year,description
7931,49026,The Dark Knight Rises,2012,Following the death of District Attorney Harve...
132,414,Batman Forever,1995,The Dark Knight of Gotham City confronts a das...
1113,364,Batman Returns,1992,"Having defeated the Joker, Batman now faces th..."
8227,142061,"Batman: The Dark Knight Returns, Part 2",2013,Batman has stopped the reign of terror that Th...
7565,40662,Batman: Under the Red Hood,2010,Batman faces his ultimate challenge as the mys...
524,268,Batman,1989,The Dark Knight of Gotham City begins his war ...
7901,69735,Batman: Year One,2011,Two men come to Gotham City: Bruce Wayne after...
2579,14919,Batman: Mask of the Phantasm,1993,An old flame of Bruce Wayne's strolls into tow...
2696,820,JFK,1991,New Orleans District Attorney Jim Garrison dis...
8165,123025,"Batman: The Dark Knight Returns, Part 1",2012,Batman has not been seen for ten years. A new ...


In [12]:
# Same description-based query with improved=True. In the recorded output the
# result carries vote_count / vote_average / wr columns and is ordered by
# descending `wr`.
top_similar_movies = ContentBasedFiltering.description_based_recommender(tdh, 'The Dark Knight', 25, improved=True)
top_similar_movies.head(10)

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,wr
7931,49026,The Dark Knight Rises,2012,9263,7,20.58258,6.93809
6144,272,Batman Begins,2005,7511,7,28.505341,6.924949
7933,58574,Sherlock Holmes: A Game of Shadows,2011,3971,7,18.695329,6.868594
524,268,Batman,1989,2145,7,19.10673,6.785521
7344,22803,Law Abiding Citizen,2009,1522,7,16.639047,6.726538
6740,5174,Rush Hour 3,2007,801,6,9.718111,6.080225
1113,364,Batman Returns,1992,1706,6,15.001681,6.050584
132,414,Batman Forever,1995,1529,5,13.321354,5.381667
8917,209112,Batman v Superman: Dawn of Justice,2016,7189,5,31.435879,5.109336
1240,415,Batman & Robin,1997,1447,4,17.038824,4.735345


In [13]:
# Movies similar to 'The Dark Knight' according to the metadata-based
# recommender, with improved=False; the result exposes the `soup` text column.
# NOTE(review): 25 is presumably the number of similar movies requested — confirm.
top_similar_movies = ContentBasedFiltering.metadata_based_recommender(tdh, 'The Dark Knight', 25, improved=False)
top_similar_movies.head(10)

Unnamed: 0,id,title,year,soup
8031,49026,The Dark Knight Rises,2012,christianbale michaelcaine garyoldman christop...
6218,272,Batman Begins,2005,christianbale michaelcaine liamneeson christop...
6623,1124,The Prestige,2006,hughjackman christianbale michaelcaine christo...
2085,11660,Following,1998,jeremytheobald alexhaw lucyrussell christopher...
4145,320,Insomnia,2002,alpacino robinwilliams hilaryswank christopher...
3381,77,Memento,2000,guypearce carrie-annemoss joepantoliano christ...
8613,157336,Interstellar,2014,matthewmcconaughey jessicachastain annehathawa...
7648,27205,Inception,2010,leonardodicaprio josephgordon-levitt ellenpage...
5943,9812,Thursday,1998,thomasjane paulamarshall aaroneckhart skipwood...
8927,228968,Kidnapping Mr. Heineken,2015,anthonyhopkins jimsturgess samworthington dani...


In [14]:
# Same metadata-based query with improved=True. In the recorded output the
# result carries vote_count / vote_average / wr columns and is ordered by
# descending `wr`.
top_similar_movies = ContentBasedFiltering.metadata_based_recommender(tdh, 'The Dark Knight', 25, improved=True)
top_similar_movies.head(10)

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,wr
7648,27205,Inception,2010,14075,8,29.108149,7.963571
8613,157336,Interstellar,2014,11187,8,32.213481,7.95446
6623,1124,The Prestige,2006,4510,8,16.94556,7.892031
3381,77,Memento,2000,4168,8,15.450789,7.883877
8031,49026,The Dark Knight Rises,2012,9263,7,20.58258,6.982826
6218,272,Batman Begins,2005,7511,7,28.505341,6.979004
1532,1051,The French Connection,1971,435,7,6.694959,6.792264
2085,11660,Following,1998,363,7,5.283661,6.771593
149,10428,Hackers,1995,406,6,14.810519,6.254794
4145,320,Insomnia,2002,1181,6,11.424974,6.126703


## Collaborative filtering

In [15]:
# SVD-based collaborative filter, trained and evaluated with cross-validation.
movies_suggestor = SVD_Based_CollaborativeFilter(tdh)
movies_suggestor.train_with_cross_validation()
# Last expression of the cell: displayed as a pair whose values match the mean
# RMSE and mean MAE over the 5 folds in the recorded output.
movies_suggestor.evaluate_performance_with_cross_validation()

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8958  0.9021  0.8943  0.8967  0.9005  0.8979  0.0029  
MAE (testset)     0.6891  0.6930  0.6863  0.6933  0.6924  0.6908  0.0027  
Fit time          0.71    0.73    0.66    0.65    0.66    0.68    0.03    
Test time         0.06    0.06    0.07    0.20    0.07    0.09    0.06    


(0.8978882314211262, 0.690845928622682)

In [16]:
# SVD model without cross-validation.
# This rebinds `movies_suggestor` to a new instance, so the predictions in the
# following cells come from this model, not from the cross-validated one above.
movies_suggestor = SVD_Based_CollaborativeFilter(tdh)
movies_suggestor.train()
# Last expression of the cell: displayed as an (RMSE, MAE) pair.
# NOTE(review): which data split these metrics are computed on is not visible here.
movies_suggestor.evaluate_performance()

RMSE: 0.9005
MAE:  0.6901


(0.9005420994345136, 0.6901334682615878)

In [17]:
# Predict the rating that a sample user would give to a sample movie.
user_id = 1
movie_id = 302
# Known rating passed along with the request; it is echoed as `r_ui` in the
# returned prediction, while `est` holds the model's estimate.
rating = 3
movies_suggestor.predict(user_id, movie_id, rating)

Prediction(uid=1, iid=302, r_ui=3, est=2.8435996375143677, details={'was_impossible': False})

In [18]:
# Compare the ground-truth ratings of one user with the ratings estimated by
# `movies_suggestor` for (up to) the first movies that user has rated.
users_ratings_df = tdh.get_users_ratings_df_deepcopy()
movies_df = tdh.get_small_movies_df_deepcopy()

# User to inspect and maximum number of that user's ratings to look at
user_id = 100
max_seen_movies = 10

# Up to the first `max_seen_movies` ratings recorded for this user
seen_movies_ratings = (
    users_ratings_df
    .loc[users_ratings_df['userId'] == user_id, ['userId', 'movieId', 'rating']]
    .head(max_seen_movies)
)

# Ids of the movies rated in 'seen_movies_ratings'
seen_movies_ids = seen_movies_ratings['movieId']

# Rated movies that are present in 'movies_df'
# NOTE(review): this matches ratings 'movieId' directly against movies 'id'.
# The recorded output has only 5 rows, so some selected ratings may have no
# match in 'movies_df' — confirm both columns use the same identifier space.
seen_movies = movies_df.loc[movies_df['id'].isin(seen_movies_ids), ['id', 'title']]

# Attach each movie's rating, drop the now-redundant 'movieId' key column and
# expose the user's rating as 'gt_rating' (ground truth)
merged_df = (
    pd.merge(seen_movies, seen_movies_ratings, left_on='id', right_on='movieId', how='left')
    .drop(columns=['movieId'])
    .rename(columns={'rating': 'gt_rating'})
)

# Estimated rating for each movie. A plain list is used instead of a row-wise
# DataFrame.apply so that an empty 'merged_df' simply yields an empty column
# (apply(axis=1) returns a DataFrame for an empty frame, which cannot be
# assigned to a single column).
merged_df['est_rating'] = [
    movies_suggestor.predict(user_id, seen_id).est
    for seen_id in merged_df['id']
]

merged_df

Unnamed: 0,id,title,userId,gt_rating,est_rating
0,6,Judgment Night,100,3.0,3.724054
1,62,2001: A Space Odyssey,100,3.0,3.463279
2,88,Dirty Dancing,100,2.0,2.759209
3,25,Jarhead,100,4.0,3.558439
4,86,The Elementary Particles,100,3.0,3.405179


## Hybrid recommender

In [19]:
# Build the hybrid recommender from the dataset handler and an SVD-based
# collaborative filter.
# NOTE(review): `model` is a new instance and no train() call is visible in this
# notebook before it is handed over — presumably HybridFiltering (or the model
# itself) takes care of training; confirm.
model = SVD_Based_CollaborativeFilter(tdh)
hybrid_movies_suggestor = HybridFiltering(tdh, model)

In [20]:
# Suggest to the user movies (both new and already rated) similar to a target movie.
# In the recorded output rows are ordered by descending `est_rating`, and
# `gt_rating` is filled only where a rating by this user is available.
# NOTE(review): 25 is presumably the number of similar movies requested — confirm.
user_id = 1
movie_title = 'The Dark Knight'
hybrid_movies_suggestor.predict(user_id, movie_title, 25).head(10)

Unnamed: 0,id,title,year,soup,est_rating,gt_rating
7648,27205,Inception,2010,leonardodicaprio josephgordon-levitt ellenpage...,3.589502,
3381,77,Memento,2000,guypearce carrie-annemoss joepantoliano christ...,3.496507,
1532,1051,The French Connection,1971,genehackman fernandorey royscheider williamfri...,3.379926,4.0
6623,1124,The Prestige,2006,hughjackman christianbale michaelcaine christo...,3.321779,
6218,272,Batman Begins,2005,christianbale michaelcaine liamneeson christop...,3.221891,
8613,157336,Interstellar,2014,matthewmcconaughey jessicachastain annehathawa...,3.186174,
5943,9812,Thursday,1998,thomasjane paulamarshall aaroneckhart skipwood...,3.180507,
2639,9367,El Mariachi,1992,carlosgallardo jaimedehoyos petermarquardt rob...,3.058095,
149,10428,Hackers,1995,jonnyleemiller angelinajolie jessebradford iai...,2.981085,
1952,16885,The General,1998,brendangleeson adriandunbar seanmcginley johnb...,2.932187,


In [21]:
# Suggest to the user movies (both new and already rated) similar to a target movie.
# Same query as the previous cell for a different user, to show that the
# ordering by `est_rating` is user-specific.
user_id = 200
movie_title = 'The Dark Knight'
hybrid_movies_suggestor.predict(user_id, movie_title, 25).head(10)

Unnamed: 0,id,title,year,soup,est_rating,gt_rating
1532,1051,The French Connection,1971,genehackman fernandorey royscheider williamfri...,4.020271,
8613,157336,Interstellar,2014,matthewmcconaughey jessicachastain annehathawa...,3.881496,
3381,77,Memento,2000,guypearce carrie-annemoss joepantoliano christ...,3.771929,4.5
6218,272,Batman Begins,2005,christianbale michaelcaine liamneeson christop...,3.567193,
6623,1124,The Prestige,2006,hughjackman christianbale michaelcaine christo...,3.438043,4.5
7648,27205,Inception,2010,leonardodicaprio josephgordon-levitt ellenpage...,3.388354,3.5
440,9516,Menace II Society,1993,tyrinturner larenztate glennplummer alberthugh...,3.355887,
1952,16885,The General,1998,brendangleeson adriandunbar seanmcginley johnb...,3.324491,
2952,10648,Magnum Force,1973,clinteastwood halholbrook mitchellryan tedpost...,3.248506,
628,26744,Force of Evil,1948,johngarfield thomasgomez mariewindsor abrahamp...,3.230238,


In [22]:
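# Note: it does not make sense to compute RMSE and MAE to evaluate the first, classical techniques (popularity ranking and content-based filtering), because they are not user-based: they do not estimate a rating for a specific user that could be compared with a ground-truth rating.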