In [1]:
import warnings
# NOTE(review): this suppresses every warning for the rest of the notebook,
# including deprecation notices raised by libraries; consider filtering only
# the specific categories that are known to be noise.
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the cleaned movies and reviews tables, dropping the 'Unnamed: 0' column
# (presumably the row index that was written out with each CSV).
movies = pd.read_csv('Data/movies_clean.csv').drop(columns=['Unnamed: 0'])
reviews = pd.read_csv('Data/reviews_clean.csv').drop(columns=['Unnamed: 0'])

In [3]:
movies.head()

Unnamed: 0,movie_id,movie,genre,date,1800's,1900's,2000's,Horror,News,Western,...,Film-Noir,Talk-Show,Musical,Sci-Fi,Drama,History,Reality-TV,Thriller,Documentary,Adult
0,8,Edison Kinetoscopic Record of a Sneeze (1894),Documentary|Short,1894,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,10,La sortie des usines Lumière (1895),Documentary|Short,1895,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,12,The Arrival of a Train (1896),Documentary|Short,1896,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,25,The Oxford and Cambridge University Boat Race ...,,1895,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,91,Le manoir du diable (1896),Short|Horror,1896,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
reviews.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,date
0,1,114508,8,1381006850,2013-10-06 02:30:50
1,2,499549,9,1376753198,2013-08-17 20:56:38
2,2,1305591,8,1376742507,2013-08-17 17:58:27
3,2,1428538,1,1371307089,2013-06-15 20:08:09
4,3,75314,1,1595468524,2020-07-23 07:12:04


### How To Find The Most Popular Movies?
For this notebook, we have a single task: regardless of the user, provide a list of recommendations based simply on the most popular items.

For this task, we will consider what is "most popular" based on the following criteria:

- A movie with the highest average rating is considered best.
- With ties, movies that have more ratings are better.
- A movie must have a minimum of 5 ratings to be considered among the best movies.
- If movies are tied in both average rating and number of ratings, the movie with the most recent rating ranks higher.
With these criteria, the goal for this notebook is to take a user_id and return the top n_top recommendations.

In [5]:
def create_ranked_df(movies, reviews, min_ratings=5):
    '''
    Rank movies from most to least popular using their review statistics.

    INPUT
    movies - the movies dataframe; must contain a 'movie_id' column
    reviews - the reviews dataframe; must contain 'movie_id', 'rating' and 'date' columns
    min_ratings - minimum number of ratings a movie needs in order to be ranked
                  (default 5, i.e. more than 4 ratings)

    OUTPUT
    ranked_movies - the movies dataframe indexed by movie_id, extended with the columns
                    'avg_rating', 'num_rating' and 'last_rating', restricted to movies with
                    at least min_ratings ratings and sorted by highest avg rating, then more
                    ratings, then most recent rating
    '''
    # Compute all three per-movie statistics in a single pass over the reviews.
    rating_stats = reviews.groupby('movie_id').agg(
        avg_rating=('rating', 'mean'),
        num_rating=('rating', 'count'),
        last_rating=('date', 'max'),
    )

    # Left join: movies without any review get NaN statistics.
    movies_recs = movies.set_index('movie_id').join(rating_stats)

    # Filter before sorting so only eligible movies are sorted. A NaN count
    # compares False, so unreviewed movies are dropped here as well.
    eligible = movies_recs[movies_recs['num_rating'] >= min_ratings]

    ranked_movies = eligible.sort_values(
        ['avg_rating', 'num_rating', 'last_rating'], ascending=False
    )

    return ranked_movies

In [6]:
def popular_recommendations(user_id, n_top, ranked_movies):
    '''
    Return the titles of the n_top best-ranked movies.

    INPUT:
    user_id - the user_id (str) of the individual you are making recommendations for;
              it is not used in the lookup, so every user receives the same list
    n_top - an integer of the number recommendations you want back
    ranked_movies - a pandas dataframe of the already ranked movies based on avg rating, count, and time

    OUTPUT:
    top_movies - a list of the n_top recommended movies by movie title in order best to worst
    '''
    # The dataframe is already sorted best-first, so the first n_top rows are the answer.
    best_titles = ranked_movies['movie'].iloc[:n_top]

    return best_titles.tolist()

In [7]:
# Build the ranked table once; the recommendation calls below all reuse it.
ranked_movies = create_ranked_df(movies, reviews)

In [8]:
# Inspect the ranked table (highest average rating first).
ranked_movies

Unnamed: 0_level_0,movie,genre,date,1800's,1900's,2000's,Horror,News,Western,Mystery,...,Sci-Fi,Drama,History,Reality-TV,Thriller,Documentary,Adult,avg_rating,num_rating,last_rating
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4921860,MSG 2 the Messenger (2015),Comedy|Drama|Fantasy|Horror,2015,0,0,1,1,0,0,0,...,0,1,0,0,0,0,0,10.000000,48,2016-08-14 22:46:50
5262972,Avengers: Age of Ultron Parody (2015),Short|Comedy,2015,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,10.000000,28,2016-01-08 06:14:43
6662050,Five Minutes (2017),Short|Comedy,2017,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,10.000000,22,2019-04-21 03:59:19
2737018,Selam (2013),Drama|Romance,2013,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,10.000000,10,2015-05-11 04:26:01
5804314,Let There Be Light (2017),Drama,2017,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,10.000000,7,2019-12-25 21:57:47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10367276,The Rat (2019),Short|Drama,2019,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,1.729167,96,2020-06-13 11:45:19
3108604,American Poltergeist (2015),Horror|Thriller,2015,0,0,1,1,0,0,0,...,0,0,0,0,1,0,0,1.666667,6,2018-10-14 23:17:29
3187378,The Asian Connection (2016),Action|Crime|Drama|Thriller,2016,0,0,1,0,0,0,0,...,0,1,0,0,1,0,0,1.666667,6,2017-07-11 11:43:16
4458206,Kod Adi K.O.Z. (2015),Crime|Mystery,2015,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,1.000000,30,2018-01-20 10:14:12


In [9]:
# Top 20 titles for user '1'; popular_recommendations does not use user_id,
# so any user would receive this same list.
recs_20_for_1 = popular_recommendations('1', 20, ranked_movies)

In [10]:
# Show the 20 recommended titles, best first.
recs_20_for_1

['MSG 2 the Messenger (2015)',
 'Avengers: Age of Ultron Parody (2015)',
 'Five Minutes (2017)',
 'Selam (2013)',
 'Let There Be Light (2017)',
 "Quiet Riot: Well Now You're Here, There's No Way Back (2014)",
 'Crawl Bitch Crawl (2012)',
 'Skid Row Marathon (2017)',
 'Chasing Happiness (2019)',
 'Make Like a Dog (2015)',
 'Pandorica (2016)',
 'Third Contact (2011)',
 'Romeo Juliet (2009)',
 'Be Somebody (2016)',
 'Birlesen Gonuller (2014)',
 'Kitbull (2019)',
 'Agnelli (2017)',
 'Sátántangó (1994)',
 'Foster (2011)',
 'CM101MMXI Fundamentals (2013)']

In [11]:
# NOTE(review): identical to the earlier cell that assigns recs_20_for_1;
# it recomputes the same list and could be removed.
recs_20_for_1 = popular_recommendations('1', 20, ranked_movies)

In [12]:
# NOTE(review): displays the same list that was already shown above.
recs_20_for_1

['MSG 2 the Messenger (2015)',
 'Avengers: Age of Ultron Parody (2015)',
 'Five Minutes (2017)',
 'Selam (2013)',
 'Let There Be Light (2017)',
 "Quiet Riot: Well Now You're Here, There's No Way Back (2014)",
 'Crawl Bitch Crawl (2012)',
 'Skid Row Marathon (2017)',
 'Chasing Happiness (2019)',
 'Make Like a Dog (2015)',
 'Pandorica (2016)',
 'Third Contact (2011)',
 'Romeo Juliet (2009)',
 'Be Somebody (2016)',
 'Birlesen Gonuller (2014)',
 'Kitbull (2019)',
 'Agnelli (2017)',
 'Sátántangó (1994)',
 'Foster (2011)',
 'CM101MMXI Fundamentals (2013)']

In [13]:
# Top 5 movies recommended for id 53968. Because popular_recommendations
# does not use user_id, this is simply the first 5 rows of ranked_movies.
recs_5_for_53968 = popular_recommendations('53968', 5, ranked_movies)
recs_5_for_53968

['MSG 2 the Messenger (2015)',
 'Avengers: Age of Ultron Parody (2015)',
 'Five Minutes (2017)',
 'Selam (2013)',
 'Let There Be Light (2017)']

In [14]:
# Top 35 titles for user '43', displayed directly without storing the result.
popular_recommendations('43', 35, ranked_movies)

['MSG 2 the Messenger (2015)',
 'Avengers: Age of Ultron Parody (2015)',
 'Five Minutes (2017)',
 'Selam (2013)',
 'Let There Be Light (2017)',
 "Quiet Riot: Well Now You're Here, There's No Way Back (2014)",
 'Crawl Bitch Crawl (2012)',
 'Skid Row Marathon (2017)',
 'Chasing Happiness (2019)',
 'Make Like a Dog (2015)',
 'Pandorica (2016)',
 'Third Contact (2011)',
 'Romeo Juliet (2009)',
 'Be Somebody (2016)',
 'Birlesen Gonuller (2014)',
 'Kitbull (2019)',
 'Agnelli (2017)',
 'Sátántangó (1994)',
 'Foster (2011)',
 'CM101MMXI Fundamentals (2013)',
 'Crystal Lake Memories: The Complete History of Friday the 13th (2013)',
 'Seishun Buta Yaro wa Yumemiru Shoujo no Yume wo Minai (2019)',
 'Hans Zimmer Live on Tour (2017)',
 'Kirik Party (2016)',
 'Chasing Asylum (2016)',
 'Beyond the Sea (2004)',
 'Blood Brother (2013)',
 'Poshter Girl (2016)',
 'Kuroshitsuji: Book of the Atlantic (2017)',
 'Akahige (1965)',
 'Bridegroom (2013)',
 'Mad As Hell (2014)',
 'Nashville (1975)',
 'Dag II (201

In [24]:
def popular_recs_filtered(user_id, n_top, ranked_movies, years=None, genres=None):
    '''
    Return the titles of the n_top best-ranked movies, optionally restricted
    to given years and/or genres.

    INPUT:
    user_id - the user_id (str) of the individual you are making recommendations for;
              it is not used in the lookup, so every user receives the same list
    n_top - an integer of the number recommendations you want back
    ranked_movies - a pandas dataframe of the already ranked movies based on avg rating, count, and time;
                    must contain the columns 'movie', 'date' and one column per genre name
    years - a list of years (strings or integers) of movies, or a single year;
            None (the default) disables the year filter
    genres - a list of strings with genres of movies (each must be a column of
             ranked_movies), or a single genre string; None (the default)
             disables the genre filter. A movie is kept if it matches at least
             one of the listed genres.

    OUTPUT:
    top_movies - a list of the n_top recommended movies by movie title in order best to worst
    '''
    # Filter movies based on year
    if years is not None:
        if isinstance(years, (str, int)):
            years = [years]
        # isin() never matches an integer against its string form, so compare
        # the string representation on both sides. This keeps string years
        # working when the 'date' column holds integers, and vice versa.
        wanted_years = [str(year) for year in years]
        year_match = ranked_movies['date'].astype(str).isin(wanted_years)
        ranked_movies = ranked_movies[year_match]

    # Filter movies based on genre
    if genres is not None:
        if isinstance(genres, str):
            genres = [genres]
        # Summing the selected genre columns row-wise counts how many of the
        # requested genres each movie belongs to.
        num_genre_match = ranked_movies[list(genres)].sum(axis=1)
        ranked_movies = ranked_movies.loc[num_genre_match > 0, :]

    # The dataframe is already sorted best-first, so take the first n_top titles.
    top_movies = ranked_movies['movie'].iloc[:n_top].tolist()

    return top_movies

In [25]:
# Top 20 titles for user '1', restricted to movies dated 2015-2018 that are flagged as History.
recs_20_for_1_filtered = popular_recs_filtered('1', 20, ranked_movies, years=['2015', '2016', '2017', '2018'], genres=['History'])

In [26]:
# Show the filtered recommendations, best first.
recs_20_for_1_filtered

["Hillary's America: The Secret History of the Democratic Party (2016)",
 'I Believe in Miracles (2015)',
 'O.J.: Made in America (2016)',
 'Ayla: The Daughter of War (2017)',
 'Hacksaw Ridge (2016)',
 'They Shall Not Grow Old (2018)',
 'The Farthest (2017)',
 '13th (2016)',
 'Sado (2015)',
 'Silicon Cowboys (2016)',
 'Ethel &amp; Ernest (2016)',
 'Kono sekai no katasumi ni (2016)',
 'Paul, Apostle of Christ (2018)',
 'Namhansanseong (2017)',
 'Kincsem (2017)',
 'Straight Outta Compton (2015)',
 'Nise - O Coração da Loucura (2015)',
 'LA 92 (2017)',
 'Under sandet (2015)',
 'Only the Dead (2015)']