In [1]:
import os
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None

In [2]:
# Location of the dataset, resolved relative to the current working directory.
data_path = os.path.join(os.getcwd(), 'data', 'the-movies-dataset')

# Read the preprocessed movie table and keep only the first row for each `id`,
# so later lookups by id resolve to a single movie.
movies = (
    pd.read_csv(os.path.join(data_path, 'movies_preprocessed.csv'))
    .drop_duplicates(subset='id')
)

In [3]:
# Display the first two rows of the loaded movies frame.
movies.head(2)

Unnamed: 0,id,imdb_id,overview,popularity,poster_path,runtime,spoken_languages,tagline,title,vote_average,...,suspense,love,female nudity,sport,police,teenager,duringcreditsstinger,sequel,friendship,world war ii
0,862,tt0114709,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",,Toy Story,7.7,...,0,0,0,0,0,0,0,0,1,0
1,8844,tt0113497,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Roll the dice and unleash the excitement!,Jumanji,6.9,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Display the column labels of the movies frame.
movies.columns

Index(['id', 'imdb_id', 'overview', 'popularity', 'poster_path', 'runtime',
       'spoken_languages', 'tagline', 'title', 'vote_average', 'vote_count',
       'Action', 'Adventure', 'Animation', 'Comedy', 'Crime', 'Documentary',
       'Drama', 'Family', 'Fantasy', 'Foreign', 'History', 'Horror', 'Music',
       'Mystery', 'Romance', 'Science Fiction', 'TV Movie', 'Thriller', 'War',
       'Western', 'en', 'fr', 'it', 'ja', 'de', 'es', 'ru', 'release_year',
       'star_1', 'star_2', 'star_3', 'director', 'writer', 'producer',
       'woman director', 'independent film', 'murder', 'based on novel',
       'musical', 'sex', 'violence', 'nudity', 'biography', 'revenge',
       'suspense', 'love', 'female nudity', 'sport', 'police', 'teenager',
       'duringcreditsstinger', 'sequel', 'friendship', 'world war ii'],
      dtype='object')

## One Feature IMDB Recommender

In [5]:
class ImdbRecommender():
    """Recommend movies that share one credit (director, star, ...) with a movie.

    For each category in ``recommendation_categories`` the recommender looks up
    the value the query movie has in that column, collects every movie with the
    same value, and picks the best-rated one (by ``vote_average``) among those
    whose ``vote_count`` exceeds a popularity threshold.  Thresholds are tried
    in the order given by ``popularity_levels``.

    The ``data`` frame must provide the columns ``id``, ``vote_count``,
    ``vote_average`` and every column named in ``recommendation_categories``.
    """

    # vote_count thresholds, tried in this order (strictest first).
    popularity_levels = [5000, 2500, 1000, 100, 10]
    # Columns used to find related movies, in the order recommendations are made.
    recommendation_categories = ['director', 'star_1', 'star_2', 'producer', 'star_3', 'writer']

    def __init__(self, data):
        """Store the movie table and start with an empty recommendation list.

        Parameters
        ----------
        data : pandas.DataFrame
            Movie table; it is read but never modified by this class.
        """
        self.data = data
        # Per-instance state: ids picked by the most recent `recommend` call.
        # Kept on the instance so separate recommenders never share a list.
        self.recommendation = []

    def _select(self, category, movie_id):
        """Return the id recommended for ``category``, or ``None`` if there is none.

        Raises
        ------
        IndexError
            If ``movie_id`` does not occur in the ``id`` column.
        """
        matches = self.data.loc[self.data['id'] == movie_id, category]
        if matches.empty:
            # Same exception type as the bare positional lookup would raise,
            # but with a message that names the actual problem.
            raise IndexError('movie id {!r} not found in data'.format(movie_id))

        target = matches.iloc[0]
        if pd.isna(target):
            # A missing credit cannot be shared with any other movie.
            return None

        same_target = self.data[self.data[category] == target]

        for level in self.popularity_levels:
            tier = same_target[same_target['vote_count'] > level]
            if len(tier) < 2:
                # Need at least one movie besides (possibly) the query itself.
                continue

            ranked_ids = tier.sort_values('vote_average', ascending=False)['id']

            # Take the top-rated movie, stepping over the query movie if it
            # happens to be ranked first.
            candidate = ranked_ids.iloc[0]
            if candidate == movie_id:
                candidate = ranked_ids.iloc[1]

            if candidate not in self.recommendation:
                return candidate
            # NOTE(review): when the candidate was already recommended we move
            # on to the next (looser) threshold instead of trying the runner-up
            # in this tier.  Preserved as-is so existing results do not change;
            # confirm whether that is the intended behaviour.

        return None

    def recommend_by(self, category, movie_id):
        """Recommend one movie sharing ``category`` with ``movie_id``.

        Ids already present in ``self.recommendation`` are not returned again.

        Parameters
        ----------
        category : str
            Column of ``data`` to match on (e.g. ``'director'``).
        movie_id
            Value of the ``id`` column identifying the query movie.

        Returns
        -------
        The ``id`` of the recommended movie, or an empty list ``[]`` when no
        suitable movie exists.

        Raises
        ------
        IndexError
            If ``movie_id`` does not occur in the ``id`` column.
        """
        candidate = self._select(category, movie_id)
        # `[]` is the historical "nothing found" value of this method.
        return [] if candidate is None else candidate

    def recommend(self, movie_id):
        """Collect at most one recommendation per category for ``movie_id``.

        Resets ``self.recommendation``, fills it category by category (so a
        movie is never recommended twice) and returns that list.

        Raises
        ------
        IndexError
            If ``movie_id`` does not occur in the ``id`` column.
        """
        self.recommendation = []

        for category in self.recommendation_categories:
            candidate = self._select(category, movie_id)
            # Compare against None rather than testing truthiness, so that a
            # legitimate movie id of 0 is not discarded.
            if candidate is not None:
                self.recommendation.append(candidate)

        return self.recommendation
        

In [6]:
def translate_id_to_title(ids, data):
    """Map movie ids to their titles, preserving the order of ``ids``.

    Parameters
    ----------
    ids : iterable
        Values to look up in the ``id`` column of ``data``.
    data : pandas.DataFrame
        Frame with at least the columns ``id`` and ``title``.

    Returns
    -------
    list
        One title per entry of ``ids``; the first matching row is used.

    Raises
    ------
    IndexError
        If an id has no matching row in ``data``.
    """
    def title_of(movie_id):
        # All titles of rows carrying this id; take the first one.
        matching_titles = data.loc[data['id'] == movie_id, 'title']
        return matching_titles.values[0]

    return [title_of(movie_id) for movie_id in ids]

In [7]:
# Instantiate the recommender over the `movies` frame.
ir = ImdbRecommender(movies)

In [8]:
# Show the row(s) whose title is exactly 'The Matrix' (used to find its id).
movies.loc[movies['title'] == 'The Matrix']

Unnamed: 0,id,imdb_id,overview,popularity,poster_path,runtime,spoken_languages,tagline,title,vote_average,...,suspense,love,female nudity,sport,police,teenager,duringcreditsstinger,sequel,friendship,world war ii
2466,603,tt0133093,"Set in the 22nd century, The Matrix tells the ...",33.366332,/hEpWvX6Bp79eLxY1kX5ZZJcme5U.jpg,136.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Welcome to the Real World.,The Matrix,7.9,...,0,0,0,0,0,0,0,0,0,0


## Sample Tests

In [10]:
# Spot-check the recommender on a handful of movie ids (603 is 'The Matrix',
# per the lookup cell above) and print the recommended titles for each.
test_movies = [603, 13, 1726, 680, 665, 583, 103, 128]
for movie_id in test_movies:
    # translate_id_to_title takes a list of ids, hence the one-element list.
    print('Movie: ', translate_id_to_title([movie_id], movies))
    recommendations = ir.recommend(movie_id=movie_id)
    titles = translate_id_to_title(recommendations, movies)
    print('Recommendations:\n{}'.format(titles))
    print()

Movie:  ['The Matrix']
Recommendations:
['Bound', 'John Wick', 'The Matrix Revolutions', 'Sherlock Holmes', 'Disturbia', 'Jupiter Ascending']

Movie:  ['Forrest Gump']
Recommendations:
['Back to the Future', 'Saving Private Ryan', 'The Princess Bride', 'The Devil Wears Prada', 'Open Season']

Movie:  ['Iron Man']
Recommendations:
['Iron Man 2', 'The Avengers', 'The Brave One', 'The Amazing Spider-Man 2', 'The Men Who Stare at Goats']

Movie:  ['Pulp Fiction']
Recommendations:
['Inglourious Basterds', 'Grease', 'Captain America: The Winter Soldier', 'Reservoir Dogs', 'Les Misérables']

Movie:  ['Ben-Hur']
Recommendations:
['Roman Holiday', 'Touch of Evil', 'Zulu', 'Quo Vadis']

Movie:  ['Life of Brian']
Recommendations:
['Absolutely Anything', 'Monty Python and the Holy Grail', 'The Meaning of Life']

Movie:  ['Taxi Driver']
Recommendations:
['The Wolf of Wall Street', 'GoodFellas', 'Listen to Your Heart', 'The Sting', 'From Dusk Till Dawn', 'American Gigolo']

Movie:  ['Princess Monono