# IMDb Movie Recommender - Demo Version
This version loads a subset of the IMDb dataset stored as a CSV file for demonstration purposes. The full source code can be found at https://github.com/SamIAm10/IMDb-Movie-Recommender/blob/main/movie_recommender.ipynb

To try the notebook for yourself, go to Cell > Run All

In [None]:
# import necessary libraries and ML tools
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Load the dataset

In [None]:
# Read the demo CSV; the first row of the file holds the column names.
popular_movies_df = pd.read_csv('popular_movies_df.csv', header=0)
# Replace missing values with empty strings so every cell can be treated as text.
popular_movies_df = popular_movies_df.fillna('')

## Take user input and give movie recommendations

In [None]:
# Interactive recommender cell.
# 1. Ask for one or more (full or partial) movie titles.
# 2. Resolve each to the first row of popular_movies_df whose primaryTitle contains it.
# 3. Merge the genres/directors/writers of those rows into one pseudo-document and
#    rank every movie by cosine similarity of its bag-of-words features to it.
# 4. Ask how many recommendations to show and print them, skipping the input movies.

# take full or partial movie titles from user input
input_movies = input('Enter one or more of your favorite movies, separated by commas:\n').split(',')

try:
    # collect data for each movie
    full_input_movie_titles = []  # full titles of the movies matched from user input
    input_feature_parts = []      # genres/directors/writers strings of every matched movie

    for movie in input_movies:
        query = movie.strip()
        if not query:
            # skip blank entries (e.g. a trailing comma): an empty string is a
            # substring of every title and would silently match the first row
            continue
        # regex=False: match the text literally, so titles containing regex
        # metacharacters such as '(' or '*' neither crash nor mis-match
        matches = popular_movies_df[
            popular_movies_df['primaryTitle'].str.contains(query, case=False, regex=False)
        ]
        if matches.empty:
            raise IndexError(f'no title matches {query!r}')
        best_match = matches.iloc[0]  # the first matching row stands in for the query
        full_input_movie_titles.append(best_match['primaryTitle'])
        input_feature_parts.extend(best_match[['genres', 'directors', 'writers']])

    if not full_input_movie_titles:
        # nothing but blanks was entered
        raise IndexError('no movie titles were entered')

    combined_movie_features = ' '.join(input_feature_parts)  # combined features of input movies

    # build the features series
    features = popular_movies_df['genres'] + ' ' + popular_movies_df['directors'] + ' ' + popular_movies_df['writers']
    # Series.append was removed in pandas 2.0; pd.concat works on old and new versions
    features = pd.concat([features, pd.Series([combined_movie_features])], ignore_index=True)

    # calculate the cosine similarity using movie features
    cv = CountVectorizer()
    cv_matrix = cv.fit_transform(features)
    # only the last row (the combined input features) is needed, so compare that
    # single row against everything instead of building the full N x N matrix
    cos_sim = cosine_similarity(cv_matrix[-1:], cv_matrix)

    # obtain a list of (row position, similarity) pairs
    recommended_movies = list(enumerate(cos_sim[-1]))
    recommended_movies.pop() # last element is the combined movie features, so we delete it

    # sort movies by decreasing similarity (stable: ties keep dataset order)
    recommended_movies_sorted = sorted(recommended_movies, key=lambda x: x[1], reverse=True)

    # return a list of movies, ordered from most to least recommended
    max_recs = int(input('\nHow many recommendations would you like to see?\n')) # max number of recommendations
    if max_recs <= 0:
        # explicit check rather than assert, which is stripped under `python -O`
        raise ValueError('number of recommendations must be positive')

    print("\nIf you liked:")
    for movie in full_input_movie_titles:
        print('•', movie)
    print('\nYou might also like:')

    already_watched = set(full_input_movie_titles)
    all_titles = popular_movies_df['primaryTitle']
    num_recs = 0
    for row_position, _similarity in recommended_movies_sorted:
        # positional lookup: row_position is a row number of the feature matrix
        title = all_titles.iloc[row_position]
        if title in already_watched: # don't recommend a movie the user already watched!
            continue
        num_recs += 1
        print(f'{num_recs}.', title)
        if num_recs >= max_recs:
            break

# error handling
except IndexError:
    print('\nSorry, one or more of the titles you entered do not exist in our records. Please try again.')
# NOTE(review): any other ValueError raised inside the try body is reported with
# this same message — confirm whether that is acceptable.
except ValueError:
    print('\nPlease enter a valid number.')

Enter one or more of your favorite movies, separated by commas:
infinity war, star wars, jurassic park

How many recommendations would you like to see?
20

If you liked:
• Avengers: Infinity War
• Star Wars: Episode IV - A New Hope
• Jurassic Park

You might also like:
1. The Lost World: Jurassic Park
2. Avengers: Endgame
3. Captain America: The First Avenger
4. Captain America: Civil War
5. Captain America: The Winter Soldier
6. Iron Man 2
7. Spider-Man
8. The Incredible Hulk
9. Solo: A Star Wars Story
10. Star Wars: Episode I - The Phantom Menace
11. Star Wars: Episode III - Revenge of the Sith
12. Ready Player One
13. The Last Starfighter
14. Stealth
15. Transformers: Dark of the Moon
16. Transformers: Age of Extinction
17. Bumblebee
18. Avengers: Age of Ultron
19. Timeline
20. Jurassic World: Fallen Kingdom
