In [45]:
#import libraries
import pandas as pd
import random
import imdb


from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity 
from imdb import Cinemagoer


#create list of movies that were rated highly (>= q3 according to letterboxd analytics project)
diary_df = pd.read_csv("topratedmovies.csv")
#create df with massive list of random movies and their summaries
movies_df = pd.read_csv("movies_dataset.csv") 



def find_random_fave_movie_summary(df):
    """
    Chooses random movie title from topratedmovies csv and find summary
    """
    #uses random.choice to pick random movie title
    movie_choice = str(random.choice(df['Name']))
    #create instance of Cinemagoer class
    ia = Cinemagoer()
    #searches on imdb for movie matches, returns list of matches
    movies = ia.search_movie(movie_choice)
    #if statement ensures no error if movie is not on imdb (empty list is returned in prior step)
    if movies:
        #get_movie finds the summary of the first movie match in the search_movie list by id
        movie = ia.get_movie(movies[0].movieID)
        #return (name of movie selected with random.choice, plot outline of movie from get_movie step
        return (movie_choice, movie['plot outline'])

    return (None, None)






def recommend_similar_movies(movie_list,favorite_movies, n_recommendations):
    """
    Recommends movies from movies_list to watch based on a random film chosen from favorite_movies.

    E.g. input a list of movies that user likes as favorite_movies (formatted with a 'Name' column). 
    Input a random list of movies from the internet (formatted with a 'title' and an 'overview' column) as movie_list.
    Output will be the name of a random movie that you already like from favorite_movies, 
    and a list of suggestions of movies similar to this film based on the summaries from movies_list.
    For when you do not know what to watch :)
    """

    # creates a copy, so original movie_list df is not changed
    df = movie_list.copy()

    # favorite_movies df goes through find_favorite_movie_summary; returns tuple with title, overview of random fave movie
    title, overview = find_random_fave_movie_summary(favorite_movies)
    #if random fave movie can't be found on imdb, raise ValueError
    if not title:
        raise ValueError("Movie not found")
    print("Input random movie: " + title)

    #creates row with random fave movie title and overview and adds to movie_list copy
    new_row = pd.DataFrame({'title': [title], 'overview': [overview]})
    df = pd.concat([df, new_row], ignore_index=True)
    #finds index of fave movie (only selects first match of fave movie if it's already in movie_list)
    input_movie_index = df[df['title'] == title].index[0]

    #removes all NAs in the movie_list overviews
    df['overview'] = df['overview'].fillna('') 

    # creates instance of TfidfVectorizer, removes words like "a" and "the" from evalution
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    #looks at unique words in each overview and creates matrix for scoring similarity
    tfidf_matrix = tfidf_vectorizer.fit_transform(df['overview'])

    # creates similarity matrix based on similarity between words using tfidf_matrix
    similarity = cosine_similarity(tfidf_matrix)
        
    
    # finds row in similarity matrix that corresponds to fave movie based on its index
    #sorts similarity scores, and then returns recommendations based on highest to lowest score, excluding actual fave_movie
    similar_movie_indices = similarity[input_movie_index].argsort()[::-1][1:n_recommendations+1]
    # returns (title of fave movie, titles of recommendations based on index)
    return (title, df['title'].iloc[similar_movie_indices].tolist())



#use recommend_similar_movies
title, results = recommend_similar_movies(movies_df, diary_df, 8)

#print
print("Results for movie: " + title)
for x in results:
    #if movie is in movie_list already, excludes it from print statement
    if x != title:
        print("Watch: " + x) 



Input random movie: The Seventh Seal
Results for movie: The Seventh Seal
Watch: Black Angel
Watch: George and the Dragon
Watch: Lily Sometimes
Watch: High-Rise
Watch: Tightrope
Watch: Radio Inside
Watch: Inside
