# In [None]:
import pandas as pd
from tkinter import *
from tkinter import messagebox
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the TMDb popular-movies dataset.
data = pd.read_csv('../input/tmdb-top-10000-popular-movies-dataset/TMDb_updated.CSV')

# work on a copy so the raw data stays untouched
df = data.copy()

# drop every row that has a missing value in ANY column
# (this also removes the movies that have no written overview)
df = df.dropna()

# drop all movies whose overview is the placeholder 'Plot unknown.'
df = df[df['overview'] != 'Plot unknown.']

# sort by vote count, then vote average, both descending, so that the
# most-voted entry of every title comes first
df = df.sort_values(['vote_count', 'vote_average'], ascending=False)

# for titles that appear more than once keep only the first (most-voted) entry
df = df.drop_duplicates(subset='title', keep='first')

# renumber the rows 0..n-1 so that a row's label equals its position in the
# similarity matrix built below
df = df.reset_index(drop=True)

# drop the first column of the CSV, exactly as the previous positional slice did
# NOTE(review): presumably an index column that was saved with the file - confirm
df = df.iloc[:, 1:]

# TF-IDF vectorizer that ignores English stop words
tfidf_vector = TfidfVectorizer(stop_words='english')

# one TF-IDF row per movie overview
tfidf_matrix = tfidf_vector.fit_transform(df['overview'])

# pairwise dot products of the TF-IDF rows: entry [i, j] scores how similar
# the overviews of movie i and movie j are
similarity_matrix = linear_kernel(tfidf_matrix, tfidf_matrix)

# recommender function: look up a movie by title, rank every other movie by
# similarity score and print the closest matches
def recommender(movie_name, top_n=10, movies=None, similarities=None):
    """Print the titles and vote averages of the movies most similar to one movie.

    Args:
        movie_name: Exact (case-sensitive) title of the movie to look up.
        top_n: Maximum number of similar movies to print. Defaults to 10.
        movies: DataFrame with 'title' and 'vote_average' columns. Defaults to
            the module-level ``df``.
        similarities: Square matrix in which entry [i][j] is the similarity
            between rows i and j of ``movies``. Defaults to the module-level
            ``similarity_matrix``.

    Raises:
        IndexError: If no row of ``movies`` carries the title ``movie_name``.
    """
    if movies is None:
        movies = df
    if similarities is None:
        similarities = similarity_matrix

    # Row *positions* (not index labels) of the matching titles, so the lookup
    # into the matrix stays correct whatever the frame's index looks like.
    matches = (movies['title'] == movie_name).values.nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f'movie {movie_name!r} not found in the dataset')
    row = matches[0]

    scores = similarities[row]

    # Leave the queried movie out explicitly rather than assuming it sorts
    # first: another movie can tie its score and would otherwise push the
    # queried movie into its own recommendations.
    ranked = sorted(
        (idx for idx in range(len(scores)) if idx != row),
        key=lambda idx: scores[idx],
        reverse=True,
    )

    # output the title along with the vote average of the best matches
    for idx in ranked[:max(top_n, 0)]:
        print(movies['title'].iloc[idx], movies['vote_average'].iloc[idx])
        
print('\n!Da Movie Recommender Tool! 😁\n')

# Interactive loop: ask for a title, then either show that movie's rating and
# overview or list the ten movies with the most similar overviews.
while True:

    # titles are compared case-insensitively; surrounding whitespace is ignored
    user_input = input('Enter a movie and get it\'s review or search for\nsimilar flicks from a database of 9,000+ movies: ').strip().lower()

    # row positions of the movies whose lower-cased title equals the input
    # (computed once and reused for both menu options)
    matches = (df.title.str.lower() == user_input).values.nonzero()[0]

    # if not found, display error
    if len(matches) == 0:
        print('\nMovie name misspelled or not in database.\n')

    else:
        row = matches[0]
        selection = input('Enter 1 to get the review for the movie or 2 to get a list of similar flicks: ').strip()
        if selection == '1':
            rating = df['vote_average'].iloc[row]
            review = df['overview'].iloc[row]
            print(f'\nMovie rating: {rating} ★\'s\n\nMovie review: {review}')
        elif selection == '2':
            scores = similarity_matrix[row]
            # Rank every OTHER movie by similarity, best first. The selected
            # movie is excluded explicitly because a movie with a tied score
            # could otherwise push it into its own recommendation list.
            ranked = sorted(
                (idx for idx in range(len(scores)) if idx != row),
                key=lambda idx: scores[idx],
                reverse=True,
            )
            # top ten matches, one 'rating + title' line each
            recommended = ''.join(
                f"Rating {df['vote_average'].iloc[idx]} ★'s  {df['title'].iloc[idx]}\n"
                for idx in ranked[:10]
            )
            print(f'\nList of similar flicks!\n\n{recommended}')
        else:
            print('\nInvalid entry. Try again.')
    if input('To run program again hit Enter or enter q to quit: ').strip().lower() == 'q':
        break