In [None]:
import pandas as pd
from ast import literal_eval
import numpy as np
from nltk.stem.snowball import SnowballStemmer
import math
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pyarrow as pa
import pyarrow.parquet as pq

In [None]:
# Mount Google Drive under /content/Drive (Colab only); the dataset CSVs read in the
# "Merging dataset" section live below this mount point.
from google.colab import drive
drive.mount('/content/Drive')

Mounted at /content/Drive


# 0. User functions

In [None]:
def convert(text):
    """Return the ``'name'`` of every dict in a stringified list of dicts.

    ``text`` is parsed with ``ast.literal_eval`` (a safe literal parser, no code
    is executed) and every parsed element must provide a ``'name'`` key.
    """
    return [entry['name'] for entry in literal_eval(text)]


def get_director(x):
    """
    Return the ``'name'`` of the first crew entry whose ``'job'`` equals
    ``'Director'``; return ``np.nan`` when no entry matches.
    """
    return next((member['name'] for member in x if member['job'] == 'Director'), np.nan)


# Function to convert runtime to hours and minutes
def convert_runtime(runtime):
    """Format a runtime given in minutes as ``"<h> hours <m> minutes"``.

    Parameters
    ----------
    runtime : int, float or None
        Runtime in minutes.

    Returns
    -------
    str
        ``"NaN"`` when ``runtime`` is ``None``, NaN or infinite; otherwise the
        formatted hours/minutes string (fractions of a minute are truncated).
    """
    # Infinite input used to crash: inf // 60 evaluates to nan and int(nan) raises
    # ValueError, so every non-finite value is reported the same way as NaN.
    if runtime is None or not math.isfinite(runtime):
        return "NaN"
    hours, minutes = divmod(runtime, 60)
    return f"{int(hours)} hours {int(minutes)} minutes"

# 1. Merging dataset

In [None]:
# Load the four raw CSV files from the mounted Drive folder.
# NOTE(review): the recorded output echoes the movies_metadata.csv line, which is how
# pandas displays a warning — presumably a mixed-dtype notice for that file; confirm.
movies  = pd.read_csv('/content/Drive/MyDrive/AWS Project/dataset/movies_metadata.csv')
credits         = pd.read_csv('/content/Drive/MyDrive/AWS Project/dataset/credits.csv')
keywords        = pd.read_csv('/content/Drive/MyDrive/AWS Project/dataset/keywords.csv')
links           = pd.read_csv('/content/Drive/MyDrive/AWS Project/dataset/links.csv')

  movies  = pd.read_csv('/content/Drive/MyDrive/AWS Project/dataset/movies_metadata.csv')


In [None]:
## Drop three rows by index label. Per the original note these rows hold a date string
## where an integer id is expected — presumably they would break the int cast of `id`
## further down; TODO confirm against the CSV.
## NOTE(review): the labels are tied to this exact file and to `movies` still carrying
## its default index, so this cell must run exactly once, right after loading.
movies = movies.drop([19730, 29503, 35587])

In [None]:
## Parse the stringified genre list of every movie and keep only the genre names.
## Missing values count as an empty list, as does anything that does not parse to a list.
movies['genres'] = (
    movies['genres']
    .fillna('[]')
    .apply(literal_eval)
    .apply(lambda parsed: [genre['name'] for genre in parsed] if isinstance(parsed, list) else [])
)

In [None]:
## Cast the shared key column `id` to int in all three frames so the merges compare like with like
for frame in (keywords, credits, movies):
    frame['id'] = frame['id'].astype('int')

In [None]:
## Merging movies dataset with credits & keywords (inner joins on `id`) to form the master dataset.
## `movies` is deliberately NOT reassigned: the cell can then be re-run from the same input
## instead of merging `credits` in a second time (which would yield cast_x/cast_y columns).
df = movies.merge(credits, on='id').merge(keywords, on='id')

In [None]:
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,cast,crew,keywords
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[{'id': 931, 'name': 'jealousy'}, {'id': 4290,..."
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...","[{'id': 10090, 'name': 'board game'}, {'id': 1..."
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[{'id': 1495, 'name': 'fishing'}, {'id': 12392..."
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...","[{'id': 818, 'name': 'based on novel'}, {'id':..."
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...","[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n..."


In [None]:
## Keep only the movies whose id appears as a non-null tmdbId in links.csv.
## The id list gets its own name so `links` stays a DataFrame and the cell can be re-run;
## `.copy()` makes `df` an independent frame, so the column assignments in the cleaning
## cells operate on real data rather than on a slice (no SettingWithCopyWarning).
linked_tmdb_ids = links['tmdbId'].dropna().astype('int')
df = df[df['id'].isin(linked_tmdb_ids)].copy()
print(df.shape)

(46628, 27)


# 2. Data cleaning

In [None]:
## cast, crew and keywords are loaded as strings; parse each into the Python list it spells out
for column in ('cast', 'crew', 'keywords'):
    df[column] = df[column].apply(literal_eval)

In [None]:
## Cast: keep only the names of the first three listed members, so that every movie
## contributes a comparable number of cast tokens (non-list values become an empty list)
df['cast'] = df['cast'].apply(
    lambda members: [member['name'] for member in members][:3] if isinstance(members, list) else []
)

## Keywords: keep only the keyword names (empty list when the value is not a list)
df['keywords'] = df['keywords'].apply(
    lambda entries: [entry['name'] for entry in entries] if isinstance(entries, list) else []
)

## Director: name picked out of the crew list by get_director
df['director'] = df['crew'].apply(get_director)

In [None]:
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,status,tagline,title,video,vote_average,vote_count,cast,crew,keywords,director
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,Released,,Toy Story,False,7.7,5415.0,"[Tom Hanks, Tim Allen, Don Rickles]","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[jealousy, toy, boy, friendship, friends, riva...",John Lasseter
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,"[Robin Williams, Jonathan Hyde, Kirsten Dunst]","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...","[board game, disappearance, based on children'...",Joe Johnston
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,"[Walter Matthau, Jack Lemmon, Ann-Margret]","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[fishing, best friend, duringcreditsstinger, o...",Howard Deutch
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,"[Whitney Houston, Angela Bassett, Loretta Devine]","[{'credit_id': '52fe44779251416c91011acb', 'de...","[based on novel, interracial relationship, sin...",Forest Whitaker
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,"[Steve Martin, Diane Keaton, Martin Short]","[{'credit_id': '52fe44959251416c75039ed7', 'de...","[baby, midlife crisis, confidence, aging, daug...",Charles Shyer


In [None]:
## Turn every cast name into one lower-case token without spaces, so a full name is a single term
df['cast'] = df['cast'].apply(lambda names: [name.replace(" ", "").lower() for name in names])

## Keep the untouched director name for display purposes
df['main_director'] = df['director']

## Same normalisation for the director; the token is then repeated three times so the
## director carries as many tokens as the (up to three) cast members
director_token = df['director'].astype('str').apply(lambda name: name.replace(" ", "").lower())
df['director'] = director_token.apply(lambda token: [token] * 3)

In [None]:
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,tagline,title,video,vote_average,vote_count,cast,crew,keywords,director,main_director
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,,Toy Story,False,7.7,5415.0,"[tomhanks, timallen, donrickles]","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...","[jealousy, toy, boy, friendship, friends, riva...","[johnlasseter, johnlasseter, johnlasseter]",John Lasseter
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,"[robinwilliams, jonathanhyde, kirstendunst]","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...","[board game, disappearance, based on children'...","[joejohnston, joejohnston, joejohnston]",Joe Johnston
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,"[waltermatthau, jacklemmon, ann-margret]","[{'credit_id': '52fe466a9251416c75077a89', 'de...","[fishing, best friend, duringcreditsstinger, o...","[howarddeutch, howarddeutch, howarddeutch]",Howard Deutch
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,"[whitneyhouston, angelabassett, lorettadevine]","[{'credit_id': '52fe44779251416c91011acb', 'de...","[based on novel, interracial relationship, sin...","[forestwhitaker, forestwhitaker, forestwhitaker]",Forest Whitaker
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,"[stevemartin, dianekeaton, martinshort]","[{'credit_id': '52fe44959251416c75039ed7', 'de...","[baby, midlife crisis, confidence, aging, daug...","[charlesshyer, charlesshyer, charlesshyer]",Charles Shyer


In [None]:
## Count how often each keyword occurs across all movies.
## Series.explode yields one row per list element (NaN for an empty list, dropped here);
## it replaces the row-wise pd.Series construction, which builds one Series per movie
## and which pandas flagged in this cell's recorded output.
s = df['keywords'].explode().dropna().rename('keyword').value_counts()
print(s)

  s = df.apply(lambda x: pd.Series(x['keywords']),axis=1).stack().reset_index(level=1, drop=True)


womandirector       3128
independentfilm     1942
murder              1346
music                932
basedonnovel         841
                    ... 
nightcourt             1
existentialdrama       1
spoiltmarriag          1
prevar                 1
acab                   1
Name: keyword, Length: 18921, dtype: int64


In [None]:
## Keep only the keywords that occur more than once across the dataset
s = s.loc[s > 1]

In [None]:
## English Snowball stemmer used to reduce keywords to their stem
stemmer = SnowballStemmer('english')

## Stem every keyword, then remove inner spaces and lower-case it so that a
## multi-word keyword ends up as one single token
df['keywords'] = df['keywords'].apply(
    lambda words: [stemmer.stem(word).replace(" ", "").lower() for word in words]
)

In [None]:
df['keywords'].head()

0    [jealousi, toy, boy, friendship, friend, rival...
1    [boardgam, disappear, basedonchildren'sbook, n...
2          [fish, bestfriend, duringcreditsst, oldmen]
3    [basedonnovel, interracialrelationship, single...
4    [babi, midlifecrisi, confid, age, daughter, mo...
Name: keywords, dtype: object

In [None]:
## Build the "soup" per movie: keywords + cast + genres + director concatenated into one
## list, then joined into a single space-separated string
soup = df['keywords'] + df['cast'] + df['genres'] + df['director']
df['tags'] = soup.apply(lambda tokens: ' '.join(tokens))

In [None]:
df['tags'].head()

0    jealousi toy boy friendship friend rivalri boy...
1    boardgam disappear basedonchildren'sbook newho...
2    fish bestfriend duringcreditsst oldmen walterm...
3    basedonnovel interracialrelationship singlemot...
4    babi midlifecrisi confid age daughter motherda...
Name: tags, dtype: object

In [None]:
print(df.columns)

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'cast', 'crew', 'keywords', 'director',
       'main_director', 'tags'],
      dtype='object')


In [None]:
# Remove every column that is not used by the remaining cells
unused_columns = [
    'adult', 'belongs_to_collection', 'budget', 'revenue', 'homepage', 'id', 'imdb_id',
    'original_language', 'original_title', 'overview', 'poster_path', 'production_companies',
    'production_countries', 'spoken_languages', 'status', 'tagline', 'video', 'vote_average',
    'vote_count', 'cast', 'crew', 'keywords', 'director',
]
df = df.drop(columns=unused_columns)

In [None]:
df.head()

Unnamed: 0,genres,popularity,release_date,runtime,title,main_director,tags
0,"[Animation, Comedy, Family]",21.946943,1995-10-30,81.0,Toy Story,John Lasseter,jealousi toy boy friendship friend rivalri boy...
1,"[Adventure, Fantasy, Family]",17.015539,1995-12-15,104.0,Jumanji,Joe Johnston,boardgam disappear basedonchildren'sbook newho...
2,"[Romance, Comedy]",11.7129,1995-12-22,101.0,Grumpier Old Men,Howard Deutch,fish bestfriend duringcreditsst oldmen walterm...
3,"[Comedy, Drama, Romance]",3.859495,1995-12-22,127.0,Waiting to Exhale,Forest Whitaker,basedonnovel interracialrelationship singlemot...
4,[Comedy],8.387519,1995-02-10,106.0,Father of the Bride Part II,Charles Shyer,babi midlifecrisi confid age daughter motherda...


In [None]:
# Replace missing and +inf runtimes with 0 so the column can be cast to int in the next cell
df['runtime'] = df['runtime'].fillna(0).replace(np.inf, 0)

In [None]:
# Cast runtime to int; missing / +inf values were replaced by 0 in the previous cell
df['runtime']=df['runtime'].astype('int')

In [None]:
# Replace the numeric runtime with its "<h> hours <m> minutes" text form
df['runtime'] = df['runtime'].map(convert_runtime)

In [None]:
df.head()

Unnamed: 0,genres,popularity,release_date,runtime,title,main_director,tags
0,"[Animation, Comedy, Family]",21.946943,1995-10-30,1 hours 21 minutes,Toy Story,John Lasseter,jealousi toy boy friendship friend rivalri boy...
1,"[Adventure, Fantasy, Family]",17.015539,1995-12-15,1 hours 44 minutes,Jumanji,Joe Johnston,boardgam disappear basedonchildren'sbook newho...
2,"[Romance, Comedy]",11.7129,1995-12-22,1 hours 41 minutes,Grumpier Old Men,Howard Deutch,fish bestfriend duringcreditsst oldmen walterm...
3,"[Comedy, Drama, Romance]",3.859495,1995-12-22,2 hours 7 minutes,Waiting to Exhale,Forest Whitaker,basedonnovel interracialrelationship singlemot...
4,[Comedy],8.387519,1995-02-10,1 hours 46 minutes,Father of the Bride Part II,Charles Shyer,babi midlifecrisi confid age daughter motherda...


In [None]:
## Keep popularity only where the stored value is a Python float; anything else becomes NaN.
## dropna() then removes every row with a missing value in ANY column, not just popularity.
df['popularity'] = df['popularity'].apply(lambda value: value if type(value) == float else np.nan)
df = df.dropna()

## Blank out director names of length 0 or 1, then drop the affected rows
df['main_director'] = df['main_director'].apply(lambda name: name if len(name) > 1 else np.nan)
df = df.dropna()

In [None]:
## Order the movies from most to least popular so that a later head-slice keeps the most
## popular ones, then discard the popularity column, which is only needed for this ordering
df = (
    df.sort_values(by=['popularity'], ascending=False)
      .drop(columns=['popularity'])
      .dropna()
)

In [None]:
## Sorting shuffled the index labels; replace them with a fresh 0..n-1 range (old labels discarded)
df = df.reset_index(drop=True)

In [None]:
## Blank out release dates of length 0 or 1, then drop every row that has a missing value
df['release_date'] = df['release_date'].apply(lambda date: date if len(date) > 1 else np.nan)
df = df.dropna()

In [None]:
# Keep the first 15000 rows; the frame was sorted by descending popularity above
df = df.iloc[:15000]

In [None]:
df.shape

(15000, 6)

# 3. Recommendation System

In [None]:
## Bag-of-words vectorizer over the tags: word unigrams and bigrams, English stop words
## removed, terms found in fewer than 2 documents ignored
count = CountVectorizer(
    stop_words='english',
    min_df=2,
    ngram_range=(1, 2),
    analyzer='word',
)

## Learn the vocabulary from the tags and build the document-term count matrix
count_matrix = count.fit_transform(df['tags'])

In [None]:
print(count_matrix.shape)

(15000, 30789)


In [None]:
## Pairwise cosine similarity between all movies (with a single argument the matrix is
## compared against itself), wrapped in a pyarrow Table for the parquet export below
table = pa.Table.from_pandas(pd.DataFrame(cosine_similarity(count_matrix)))

# 4. Model export

In [None]:
# fastparquet is the engine passed to df.to_parquet when the master dataset is saved below
!pip install fastparquet

Collecting fastparquet
  Downloading fastparquet-2023.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/1.5 MB[0m [31m7.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.5/1.5 MB[0m [31m25.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.6.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m79.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: cramjam, fastparquet
Successfully installed cramjam-2.6.2 fastparquet-2023.7.0


In [None]:
## Save the master dataset (without the index) as parquet, using the fastparquet engine.
## NOTE(review): when the file is read back in the Testing section, `genres` prints as
## JSON-looking byte strings — presumably how this engine stores list values; confirm.
df.to_parquet('/content/movie_database.parquet',engine='fastparquet',index=False)

In [None]:
## Write the similarity-matrix Arrow table to a parquet file with pyarrow
pq.write_table(table, '/content/model.parquet')

In [None]:
!mv model.parquet ./Drive/MyDrive/
!mv movie_database.parquet ./Drive/MyDrive/

# 5. Testing

In [None]:
import pandas as pd
import pyarrow as pa

In [None]:
# Reload the exported master dataset from Drive
dataset = pd.read_parquet('/content/Drive/MyDrive/movie_database.parquet')

In [None]:
# `import pyarrow as pa` alone does not load the parquet submodule, so `pa.parquet` only
# resolves when something else imported it earlier. Import it explicitly instead.
import pyarrow.parquet as pq
table = pq.read_table('/content/Drive/MyDrive/model.parquet').to_pandas()

In [None]:
# Move the current index into a column and give the frame a fresh 0..n-1 index
dataset = dataset.reset_index()
# Title column, used below to find a movie's row position by name
titles = dataset['title']
# Reverse lookup: title -> row label
indices = pd.Series(data=dataset.index, index=titles)

In [None]:
print(dataset['genres'])

0           b'["Family","Animation","Adventure","Comedy"]'
1        b'["Adventure","Family","Animation","Action","...
2                       b'["Action","Adventure","Comedy"]'
3        b'["Action","Adventure","Comedy","Science Fict...
4        b'["Action","Adventure","Fantasy","Science Fic...
                               ...                        
14995                   b'["Comedy","Thriller","Mystery"]'
14996                             b'["TV Movie","Comedy"]'
14997                               b'["Action","Horror"]'
14998                                        b'["Comedy"]'
14999               b'["Crime","Drama","Action","Comedy"]'
Name: genres, Length: 15000, dtype: object


In [None]:
# genres come back as byte strings (see the output above): decode each value as UTF-8,
# parse it back into a list and lower-case every genre name
dataset['converted_genres'] = dataset['genres'].apply(
    lambda raw: [name.lower() for name in literal_eval(raw.decode("utf-8"))]
)

In [None]:
print(dataset['converted_genres'])

0                [family, animation, adventure, comedy]
1        [adventure, family, animation, action, comedy]
2                           [action, adventure, comedy]
3          [action, adventure, comedy, science fiction]
4         [action, adventure, fantasy, science fiction]
                              ...                      
14995                       [comedy, thriller, mystery]
14996                                [tv movie, comedy]
14997                                  [action, horror]
14998                                          [comedy]
14999                    [crime, drama, action, comedy]
Name: converted_genres, Length: 15000, dtype: object


In [None]:
from zmq.sugar.socket import T
def get_recommendations(movie_id_from_db, movie_db, top_n=15, movies=None):
    """Return the movies most similar to the one at ``movie_id_from_db``.

    Parameters
    ----------
    movie_id_from_db : int
        Row position of the query movie; also the key looked up in ``movie_db``.
    movie_db : indexable
        Similarity lookup. ``movie_db[movie_id_from_db]`` must yield one score
        per movie, in the same order as the rows of the movie frame.
    top_n : int, default 15
        Maximum number of recommendations returned.
    movies : pandas.DataFrame, optional
        Frame providing ``title``, ``release_date`` and ``main_director``.
        Defaults to the notebook-level ``dataset``.

    Returns
    -------
    list of dict
        One dict per recommendation with the keys ``movie_title``,
        ``movie_release_date``, ``movie_director`` and ``google_link``, ordered
        from most to least similar. Any error is printed and ``[]`` is returned.
    """
    from urllib.parse import quote_plus  # local import keeps this cell self-contained

    try:
        catalog = dataset if movies is None else movies
        ranked = sorted(
            enumerate(movie_db[movie_id_from_db]),
            key=lambda pair: pair[1],
            reverse=True,
        )
        # Exclude the query movie by position rather than assuming it sorts first:
        # another movie with an identical score could take position 0, and the old
        # `[1:15]` slice would then recommend the query movie itself. That slice also
        # yielded 14 items although 15 were intended.
        movie_indices = [pos for pos, _ in ranked if pos != movie_id_from_db]
        movie_indices = movie_indices[:max(int(top_n), 0)]
        output = catalog.iloc[movie_indices]

        return [
            {
                'movie_title': title,
                'movie_release_date': release_date,
                'movie_director': director,
                # quote_plus escapes characters such as '&' or '#' that would otherwise
                # cut the search query short; whitespace runs collapse into a single '+'
                'google_link': "https://www.google.com/search?q=" + quote_plus(' '.join(title.split())),
            }
            for title, release_date, director in zip(
                output['title'], output['release_date'], output['main_director']
            )
        ]
    except Exception as e:
        print("error: ",e)
        return []


#Fetch most popular movies for a genre
def get_genre(genre, limit=5, movies=None):
    """Return up to ``limit`` movies of the given genre, in stored row order.

    The master dataset is saved in descending popularity order, so the first
    matching rows are the most popular movies of that genre.

    Parameters
    ----------
    genre : str
        Genre name; matched case-insensitively, surrounding whitespace ignored.
    limit : int, default 5
        Maximum number of movies returned.
    movies : pandas.DataFrame, optional
        Frame providing ``converted_genres`` (lists of lower-case genre names),
        ``title``, ``release_date`` and ``main_director``. Defaults to the
        notebook-level ``dataset``.

    Returns
    -------
    list of dict
        One dict per movie with the keys ``movie_title``, ``movie_release_date``,
        ``movie_director`` and ``google_link``. Any error is printed and ``[]``
        is returned.
    """
    from urllib.parse import quote_plus  # local import keeps this cell self-contained

    try:
        catalog = dataset if movies is None else movies
        # converted_genres holds lower-case names, so normalise the query the same way
        wanted = genre.strip().lower()
        matches = catalog[catalog['converted_genres'].apply(lambda genres: wanted in genres)]
        # head() copes with fewer than `limit` matches; the former `range(5)` loop raised
        # IndexError in that case and the except branch discarded the partial result
        output = matches.head(max(int(limit), 0))

        return [
            {
                'movie_title': title,
                'movie_release_date': release_date,
                'movie_director': director,
                # quote_plus escapes characters such as '&' or '#' in the title
                'google_link': "https://www.google.com/search?q=" + quote_plus(' '.join(title.split())),
            }
            for title, release_date, director in zip(
                output['title'], output['release_date'], output['main_director']
            )
        ]
    except Exception as e:
        print("error: ",e)
        return []

In [None]:
movie_name = input('Enter a movie Name: ')

Enter a movie Name: Comedy


In [None]:
# Look the title up by exact match. list.index raises ValueError for an unknown title,
# so report that case and fall back to an empty result instead of aborting the cell.
try:
    movie_index = titles.to_list().index(movie_name)
except ValueError:
    print(f"Movie not found in the dataset: {movie_name!r}")
    recommendations = []
else:
    recommendations = get_recommendations(movie_index,table)

In [None]:
# Fixed-width table: title (40) | director (20) | release date (15), all left-aligned
header = f"{'Movie Title':<40} | {'Director':<20} | {'Release Date':<15}"
print(header)
print("-" * 80)
for rec in recommendations:
    cells = (
        f"{rec['movie_title']:<40}",
        f"{rec['movie_director']:<20}",
        f"{rec['movie_release_date']:<15}",
    )
    print(" | ".join(cells))

Movie Title                              | Director             | Release Date   
--------------------------------------------------------------------------------
Minions                                  | Kyle Balda           | 2015-06-17     
Big Hero 6                               | Chris Williams       | 2014-10-24     
Deadpool                                 | Tim Miller           | 2016-02-09     
Guardians of the Galaxy Vol. 2           | James Gunn           | 2017-04-19     
Pirates of the Caribbean: Dead Men Tell No Tales | Joachim Rønning      | 2017-05-23     
