In [7]:
# Import TensorFlow and hub
import tensorflow as tf
import tensorflow_hub as hub

# Plotting
import matplotlib.pyplot as plt

# some important packages
import os
import re
import numpy as np
import pandas as pd

# scikit-learn
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

In [11]:
# Fetch the Universal Sentence Encoder from its Kaggle-hosted model page.
# `model` is the callable encoder used by the rest of the notebook.
model_url = "https://www.kaggle.com/models/google/universal-sentence-encoder/frameworks/TensorFlow2/variations/universal-sentence-encoder/versions/2"
encoder = hub.load(model_url)
model = encoder
print('Model Loaded')

Model Loaded


In [12]:
# Helper that turns a batch of texts into embeddings
def embed(texts):
    """Return the embeddings the loaded encoder produces for ``texts``.

    Parameters
    ----------
    texts : list of str
        Strings to encode, passed to the module-level ``model`` unchanged.

    Returns
    -------
    Whatever ``model`` returns for the batch; in this notebook a list of
    9900 strings yields an object whose ``.shape`` is ``(9900, 512)``.
    """
    vectors = model(texts)
    return vectors

In [4]:
# Load the dataset
# The CSV location can be overridden with the MOVIES_CSV environment variable so the
# notebook is not tied to one machine's directory layout; the original absolute path
# is kept as the fallback so existing runs behave exactly as before.
csv_path = os.environ.get(
    "MOVIES_CSV",
    "/Users/himanshukumarsingh/Downloads/MSc clg stuffs/Sem 2/project/ml project/Top_10000_Movies.csv",
)
df = pd.read_csv(csv_path, engine = 'python')
df.head()

Unnamed: 0.1,Unnamed: 0,id,original_language,original_title,popularity,release_date,vote_average,vote_count,genre,overview,revenue,runtime,tagline
0,0,580489.0,en,Venom: Let There Be Carnage,5401.308,2021-09-30,6.8,1736.0,"['Science Fiction', 'Action', 'Adventure']",After finding a host body in investigative rep...,424000000.0,97.0,
1,1,524434.0,en,Eternals,3365.535,2021-11-03,7.1,622.0,"['Action', 'Adventure', 'Science Fiction', 'Fa...",The Eternals are a team of ancient aliens who ...,165000000.0,157.0,In the beginning...
2,2,438631.0,en,Dune,2911.423,2021-09-15,8.0,3632.0,"['Action', 'Adventure', 'Science Fiction']","Paul Atreides, a brilliant and gifted young ma...",331116356.0,155.0,"Beyond fear, destiny awaits."
3,3,796499.0,en,Army of Thieves,2552.437,2021-10-27,6.9,555.0,"['Action', 'Crime', 'Thriller']",A mysterious woman recruits bank teller Ludwig...,0.0,127.0,"Before Vegas, one locksmith became a legend."
4,4,550988.0,en,Free Guy,1850.47,2021-08-11,7.8,3493.0,"['Comedy', 'Action', 'Adventure', 'Science Fic...",A bank teller called Guy realizes he is a back...,331096766.0,115.0,Life's too short to be a background character.


In [5]:
# Keep only the columns used later on: title, overview and language
kept_columns = ["original_title", "overview", "original_language"]
df = df[kept_columns]
df.head()

Unnamed: 0,original_title,overview,original_language
0,Venom: Let There Be Carnage,After finding a host body in investigative rep...,en
1,Eternals,The Eternals are a team of ancient aliens who ...,en
2,Dune,"Paul Atreides, a brilliant and gifted young ma...",en
3,Army of Thieves,A mysterious woman recruits bank teller Ludwig...,en
4,Free Guy,A bank teller called Guy realizes he is a back...,en


In [6]:
# removing null values
# Rows with a missing title, overview or language are dropped, then the index is
# renumbered 0..n-1 so positional lookups line up with the embedding rows.
# drop=True discards the old index instead of inserting it as an extra 'index'
# column; without it, each re-run of this cell would add another leftover column.
df = df.dropna().reset_index(drop=True)

In [7]:
# Pull the three text columns out as plain Python lists (inputs for embed)
overviews, titles, languages = (
    list(df[column])
    for column in ("overview", "original_title", "original_language")
)

In [8]:
# Encode every movie overview; the printed shape is (rows, embedding width)
embeddings_1 = embed(overviews)
overview_shape = embeddings_1.shape
print('The embedding shape is:', overview_shape)

The embedding shape is: (9900, 512)


In [9]:
# Encode every movie title; the printed shape is (rows, embedding width)
embeddings_2 = embed(titles)
title_shape = embeddings_2.shape
print('The embedding shape is:', title_shape)

The embedding shape is: (9900, 512)


In [10]:
# Encode every language code; the printed shape is (rows, embedding width)
# NOTE(review): the inputs are short codes such as 'en', so movies sharing a
# language presumably receive identical vectors - confirm that is intended.
embeddings_3 = embed(languages)
language_shape = embeddings_3.shape
print('The embedding shape is:', language_shape)

The embedding shape is: (9900, 512)


In [11]:
# Nearest-neighbour index over the overview embeddings (10 neighbours per query)
nn_1 = NearestNeighbors(n_neighbors=10).fit(embeddings_1)
nn_1

In [12]:
# Nearest-neighbour index over the title embeddings (10 neighbours per query)
nn_2 = NearestNeighbors(n_neighbors=10).fit(embeddings_2)
nn_2

In [13]:
# Nearest-neighbour index over the language embeddings (10 neighbours per query)
nn_3 = NearestNeighbors(n_neighbors=10).fit(embeddings_3)
nn_3

In [28]:
def recommend(choice,text):
    """Return the titles of the movies closest to ``text``.

    Parameters
    ----------
    choice : int
        Which nearest-neighbour index to query:
        1 -> ``nn_1`` (fitted on overview embeddings),
        2 -> ``nn_2`` (fitted on title embeddings),
        3 -> ``nn_3`` (fitted on language embeddings).
    text : str
        Query string; it is embedded with ``embed`` before the lookup.

    Returns
    -------
    list
        Values of ``df['original_title']`` at the neighbour positions, in the
        order returned by ``kneighbors``.

    Raises
    ------
    ValueError
        If ``choice`` is not 1, 2 or 3 (previously this silently returned
        ``None`` after the query had already been embedded).
    """
    # Resolve the index first so an invalid choice fails before the encoder runs.
    if choice == 1:
        index = nn_1
    elif choice == 2:
        index = nn_2
    elif choice == 3:
        index = nn_3
    else:
        raise ValueError(f'choice must be 1, 2 or 3, got {choice!r}')

    emb = embed([text])
    # kneighbors returns one row of positions per query; there is a single query.
    neighbors = index.kneighbors(emb, return_distance=False)[0]
    print('\nHere are the movies according to your choice')
    # Positions refer to rows of df, hence positional .iloc lookup.
    return df['original_title'].iloc[neighbors].tolist()


In [44]:
# taking user input
# Prompt shown for each valid menu choice.
prompts = {
    1: '\nEnter your genre/keyword: ',
    2: '\nEnter any movie title: ',
    3: '\nEnter any language(only first 2 letters): ',
}

print('On what basis do you want me to recommend the movie?')
print('Choose 1 for genre/keyword')
print('Choose 2 for some movie title')
print('Choose 3 for language\n')

# A non-numeric answer is handled like any other invalid menu choice
# instead of aborting the cell with a ValueError from int().
try:
    choice = int(input())
except ValueError:
    choice = None

# Only query the recommender for a valid choice. Previously recommend() was
# called even after 'Wrong input', with `text` undefined (or left over from an
# earlier run of this cell).
recommendations = None
if choice in prompts:
    text = input(prompts[choice])
    recommendations = recommend(choice, text)
else:
    print('\nWrong input')

# Last expression of the cell so Jupyter renders the list; None shows nothing.
recommendations

On what basis do you want me to recommend the movie?
Choose 1 for genre/keyword
Choose 2 for some movie title
Choose 3 for language

1

Enter your genre/keyword: mythology

Here are the movies according to your choice


['Spider-Man: The Mythology of the 21st Century',
 'Clash of the Titans',
 'Hercules',
 'Percy Jackson & the Olympians: The Lightning Thief',
 'Percy Jackson: Sea of Monsters',
 'Weihnachten mit Jonas Kaufmann',
 '聖闘士星矢 天界編 序奏 ~overture~',
 'Sinbad: Legend of the Seven Seas',
 '聖闘士星矢 神々の熱き戦い',
 'Immortals']