In [7]:
# Import TensorFlow and hub
import tensorflow as tf
import tensorflow_hub as hub

# Plotting
import matplotlib.pyplot as plt

# Standard-library and data-handling packages
import os
import re
import numpy as np
import pandas as pd

# scikit-learn
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

In [None]:
# Universal Sentence Encoder (version 4), loaded from TensorFlow Hub by URL.
model_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(model_url)
# Confirmation printed once hub.load has returned.
print('Model Loaded')

In [None]:
def embed(texts):
    """Encode a batch of texts with the module-level ``model``.

    Args:
        texts: The inputs to encode; forwarded to ``model`` unchanged
            (callers in this notebook pass a list of strings).

    Returns:
        Whatever ``model`` returns for ``texts``.
    """
    vectors = model(texts)
    return vectors

In [None]:
# Smoke test: embed a single sentence and display the result.
embed(['This movie was great!'])

In [None]:
# Read the movie dataset from the working directory, explicitly requesting
# pandas' Python parser engine, then preview the first five rows.
df = pd.read_csv(
    "Top_10000_Movies.csv",
    engine="python",
)
df.head(n=5)

In [None]:
# Keep only the two columns used below: the title and the overview text.
df = df.loc[:, ["original_title", "overview"]]
df.head(n=5)

In [None]:
# Drop rows containing any missing value, renumber the remaining rows from 0,
# and keep the first 5500 of them.
# drop=True discards the old index instead of inserting it into the frame as
# an extra "index" column, which nothing downstream uses.
df = (
    df.dropna()
    .reset_index(drop=True)
    .iloc[:5500]
)

In [None]:
# NOTE: despite its name, `titles` holds the *overview* text of each movie
# (the strings that get embedded), not the movie titles.
titles = [overview for overview in df['overview']]

In [None]:
# Peek at the first five entries of `titles`.
titles[:5]

In [None]:
# Embed every entry of `titles` in a single call to the encoder and report
# the shape of the result.
embeddings = embed(titles)
print('The embedding shape is:', embeddings.shape)

In [None]:
# Project the embeddings onto two principal components for plotting.
# random_state pins the projection so the scatter plot is reproducible across
# runs even when scikit-learn selects a randomized SVD solver for this input.
pca = PCA(n_components=2, random_state=42)
emb_2d = pca.fit_transform(embeddings)

In [None]:
# Scatter plot of the 2-D PCA projection, one point per embedded overview.
# Uses the explicit Axes interface and labels both axes so the figure can be
# read on its own.
fig, ax = plt.subplots(figsize=(11, 6))
ax.set_title('Embedding space')
ax.scatter(emb_2d[:, 0], emb_2d[:, 1])
ax.set_xlabel('Principal component 1')
ax.set_ylabel('Principal component 2')
plt.show()

In [None]:
# Build a nearest-neighbour index over the embeddings. Queries return 10
# neighbours unless told otherwise; no metric is passed, so scikit-learn's
# default distance is used.
nn = NearestNeighbors(n_neighbors=10)
nn.fit(embeddings)

In [None]:
# Snapshot of the titles taken when this cell runs. With the cells executed top
# to bottom, its order matches the rows whose overviews were embedded and
# indexed by `nn`. Looking titles up here, rather than in the global `df` at
# call time, keeps recommend() correct if `df` is rebound later in the notebook.
indexed_titles = df['original_title'].tolist()


def recommend(text, n_neighbors=None):
    """Return the titles of the movies whose overviews are closest to ``text``.

    Args:
        text: Free-form query string; it is embedded with ``embed``.
        n_neighbors: How many titles to return. ``None`` (the default) uses
            the neighbour count the ``nn`` index was constructed with.

    Returns:
        A list of title strings, in the order returned by ``nn.kneighbors``.
    """
    emb = embed([text])
    # kneighbors returns one row of neighbour positions per query; there is a
    # single query, so take row 0.
    neighbors = nn.kneighbors(
        emb, n_neighbors=n_neighbors, return_distance=False
    )[0]
    return [indexed_titles[position] for position in neighbors]

In [25]:
# Example query: the text "Titanic" is embedded and matched against the
# indexed overview embeddings; the resulting titles are displayed.
print('Recommended Movies:')
recommend("Titanic")

Recommended Movies:


['Titanic',
 'Titanic II',
 'Titanic: 100 Years On',
 'Romeo + Juliet',
 'Los 7 pecados capitales',
 'The Vow',
 'Dear John',
 'Јужни Ветар 2: Убрзање',
 "Pirates of the Caribbean: At World's End",
 'Barbie & Chelsea: The Lost Birthday']

In [26]:
# Reload the full CSV, with all of its columns, and print it.
# NOTE(review): this rebinds `df`, replacing the two-column, NaN-filtered,
# truncated frame built earlier. Rows of this frame are not guaranteed to line
# up positionally with `embeddings` / `nn` — confirm nothing that runs after
# this cell relies on that alignment.
df = pd.read_csv("Top_10000_Movies.csv", engine="python")
print(df)

      Unnamed: 0        id original_language               original_title  \
0              0  580489.0                en  Venom: Let There Be Carnage   
1              1  524434.0                en                     Eternals   
2              2  438631.0                en                         Dune   
3              3  796499.0                en              Army of Thieves   
4              4  550988.0                en                     Free Guy   
...          ...       ...               ...                          ...   
10009       9995     530.0                en              A Grand Day Out   
10010       9996   15934.0                en                  El cantante   
10011       9997  162215.0                en               How I Live Now   
10012       9998    5723.0                en                         Once   
10013       9999  311667.0                en              Manhattan Night   

       popularity release_date  vote_average  vote_count  \
0        5401.3