In [4]:
# import libraries
import pandas as pd
import numpy as np

In [5]:
# read the data
df = pd.read_csv('netflix_titles.csv')
df.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [6]:
# create a function to get the top n rows of the dataset based on the column name and value provided by the user 
def create_textual_representation(row):
    textual_representation = f""" Type: {row['type']}

Title: {row['title']}
Director: {row['director']}
Cast: {row['cast']}
Released: {row['release_year']}
Genres: {row['listed_in']}

Description: {row['description']}"""
    
    return textual_representation

In [25]:
# apply the function to the dataset
df['textual_representation'] = df.apply(create_textual_representation, axis=1)
print(df['textual_representation'].values[0])

 Type: Movie

Title: Dick Johnson Is Dead
Director: Kirsten Johnson
Cast: nan
Released: 2020
Genres: Documentaries

Description: As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.


In [9]:
#install faiss
!pip install faiss-cpu



In [10]:
# import the faiss library
import faiss
import requests

# create a function to get the top n rows of the dataset based on the column name and value provided by the user
dim = 4096

# create a flat index
index = faiss.IndexFlatL2(dim)

# create a numpy array to store the embeddings
X = np.zeros((len(df['textual_representation']), dim), dtype=np.float32)

In [12]:
# loop through the dataset and get the embeddings 
# store the embeddings in the numpy array & add the embeddings to the index
# print the progress every 10 instances this will take a while to run you can reduce the number of instances to process to speed up the process
for i, representation in enumerate(df['textual_representation']):
    if i % 10 == 0:
        print('Processed', str(i), 'instances')
              
    res = requests.post('http://localhost:11434/api/embeddings', 
                        json={
                            'model': 'llama2',
                            'prompt': representation
                            }
                            )  

    embedding = res.json()['embedding']

    X[i] = np.array(embedding)
index.add(X)

Processed 0 instances
Processed 10 instances
Processed 20 instances
Processed 30 instances
Processed 40 instances
Processed 50 instances
Processed 60 instances
Processed 70 instances


KeyboardInterrupt: 

In [13]:
faiss.write_index(index, 'index')

In [14]:
# load the index
index = faiss.read_index('index')

In [15]:
favorite_movie = df.iloc[1358]

In [16]:
favorite_movie

show_id                                                               s1359
type                                                                  Movie
title                                                        Shutter Island
director                                                    Martin Scorsese
cast                      Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley,...
country                                                       United States
date_added                                                 February 1, 2021
release_year                                                           2010
rating                                                                    R
duration                                                            139 min
listed_in                                                         Thrillers
description               A U.S. marshal's troubling visions compromise ...
textual_representation     Type: Movie\n\nTitle: Shutter Island\nDirecto...
Name: 1358, 

In [17]:
# get the embeddings of the favorite movie
res = requests.post('http://localhost:11434/api/embeddings', json = {
    'model': 'llama2',
    'prompt': favorite_movie['textual_representation']
})

In [18]:
# get the embeddings
embedding  = np.array([res.json()['embedding']], dtype=np.float32)

D, I = index.search(embedding, 5)

In [21]:
# get the best matches
best_matches = np.array(df['textual_representation'])[I.flatten()]

In [24]:
# print the best matches
for match in best_matches:
    print('NEXT MOVIE')
    print(match)
    print('-----------------')


NEXT MOVIE
 Type: Movie

Title: Zubaan
Director: Mozez Singh
Cast: Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy
Released: 2015
Genres: Dramas, International Movies, Music & Musicals

Description: A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.
-----------------
NEXT MOVIE
 Type: Movie

Title: Zubaan
Director: Mozez Singh
Cast: Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy
Released: 2015
Genres: Dramas, International Movies, Music & Musicals

Description: A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.
-----------------
NEXT MOVIE
 Type: Movie

Title: Zubaan
Director: Mozez Singh
Cast: Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish