In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('netflix_titles.csv')

In [3]:
df

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...
8802,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
8803,s8804,TV Show,Zombie Dumb,,,,"July 1, 2019",2018,TV-Y7,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g..."
8804,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8805,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


In [4]:
def create_textual_representation(row):
    textual_representation = f"""
    Type: {row['type']},
    Title: {row['title']},
    Director: {row['director']},
    Cast: {row['cast']},
    Release Year: {row['release_year']},
    Genres: {row['listed_in']},
    Description: {row['description']}.
"""
    return textual_representation

In [5]:
df['textual_representation'] = df.apply(create_textual_representation, axis=1)

In [6]:
print(df['textual_representation'].values[0])


    Type: Movie,
    Title: Dick Johnson Is Dead,
    Director: Kirsten Johnson,
    Cast: nan,
    Release Year: 2020,
    Genres: Documentaries,
    Description: As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable..



In [7]:
import faiss
import requests

In [9]:
dim = 4096
index = faiss.IndexFlatL2(dim)
X = np.zeros((len(df['textual_representation']), dim), dtype=np.float32)

In [None]:
for idx, representation in enumerate(df['textual_representation']):
    if idx % 200:
        print("Processed", str(idx), 'instances')
    
    res = requests.post('http://localhost:11434/api/embeddings', 
                        json={
                            'model': 'llama2',
                            'prompt': representation
                        })
    
    embedding = res.json()['embedding']
    
    X[idx] = np.array(embedding)

index.add(X)

Processed 1 instances
Processed 2 instances
Processed 3 instances
Processed 4 instances
Processed 5 instances
Processed 6 instances
Processed 7 instances
Processed 8 instances
Processed 9 instances
Processed 10 instances
Processed 11 instances
Processed 12 instances
Processed 13 instances
Processed 14 instances
Processed 15 instances
Processed 16 instances
Processed 17 instances
Processed 18 instances
Processed 19 instances
Processed 20 instances
Processed 21 instances
Processed 22 instances
Processed 23 instances
Processed 24 instances
Processed 25 instances
Processed 26 instances
Processed 27 instances
Processed 28 instances
Processed 29 instances
Processed 30 instances
Processed 31 instances
Processed 32 instances
Processed 33 instances
Processed 34 instances
Processed 35 instances
Processed 36 instances
Processed 37 instances
Processed 38 instances
Processed 39 instances
Processed 40 instances
Processed 41 instances
Processed 42 instances
Processed 43 instances
Processed 44 instanc

In [None]:
faiss.write_index(index, 'index')

In [None]:
index = faiss.read_index('index')

In [None]:
df[df.title.str.contains('Shutter')]

In [None]:
favor_movie = df.iloc[1358]

In [None]:
res = requests.post('http://localhost:11434/api/embeddings',
                    json={
                        'model': 'llama2',
                        'prompt': favor_movie['textual_representation']
                    })

In [None]:
embedding = np.array([res.json()['embedding']], dtype='float32')

In [None]:
D, I = index.search(embedding, 5)

In [None]:
best_matches = np.array(df['textual_representation'])[I.flatten()]

In [None]:
for match in best_matches:
    print("NEXT MOVIE")
    print(match)
    print()