In [1]:
!pip install faiss-cpu requests

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp312-cp312-win_amd64.whl.metadata (3.8 kB)
Downloading faiss_cpu-1.8.0.post1-cp312-cp312-win_amd64.whl (14.6 MB)
   ---------------------------------------- 0.0/14.6 MB ? eta -:--:--
    --------------------------------------- 0.3/14.6 MB ? eta -:--:--
   -- ------------------------------------- 1.0/14.6 MB 3.9 MB/s eta 0:00:04
   ----- ---------------------------------- 1.8/14.6 MB 3.9 MB/s eta 0:00:04
   ------- -------------------------------- 2.9/14.6 MB 3.9 MB/s eta 0:00:04
   ---------- ----------------------------- 3.7/14.6 MB 3.9 MB/s eta 0:00:03
   ------------ --------------------------- 4.5/14.6 MB 3.9 MB/s eta 0:00:03
   -------------- ------------------------- 5.2/14.6 MB 3.9 MB/s eta 0:00:03
   ----------------- ---------------------- 6.3/14.6 MB 3.9 MB/s eta 0:00:03
   ------------------- -------------------- 7.1/14.6 MB 3.9 MB/s eta 0:00:02
   ------------------- -------------------- 7.1/14.6 MB 3.9 MB/s eta 0:00

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('netflix_titles.csv')


In [4]:
def create_textual_representation(row):
    textual_representation =f"""
    Type: {row['type']},
    Title: {row['title']},
    Director: {row['director']},
    Cast: {row['cast']},
    Release Year: {row['release_year']},
    Listed In: {row['listed_in']},
    Description: {row['description']}
    """
    return textual_representation

In [5]:
df['textual_representation'] = df.apply(create_textual_representation, axis=1)

In [6]:
print(df['textual_representation'].values[0])


    Type: Movie,
    Title: Dick Johnson Is Dead,
    Director: Kirsten Johnson,
    Cast: nan,
    Release Year: 2020,
    Listed In: Documentaries,
    Description: As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.
    


In [7]:
import faiss
import requests
import numpy as np

dim = 4096
index = faiss.IndexFlatL2(dim)

X = np.zeros((len(df['textual_representation']), dim),dtype='float32')

In [8]:
X[0]

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [10]:
for i , representation in enumerate(df['textual_representation']):
    if i % 100:
       print('processed',str(i),'instances')
    res = requests.post('http://localhost:11434/api/embeddings',
                      json={
                          'model':'llama2',
                          'prompt':representation
                      }
                      )
    embedding = res.json()['embedding']
    X[i] = np.array(embedding)


index.add(np.array([embedding]))


processed 1 instances
processed 2 instances
processed 3 instances
processed 4 instances
processed 5 instances
processed 6 instances
processed 7 instances
processed 8 instances
processed 9 instances
processed 10 instances
processed 11 instances
processed 12 instances
processed 13 instances
processed 14 instances
processed 15 instances
processed 16 instances
processed 17 instances
processed 18 instances
processed 19 instances
processed 20 instances
processed 21 instances
processed 22 instances
processed 23 instances
processed 24 instances
processed 25 instances
processed 26 instances
processed 27 instances
processed 28 instances
processed 29 instances
processed 30 instances
processed 31 instances
processed 32 instances
processed 33 instances
processed 34 instances
processed 35 instances
processed 36 instances
processed 37 instances
processed 38 instances
processed 39 instances
processed 40 instances
processed 41 instances
processed 42 instances
processed 43 instances
processed 44 instanc

KeyboardInterrupt: 

In [11]:
faiss.write_index(index,'index')

In [12]:
index = faiss.read_index('index')

In [13]:
df[df.title.str.contains('Shutter')]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,textual_representation
1358,s1359,Movie,Shutter Island,Martin Scorsese,"Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley,...",United States,"February 1, 2021",2010,R,139 min,Thrillers,A U.S. marshal's troubling visions compromise ...,"\n Type: Movie,\n Title: Shutter Island,..."
8013,s8014,Movie,Shutter,"Banjong Pisanthanakun, Parkpoom Wongpoom","Ananda Everingham, Natthaweeranuch Thongmee, A...",Thailand,"September 5, 2018",2004,TV-MA,96 min,"Horror Movies, International Movies",After killing a young girl in a hit-and-run ac...,"\n Type: Movie,\n Title: Shutter,\n D..."


In [24]:
favorite_movie = df.iloc[800]

In [25]:
res = requests.post('http://localhost:11434/api/embeddings',
                      json={
                          'model':'llama2',
                          'prompt':favorite_movie['textual_representation']
                      }
                      )

In [26]:
embedding = np.array([res.json()['embedding']], dtype='float32')
D,I = index.search(embedding,5)

In [27]:
best_matchs = np.array(df['textual_representation'])[I.flatten()]

In [28]:
for match in best_matchs:
    print("NEXT MOVIE")
    print(match)
    print('-'*100)

NEXT MOVIE

    Type: Movie,
    Title: Zubaan,
    Director: Mozez Singh,
    Cast: Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy,
    Release Year: 2015,
    Listed In: Dramas, International Movies, Music & Musicals,
    Description: A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.
    
----------------------------------------------------------------------------------------------------
NEXT MOVIE

    Type: Movie,
    Title: Zubaan,
    Director: Mozez Singh,
    Cast: Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy,
    Release Year: 2015,
    Listed In: Dramas, International Movies, Music & Musicals,
    Description: A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and 

In [29]:
new_movie = '''Type: family,
    Title: Dick Johnson Is Dead,
    Director: Kirsten Johnson,
    Cast: nan,
    Release Year: 2020,
    Listed In: Documentaries,
    Description: As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.'''

In [30]:
res = requests.post('http://localhost:11434/api/embeddings',
                      json={
                          'model':'llama2',
                          'prompt':new_movie
                      }
                      )

In [31]:
embedding = np.array([res.json()['embedding']], dtype='float32')
D,I = index.search(embedding,5)

In [32]:
best_matchs = np.array(df['textual_representation'])[I.flatten()]

In [33]:
for match in best_matchs:
    print("NEXT MOVIE")
    print(match)
    print('-'*100)

NEXT MOVIE

    Type: Movie,
    Title: Zubaan,
    Director: Mozez Singh,
    Cast: Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy,
    Release Year: 2015,
    Listed In: Dramas, International Movies, Music & Musicals,
    Description: A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.
    
----------------------------------------------------------------------------------------------------
NEXT MOVIE

    Type: Movie,
    Title: Zubaan,
    Director: Mozez Singh,
    Cast: Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy,
    Release Year: 2015,
    Listed In: Dramas, International Movies, Music & Musicals,
    Description: A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and 