In [1]:
import pandas as pd
import faiss
import requests
import numpy as np

In [2]:
df=pd.read_csv("netflix_titles.csv")

In [3]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [4]:
def create_textual_representation(row):
    textual_representation = f"""Type:{row['type']},
Title:{row['title']},
Director:{row['director']},
Cast:{row['cast']},
Released:{row['release_year']},
Genres:{row['listed_in']},

Description:{row['description']}"""
    return textual_representation

In [5]:
df['textual_representation']=df.apply(create_textual_representation,axis=1)

In [6]:
print(df['textual_representation'].values[1])

Type:TV Show,
Title:Blood & Water,
Director:nan,
Cast:Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng,
Released:2021,
Genres:International TV Shows, TV Dramas, TV Mysteries,

Description:After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.


In [7]:
pip install faiss-cpu

Note: you may need to restart the kernel to use updated packages.


In [8]:
dim=4096
index=faiss.IndexFlatL2(dim)
X=np.zeros((len(df['textual_representation']),dim),dtype='float32')

In [9]:
for i, representation in enumerate(df['textual_representation']):
    if i % 10 ==0:
        print("processed",str(i),"instance")
    res=requests.post('http://localhost:11434/api/embeddings',
                        json={
                            'model':'llama2',
                            'prompt':representation
                        }
                     )
    embedding = res.json()['embedding']
    X[i]=np.array(embedding)
index.add(X)

processed 0 instance
processed 10 instance
processed 20 instance
processed 30 instance
processed 40 instance
processed 50 instance
processed 60 instance
processed 70 instance
processed 80 instance
processed 90 instance
processed 100 instance
processed 110 instance
processed 120 instance
processed 130 instance
processed 140 instance
processed 150 instance
processed 160 instance
processed 170 instance
processed 180 instance
processed 190 instance
processed 200 instance
processed 210 instance
processed 220 instance
processed 230 instance
processed 240 instance
processed 250 instance
processed 260 instance
processed 270 instance
processed 280 instance
processed 290 instance
processed 300 instance
processed 310 instance
processed 320 instance
processed 330 instance
processed 340 instance
processed 350 instance
processed 360 instance
processed 370 instance
processed 380 instance
processed 390 instance
processed 400 instance
processed 410 instance
processed 420 instance
processed 430 instance

KeyboardInterrupt: 

In [11]:
faiss.write_index(index,'index')

In [12]:
index=faiss.read_index('index')

In [13]:
df[df.title.str.contains('Shutter')]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,textual_representation
1358,s1359,Movie,Shutter Island,Martin Scorsese,"Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley,...",United States,"February 1, 2021",2010,R,139 min,Thrillers,A U.S. marshal's troubling visions compromise ...,"Type:Movie,\nTitle:Shutter Island,\nDirector:M..."
8013,s8014,Movie,Shutter,"Banjong Pisanthanakun, Parkpoom Wongpoom","Ananda Everingham, Natthaweeranuch Thongmee, A...",Thailand,"September 5, 2018",2004,TV-MA,96 min,"Horror Movies, International Movies",After killing a young girl in a hit-and-run ac...,"Type:Movie,\nTitle:Shutter,\nDirector:Banjong ..."


In [14]:
favourite_movie=df.iloc[1358]

In [16]:
res=requests.post('http://localhost:11434/api/embeddings',json={
    'model':'llama2',
    'prompt':favourite_movie['textual_representation']
})

In [17]:
embedding = np.array([res.json()['embedding']],dtype='float32')
D,I=index.search(embedding,5)

In [19]:
best_matches=np.array(df['textual_representation'])[I.flatten()]

In [20]:
for match in best_matches:
    print("Next Movie")
    print(match)
    print()
          

Next Movie
Type:Movie,
Title:Zubaan,
Director:Mozez Singh,
Cast:Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy,
Released:2015,
Genres:Dramas, International Movies, Music & Musicals,

Description:A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.

Next Movie
Type:Movie,
Title:Zubaan,
Director:Mozez Singh,
Cast:Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy,
Released:2015,
Genres:Dramas, International Movies, Music & Musicals,

Description:A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.

Next Movie
Type:Movie,
Title:Zubaan,
Director:Mozez Singh,
Cast:Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, An