In [1]:
import pandas as pd
import requests
import numpy as np
import faiss

In [2]:
df = pd.read_csv('netflix_titles.csv')
df.shape 

(8807, 12)

In [3]:
df

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...
8802,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
8803,s8804,TV Show,Zombie Dumb,,,,"July 1, 2019",2018,TV-Y7,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g..."
8804,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8805,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


1. build index

In [4]:
def create_movie_string(movie):
    return f"""Type:{movie['type']},
Title:{movie['title']},
Director:{movie['director']},
Cast:{movie['cast']},
Released:{movie['date_added']},
Genres:{movie['listed_in']},

Description:{movie['description']}
"""

In [5]:
df['movie_strings'] = df.apply(create_movie_string, axis=1)

In [6]:
print(df['movie_strings'].values[0])

Type:Movie,
Title:Dick Johnson Is Dead,
Director:Kirsten Johnson,
Cast:nan,
Released:September 25, 2021,
Genres:Documentaries,

Description:As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.



In [16]:
dim = 4096
index = faiss.IndexFlatL2(dim)
ollama_uri = 'http://localhost:11434/api/embeddings'
X = np.zeros((len(df['movie_strings']), dim), dtype=np.float32)
print(X.shape)

(8807, 4096)


In [25]:
for i, movie in enumerate(df['movie_strings']):
    if i % 30 == 0:
        print('processed ',str(i), 'instances')
    response = requests.post(
        ollama_uri,
        json={
            'model' : 'llama2',
            'prompt' : movie
        }
    )
    X[i] = np.array(response.json()['embedding'])
index.add(X)

processed  0 instances
processed  30 instances
processed  60 instances
processed  90 instances
processed  120 instances
processed  150 instances
processed  180 instances
processed  210 instances
processed  240 instances
processed  270 instances
processed  300 instances
processed  330 instances
processed  360 instances
processed  390 instances
processed  420 instances
processed  450 instances
processed  480 instances
processed  510 instances
processed  540 instances
processed  570 instances
processed  600 instances
processed  630 instances
processed  660 instances
processed  690 instances
processed  720 instances
processed  750 instances
processed  780 instances
processed  810 instances
processed  840 instances
processed  870 instances
processed  900 instances
processed  930 instances
processed  960 instances
processed  990 instances
processed  1020 instances
processed  1050 instances
processed  1080 instances
processed  1110 instances
processed  1140 instances
processed  1170 instances

In [26]:
faiss.write_index(index, 'movie_index')

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x12e874810> >

In [27]:
idx = faiss.read_index('movie_index')
idx

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x12c30daa0> >

2. retreive/search

In [29]:
search_movie = 'shutter island'
df[df.title.str.lower().str.contains(search_movie)]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,movie_strings
1358,s1359,Movie,Shutter Island,Martin Scorsese,"Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley,...",United States,"February 1, 2021",2010,R,139 min,Thrillers,A U.S. marshal's troubling visions compromise ...,"Type:Movie,\nTitle:Shutter Island,\nDirector:M..."


In [32]:
fav_movie = df.iloc[1358].movie_strings
print(fav_movie)

Type:Movie,
Title:Shutter Island,
Director:Martin Scorsese,
Cast:Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley, Max von Sydow, Michelle Williams, Emily Mortimer, Patricia Clarkson, Jackie Earle Haley, Ted Levine, John Carroll Lynch, Elias Koteas,
Released:February 1, 2021,
Genres:Thrillers,

Description:A U.S. marshal's troubling visions compromise his investigation into the disappearance of a patient from a hospital for the criminally insane.



In [33]:
response = requests.post(
        ollama_uri,
        json={
            'model' : 'llama2',
            'prompt' : fav_movie
        }
    )

In [43]:
n_recommendations = 5
search_embeding = np.array([response.json()['embedding']], dtype=np.float32)
distances,indices = idx.search(search_embeding, n_recommendations)

In [44]:
distances,indices

array([[   0.    , 2000.5894, 2002.2739, 2007.2107, 2023.5918]],
      dtype=float32)

In [45]:
recommendations = np.array(df['movie_strings'])[indices.flatten()]
recommendations

array([[1358, 3190,   48, 8283, 7868]])

In [70]:
for i, r in enumerate(recommendations):
    print("====================")
    print("Embeding distance ",distances.flatten()[i])
    print(r)

Embeding distance  0.0
Type:Movie,
Title:Shutter Island,
Director:Martin Scorsese,
Cast:Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley, Max von Sydow, Michelle Williams, Emily Mortimer, Patricia Clarkson, Jackie Earle Haley, Ted Levine, John Carroll Lynch, Elias Koteas,
Released:February 1, 2021,
Genres:Thrillers,

Description:A U.S. marshal's troubling visions compromise his investigation into the disappearance of a patient from a hospital for the criminally insane.

Embeding distance  2000.5894
Type:Movie,
Title:Cut Bank,
Director:Matt Shakman,
Cast:Liam Hemsworth, Teresa Palmer, Billy Bob Thornton, Bruce Dern, John Malkovich, Michael Stuhlbarg, Oliver Platt,
Released:December 1, 2019,
Genres:Dramas, Thrillers,

Description:A small-town dreamer is sure he's landed on a gold mine after accidentally filming a murder but is sucked into a violent tale of greed and deception.

Embeding distance  2002.2739
Type:Movie,
Title:Training Day,
Director:Antoine Fuqua,
Cast:Denzel Washington, Ethan

In [71]:
search_movie = """Type:Movie,
Title:Ramen Noodles,
Director:Christpoher Nolan,
Cast:Leonardo DiCaprio, Jr NTR, James bond, Scarlet witch, Melissa fumero,
Released:February 31, 2000,
Genres:Horror,Comedy, Mass, Drama, Over-the-top

Description:5 distinct people meet to take have fun, take revenge, fight till death .

"""

In [72]:
response = requests.post(
        ollama_uri,
        json={
            'model' : 'llama2',
            'prompt' : search_movie
        }
    )

In [73]:
n_recommendations = 5
search_embeding = np.array([response.json()['embedding']], dtype=np.float32)
distances,indices = idx.search(search_embeding, n_recommendations)

In [74]:
distances,indices

(array([[5696.738 , 5719.159 , 5776.464 , 5846.5537, 5957.201 ]],
       dtype=float32),
 array([[6458, 6147, 2956, 8621, 7370]]))

In [75]:
recommendations = np.array(df['movie_strings'])[indices.flatten()]
recommendations

array(['Type:Movie,\nTitle:Chernobyl Diaries,\nDirector:Bradley Parker,\nCast:Ingrid Bolsø Berdal, Dimitri Diatchenko, Olivia Taylor Dudley, Devin Kelley, Jesse McCartney, Nathan Phillips, Jonathan Sadowski, Alex Feldman, Kristof Konrad, Pasha D. Lychnikoff,\nReleased:August 1, 2018,\nGenres:Horror Movies, Thrillers,\n\nDescription:A group of kids takes an illegal tour through an abandoned city near Chernobyl, where mysterious humanoid forms begin to haunt their steps...\n',
       'Type:Movie,\nTitle:American Psycho,\nDirector:Mary Harron,\nCast:Christian Bale, Willem Dafoe, Jared Leto, Reese Witherspoon, Samantha Mathis, Chloë Sevigny, Justin Theroux, Josh Lucas, Matt Ross, Bill Sage, Cara Seymour, Guinevere Turner,\nReleased:September 1, 2019,\nGenres:Comedies, Cult Movies, Dramas,\n\nDescription:With chiseled good looks that belie his insanity, a businessman takes pathological pride in yuppie pursuits and indulges in sudden homicidal urges.\n',
       'Type:Movie,\nTitle:Chronicall

In [76]:
for i, r in enumerate(recommendations):
    print("====================")
    print("Embeding distance ",distances.flatten()[i])
    print(r)

Embeding distance  5696.738
Type:Movie,
Title:Chernobyl Diaries,
Director:Bradley Parker,
Cast:Ingrid Bolsø Berdal, Dimitri Diatchenko, Olivia Taylor Dudley, Devin Kelley, Jesse McCartney, Nathan Phillips, Jonathan Sadowski, Alex Feldman, Kristof Konrad, Pasha D. Lychnikoff,
Released:August 1, 2018,
Genres:Horror Movies, Thrillers,

Description:A group of kids takes an illegal tour through an abandoned city near Chernobyl, where mysterious humanoid forms begin to haunt their steps...

Embeding distance  5719.159
Type:Movie,
Title:American Psycho,
Director:Mary Harron,
Cast:Christian Bale, Willem Dafoe, Jared Leto, Reese Witherspoon, Samantha Mathis, Chloë Sevigny, Justin Theroux, Josh Lucas, Matt Ross, Bill Sage, Cara Seymour, Guinevere Turner,
Released:September 1, 2019,
Genres:Comedies, Cult Movies, Dramas,

Description:With chiseled good looks that belie his insanity, a businessman takes pathological pride in yuppie pursuits and indulges in sudden homicidal urges.

Embeding distance

In [None]:
from bs4 import BeautifulSoup

In [None]:

def fetch_imdb_thumbnail(movie_title):
    search_url = f"https://www.imdb.com/find?q={movie_title.replace(' ', '+')}&s=tt&ttype=ft&ref_=fn_ft"
    response = requests.get(search_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the first movie link
    movie_page = soup.find('td', class_='result_text')
    if movie_page and movie_page.a:
        movie_href = "https://www.imdb.com" + movie_page.a['href']
        movie_response = requests.get(movie_href)
        movie_soup = BeautifulSoup(movie_response.text, 'html.parser')
        thumbnail_tag = movie_soup.find('meta', property='og:image')
        if thumbnail_tag:
            return thumbnail_tag['content']
    
    return None

def fetch_wikipedia_thumbnail(movie_title):
    search_url = f"https://en.wikipedia.org/wiki/{movie_title.replace(' ', '_')}"
    response = requests.get(search_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the first image thumbnail
    infobox = soup.find('table', class_='infobox')
    if infobox:
        img_tag = infobox.find('img')
        if img_tag:
            return "https:" + img_tag['src']
    
    return None

def fetch_omdb_thumbnail(movie_title, api_key):
    omdb_url = f"http://www.omdbapi.com/?t={movie_title.replace(' ', '+')}&apikey={api_key}"
    response = requests.get(omdb_url)
    data = response.json()

    if 'Poster' in data and data['Poster'] != "N/A":
        return data['Poster']
    
    return None

def fetch_movie_thumbnail(movie_title, omdb_api_key=None):
    thumbnail = fetch_imdb_thumbnail(movie_title)
    if thumbnail:
        return thumbnail
    
    thumbnail = fetch_wikipedia_thumbnail(movie_title)
    if thumbnail:
        return thumbnail
    
    if omdb_api_key:
        thumbnail = fetch_omdb_thumbnail(movie_title, omdb_api_key)
        if thumbnail:
            return thumbnail

    return "Thumbnail not found."

# Example usage
movie_title = "Inception"
omdb_api_key = "your_omdb_api_key"  # Replace with your OMDb API key if available
thumbnail_url = fetch_movie_thumbnail(movie_title, omdb_api_key)
print(thumbnail_url)
