In [1]:
import pandas as pd
df=pd.read_csv("netflix_titles.csv")

In [2]:
def create_textual_representation(row):
    textual_representation=f"""Type:{row['type']},
Title:{row['title']},
Director:{row['director']},
Cast:{row['cast']},
Released:{row['release_year']},
Genres:{row['listed_in']},
Description:{row['description']}"""
    return textual_representation

In [4]:
df['Textual_Representation']=df.apply(create_textual_representation,axis=1)

In [5]:
print(df['Textual_Representation'].values[0])

Type:Movie,
Title:Dick Johnson Is Dead,
Director:Kirsten Johnson,
Cast:nan,
Released:2020,
Genres:Documentaries,
Description:As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.


In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Load embedding model (free)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Encode all movies
embeddings = model.encode(df['Textual_Representation'].tolist(), convert_to_numpy=True)

# Normalize for cosine similarity
embeddings = embeddings.astype('float32')


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Use cosine similarity (inner product) instead of L2
faiss.normalize_L2(embeddings)

index = faiss.IndexFlatIP(embeddings.shape[1])  # inner product
index.add(embeddings)  # add all movie embeddings


In [None]:
def recommend_movies(movie_title, k=5):
    # Find index of the movie
    idx = df.index[df['title'] == movie_title].tolist()
    if not idx:
        return f"Movie '{movie_title}' not found."
    
    idx = idx[0]
    query_vector = embeddings[idx].reshape(1, -1)

    # Search in FAISS
    distances, indices = index.search(query_vector, k+1)  # k+1 to skip the movie itself
    
    # Get recommendations (skip the first one because it’s the same movie)
    recommendations = df.iloc[indices[0][1:]][['title', 'listed_in', 'description']]
    return recommendations
