In [2]:
#import block
import ast

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity



In [3]:
def _names_to_text(value):
    """Turn one 'directors' / 'top_cast' cell into a space-separated string.

    Parameters
    ----------
    value : str | sequence of str | float
        Either the textual repr of a list (as stored in the CSV, e.g.
        "['Ben Affleck', 'Jon Bernthal']"), an already-parsed sequence of
        names, or NaN when the CSV cell is empty.

    Returns
    -------
    str
        The names joined by single spaces; "" for a missing cell.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell is not a valid Python literal.
    """
    if isinstance(value, str):
        # literal_eval only accepts Python literals, so a crafted CSV cell
        # cannot execute code the way eval() would.
        value = ast.literal_eval(value)
    if value is None or isinstance(value, float):
        # read_csv represents empty cells as float NaN; nothing to join.
        return ""
    return " ".join(value)


# Re-reading the file and using a non-inplace drop keeps this cell idempotent.
df_movies = pd.read_csv("tmdb_movie_data.csv").drop(columns=['text_blob'])

# One free-text field per movie: directors first, then the top-billed cast.
df_movies['crew_blob'] = (
    df_movies['directors'].apply(_names_to_text) + " " +
    df_movies['top_cast'].apply(_names_to_text)
)

df_movies.head()



Unnamed: 0,adult,id,original_language,original_title,overview,popularity,release_date,title,video,vote_average,vote_count,directors,top_cast,genres,crew_blob
0,False,1376434,en,Predator: Killer of Killers,This original animated anthology follows three...,708.7155,2025-06-05,Predator: Killer of Killers,False,8.047,201,['Dan Trachtenberg'],"['Lindsay LaVanchy', 'Louis Ozawa', 'Rick Gonz...","('Action', 'Animation', 'Science Fiction')",Dan Trachtenberg Lindsay LaVanchy Louis Ozawa ...
1,False,870028,en,The Accountant²,"When an old acquaintance is murdered, Wolff is...",634.158,2025-04-23,The Accountant²,False,7.268,464,"[""Gavin O'Connor""]","['Ben Affleck', 'Jon Bernthal', 'Cynthia Addai...","('Action', 'Crime', 'Thriller')",Gavin O'Connor Ben Affleck Jon Bernthal Cynthi...
2,True,1188808,tl,Tuhog,Abie mourns after the death of Michael. She fi...,565.734,2023-11-03,Tuhog,False,4.4,7,['G.B. Sampedro'],"['Arron Villaflor', 'Apple Dy', 'Joko Diaz']","('Drama', 'Romance')",G.B. Sampedro Arron Villaflor Apple Dy Joko Diaz
3,False,552524,en,Lilo & Stitch,The wildly funny and touching story of a lonel...,556.0685,2025-05-17,Lilo & Stitch,False,7.108,535,['Dean Fleischer Camp'],"['Maia Kealoha', 'Sydney Agudong', 'Chris Sand...","('Comedy', 'Family', 'Science Fiction')",Dean Fleischer Camp Maia Kealoha Sydney Agudon...
4,False,1233413,en,Sinners,"Trying to leave their troubled lives behind, t...",543.1837,2025-04-16,Sinners,False,7.532,1260,['Ryan Coogler'],"['Michael B. Jordan', 'Hailee Steinfeld', 'Mil...","('Action', 'Horror', 'Thriller')",Ryan Coogler Michael B. Jordan Hailee Steinfel...


In [4]:
model = SentenceTransformer('all-MiniLM-L6-v2')  # small, fast, accurate


def _embed_column(column):
    """Encode one text column of df_movies with the sentence-transformer model.

    Missing values are encoded as the empty string. The column is handed to
    `encode` as a plain list: `encode` picks sentences by integer position,
    which on a pandas Series is a label lookup and would misbehave as soon as
    the frame has a non-default index.

    Returns one embedding row per row of df_movies, in the same order.
    """
    texts = df_movies[column].fillna("").astype(str).tolist()
    # Unit-length vectors keep the weights below meaningful as relative
    # contributions; this is a no-op if the model already normalizes.
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)


overview_embeddings = _embed_column('overview')
crew_embeddings = _embed_column('crew_blob')
# NOTE(review): 'genres' is embedded as its raw textual repr
# ("('Action', 'Crime')"), punctuation included - confirm that is intended.
genre_embeddings = _embed_column('genres')

# Relative contribution of each field to the combined vector.
w_overview = 0.5
w_crew = 0.4
w_genre = 0.1

# Combine all embeddings into one vector per movie
combined_embeddings = (
    w_overview * overview_embeddings +
    w_crew * crew_embeddings +
    w_genre * genre_embeddings
)

# Row i of the matrix must describe row i of df_movies (recommend_movies relies on it).
assert combined_embeddings.shape[0] == len(df_movies)


In [5]:
def recommend_movies(title, top_n=5):
    """Return the movies whose combined embedding is closest to `title`'s.

    Parameters
    ----------
    title : str
        Exact value to look up in df_movies['title']. If several rows share
        the title, the first one is used as the query.
    top_n : int, default 5
        Maximum number of recommendations; values <= 0 give an empty frame.

    Returns
    -------
    pandas.DataFrame | str
        A frame with columns 'title', 'genres', 'vote_average' and
        'similarity' (cosine similarity to the query), best match first and
        never containing the query row itself. If the title is not present,
        a "not found" message string is returned instead.
    """
    # Positional row numbers, not index labels: combined_embeddings is a plain
    # array and must be addressed by position even if df_movies is re-indexed.
    matches = np.flatnonzero((df_movies['title'] == title).to_numpy())
    if matches.size == 0:
        return f"'{title}' not found in dataset."

    idx = matches[0]
    query_vec = combined_embeddings[idx].reshape(1, -1)
    sims = cosine_similarity(query_vec, combined_embeddings)[0]

    # Stable descending sort, then drop the query row explicitly. Skipping
    # "the first hit" is wrong when another movie ties with the query.
    ranked = np.argsort(-sims, kind="stable")
    top_indices = ranked[ranked != idx][:max(top_n, 0)]

    results = df_movies.iloc[top_indices][['title', 'genres', 'vote_average']].copy()
    results['similarity'] = sims[top_indices]

    return results

In [6]:
# Example query: up to five titles ranked by cosine similarity to "Dune"
# in the combined embedding space.
recommend_movies("Dune", top_n=5)


Unnamed: 0,title,genres,vote_average,similarity
234,Dune: Part Two,"('Adventure', 'Science Fiction')",8.148,0.776512
41,Alien: Covenant,"('Horror', 'Science Fiction', 'Thriller')",6.2,0.593086
191,Interstellar,"('Adventure', 'Drama', 'Science Fiction')",8.455,0.585031
868,Arcadian,"('Action', 'Horror', 'Science Fiction', 'Thril...",6.1,0.576367
428,Mission: Impossible II,"('Action', 'Adventure', 'Thriller')",6.129,0.562397


In [None]:
#