In [5]:
# MOVIE RECOMMENDATION SYSTEM WITH IMDB WATCH LINK

import re
import unicodedata

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# 1. LOAD MOVIES & LINKS DATASETS
# Expected columns -- movies.csv: movieId, title, genres
#                     links.csv:  movieId, imdbId, tmdbId
movies = pd.read_csv("movies.csv")
links = pd.read_csv("links.csv")

# Left join on movieId: every movie row is kept, and movies that have no
# entry in links.csv end up with missing imdbId/tmdbId values.
movies = pd.merge(movies, links, how="left", on="movieId")


# 2. CLEAN TITLES
def clean_title(title):
    """Normalise a movie title into a lowercase ASCII lookup key.

    Steps, in order: lowercase, fold accented letters to their base letter,
    drop every parenthesised group (release year, alternative titles),
    replace any remaining character outside ``[a-z0-9 ]`` with a space, and
    collapse runs of whitespace.

    Parameters
    ----------
    title : str or None or float
        Raw title. Non-string values (e.g. ``None`` or the NaN that pandas
        uses for a missing cell) are treated as "no title".

    Returns
    -------
    str
        The cleaned title; an empty string when there is nothing to clean.
    """
    # A missing title has no text to normalise; calling .lower() on it would
    # raise AttributeError and abort the whole .apply().
    if not isinstance(title, str):
        return ""

    title = title.lower()

    # Fold accents ("amélie" -> "amelie") before the ASCII filter below.
    # Without this, accented letters are replaced by spaces ("am lie"),
    # producing keys nobody can type and bogus TF-IDF tokens.
    title = "".join(
        ch
        for ch in unicodedata.normalize("NFKD", title)
        if not unicodedata.combining(ch)
    )

    title = re.sub(r"\(.*?\)", "", title)       # drop "(1995)", "(a.k.a. ...)"
    title = re.sub(r"[^a-z0-9 ]", " ", title)   # punctuation -> space
    title = re.sub(r"\s+", " ", title).strip()  # collapse whitespace
    return title

# Normalised titles double as the lookup key for user queries.
movies["title_clean"] = movies["title"].map(clean_title)


# 3. TF-IDF ON GENRES + TITLES
# One text document per movie: the cleaned title followed by the genres
# string (missing genres contribute an empty string).
genres_filled = movies["genres"].fillna("")
movies["combined"] = movies["title_clean"] + " " + genres_filled

tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(movies["combined"])

# Pairwise cosine similarity between every pair of movie documents;
# row i holds the similarity of movie i to each movie in the table.
similarity_matrix = cosine_similarity(tfidf_matrix)


# 4. RECOMMENDATION FUNCTION
def recommend_movie(movie_name, top_n=10):
    """Return the movies most similar to ``movie_name`` with IMDb links.

    The query is normalised with ``clean_title`` -- the same routine that
    produced ``movies["title_clean"]`` -- so inputs such as
    "Toy Story (1995)" or "Spider-Man" match their stored titles. When
    several movies share the same cleaned title, the first one in ``movies``
    is used as the reference.

    Parameters
    ----------
    movie_name : str
        Title typed by the user.
    top_n : int, default 10
        Maximum number of recommendations. Values below 1 give an empty frame.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns movieId, title, genres, imdbId and watch_link,
        ordered by decreasing similarity; or a "not found" message string
        when no title matches the query.
    """
    movie_name = movie_name.lower().strip()

    # Clean the query exactly like the stored titles; a merely lowercased
    # query still carries the year and punctuation and would never match.
    query = clean_title(movie_name)

    # Work with row positions: similarity_matrix rows and .iloc are both
    # positional, so index labels must not be used to address them.
    matches = np.flatnonzero((movies["title_clean"] == query).to_numpy())

    # An empty query would otherwise match any title that cleaned to "".
    if not query or matches.size == 0:
        return f" Movie '{movie_name}' not found."

    idx = int(matches[0])

    row = np.asarray(similarity_matrix[idx]).ravel()
    # Stable sort on negated scores: descending similarity, with ties kept
    # in dataset order.
    ranked = np.argsort(-row, kind="stable")
    # Remove the query movie explicitly. It is not guaranteed to be ranked
    # first: another movie with an identical document ties at the top score.
    ranked = ranked[ranked != idx]
    # Clamp so a negative top_n cannot slice from the end of the ranking.
    top_indices = ranked[:max(int(top_n), 0)]

    recommended = movies.iloc[top_indices][["movieId", "title", "genres", "imdbId"]].copy()

    # Add IMDB watch link. pd.notna also copes with None / pd.NA, which
    # np.isnan rejects; the id is zero-padded to at least seven digits.
    recommended["watch_link"] = [
        f"https://www.imdb.com/title/tt{int(imdb_id):07d}/" if pd.notna(imdb_id) else "N/A"
        for imdb_id in recommended["imdbId"]
    ]

    return recommended


# 5. RUN PROGRAM
# Interactive driver: asks for a title and prints its recommendations.
if __name__ == "__main__":
    print("\n Movie Recommendation System Ready!")
    user_movie = input("Enter Movie Name: ")

    result = recommend_movie(user_movie)

    if isinstance(result, str):
        # recommend_movie returns a plain message when the title is unknown;
        # show it without the "Top Recommendations" header, which would
        # otherwise sit above an error message.
        print(result)
    else:
        print("\n Top Recommendations:\n")
        print(result.to_string(index=False))



 Movie Recommendation System Ready!

 Top Recommendations:

 movieId                                                        title genres  imdbId                            watch_link
    2848 Othello (Tragedy of Othello: The Moor of Venice, The) (1952)  Drama   45251 https://www.imdb.com/title/tt0045251/
     386                                                S.F.W. (1994)  Drama  111048 https://www.imdb.com/title/tt0111048/
    1922                                              Whatever (1998)  Drama  140688 https://www.imdb.com/title/tt0140688/
    3289                 Not One Less (Yi ge dou bu neng shao) (1999)  Drama  209189 https://www.imdb.com/title/tt0209189/
    4745                                                     O (2001)  Drama  184791 https://www.imdb.com/title/tt0184791/
    4765                                                L.I.E. (2001)  Drama  242587 https://www.imdb.com/title/tt0242587/
    5788                                        All or Nothing (2002)  Drama  

In [6]:
import pickle

# Write the movie table (including the title_clean / combined columns and
# the merged link ids) to CSV, without the row index.
movies.to_csv("movies_cleaned.csv", index=False)

# Serialise the pairwise similarity matrix to a binary pickle file.
with open("similarity.pkl", "wb") as pkl_file:
    pickle.dump(similarity_matrix, pkl_file)
