In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# -----------------------------------------------------
# LOAD CSV
# -----------------------------------------------------
# NOTE: this is an absolute, machine-specific path; adjust it when running
# on another machine.
df = pd.read_csv(r"C:\Users\91917\Desktop\merged_movies.csv")

# Columns this script expects to find in the CSV.
required_cols = [
    "movie_title", "imdb_rating", "director", "cast",
    "imdb_votes", "certification", "genres", "description"
]

# Fail fast with a readable message instead of a KeyError further down.
for col in required_cols:
    if col not in df.columns:
        raise ValueError(f"❌ Missing column: {col}")

# -----------------------------------------------------
# PREPARE A SINGLE TEXT COLUMN FOR FEATURE EXTRACTION
# -----------------------------------------------------
# Text columns concatenated (space-separated, in this order) into the
# document that is later fed to the TF-IDF vectorizer.
feature_cols = [
    "movie_title", "director", "cast",
    "genres", "certification", "description",
]

# Missing values are replaced with "" before the string conversion.
# A bare astype(str) would turn NaN into the literal word "nan", which
# would then become a shared token between every movie with a missing field.
combined_text = df[feature_cols[0]].fillna("").astype(str)
for feature_col in feature_cols[1:]:
    combined_text = combined_text + " " + df[feature_col].fillna("").astype(str)

df["combined_features"] = combined_text

# -----------------------------------------------------
# TITLE LOOKUP
# -----------------------------------------------------
# Title -> row position in df. When a title occurs more than once, the
# last occurrence's position is the one that is kept.
title_to_index = dict(zip(df["movie_title"], range(len(df))))

# -----------------------------------------------------
# TRAIN-TEST SPLIT (80/20)
# -----------------------------------------------------
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)

# The TF-IDF vocabulary and weights are learned from the training rows only;
# the same fitted vectorizer is reused for every transform below.
vectorizer = TfidfVectorizer(stop_words="english")
train_vectors = vectorizer.fit_transform(train_df["combined_features"])

# Held-out rows, projected with the already-fitted vectorizer.
test_vectors = vectorizer.transform(test_df["combined_features"])

# -----------------------------------------------------
# FULL DATA VECTORS (FOR RECOMMENDATION)
# -----------------------------------------------------
# One row per row of df, in df's row order.
full_vectors = vectorizer.transform(df["combined_features"])

# -----------------------------------------------------
# RECOMMENDER FUNCTION
# -----------------------------------------------------
def recommend_movies(input_movie_list, num_recommendations=10):
    """
    Recommend movies similar to a list of seed titles.

    For every seed title found in ``title_to_index``, the cosine similarity
    between its row of ``full_vectors`` and every row of the dataset is
    computed. The per-seed similarities are summed, and the rows with the
    highest totals are returned, excluding the seed rows themselves.

    Parameters
    ----------
    input_movie_list : iterable of str
        Movie titles provided by the user (the example run passes 5).
        Titles are looked up by exact match; a title that is not found is
        reported with a printed warning and skipped.
    num_recommendations : int, default 10
        Maximum number of rows to return. Negative values are treated as 0.

    Returns
    -------
    pandas.DataFrame or list
        The ``movie_title``, ``genres`` and ``imdb_rating`` columns of the
        recommended rows, ordered by descending summed similarity. If none
        of the input titles is found, an empty list ``[]`` is returned
        (kept as-is so existing callers are not affected).
    """
    valid_indices = []
    for movie in input_movie_list:
        idx = title_to_index.get(movie)
        if idx is None:
            print(f"⚠️ Movie not found in dataset: {movie}")
        else:
            valid_indices.append(idx)

    if not valid_indices:
        return []

    # One batched call gives an (n_seeds, n_movies) matrix; summing over the
    # seed axis yields each movie's total similarity to all seeds.
    similarities = cosine_similarity(
        full_vectors[valid_indices], full_vectors
    ).sum(axis=0)

    # Row positions ordered from most to least similar.
    ranked = similarities.argsort()[::-1]

    # Drop the seed movies themselves with a vectorized mask rather than a
    # Python-level membership test per row.
    ranked = ranked[~np.isin(ranked, valid_indices)]

    # Clamp so a negative count cannot turn into an "all but the last N" slice.
    top_indices = ranked[:max(num_recommendations, 0)]

    return df.iloc[top_indices][["movie_title", "genres", "imdb_rating"]]

# -----------------------------------------------------
# EXAMPLE RUN (YOU WILL INPUT YOUR 5 MOVIES)
# -----------------------------------------------------
# Seed titles; each one must match a movie_title value exactly to be used.
input_movies = [
    "La La Land",
    "Deadpool",
    "Avengers: Infinity War",
    "Top Gun: Maverick",
    "Joker"
]

print("\n🔥 Recommended Movies:")
print(recommend_movies(input_movies, 10))



🔥 Recommended Movies:
                     movie_title                     genres  imdb_rating
1610        Deadpool & Wolverine     Action, Comedy, Sci-Fi          8.0
604            Avengers: Endgame  Action, Adventure, Sci-Fi          8.4
1     Captain America: Civil War             Action, Sci-Fi          7.8
1635         Joker: Folie à Deux   Drama, Musical, Thriller          5.2
411                   Deadpool 2  Action, Adventure, Comedy          7.6
466                    First Man  Biography, Drama, History          7.3
1642     Furiosa: A Mad Max Saga  Action, Adventure, Sci-Fi          7.5
608               Captain Marvel  Action, Adventure, Sci-Fi          6.7
841         The War with Grandpa      Comedy, Drama, Family          5.7
381               Only the Brave   Action, Biography, Drama          7.6
