In [11]:
import pandas as pd
import numpy as np
import ast
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import hstack

class HybridRecommender:
    def __init__(self, path):
        self.df = pd.read_csv(path)
        self._prepare_data()
        self._build_model()

    def _prepare_data(self):
        print(type(self.df['genres'][0]))
        # Convert genre strings back to list
        self.df['genres'] = self.df['genres'].apply(ast.literal_eval)

        # TF-IDF on overview
        self.df['overview'] = self.df['overview'].fillna("")
        self.tfidf = TfidfVectorizer(max_features=5000)
        self.overview_vec = self.tfidf.fit_transform(self.df['overview'])

        # Genre one-hot
        mlb = MultiLabelBinarizer()
        self.genre_vec = mlb.fit_transform(self.df['genres'])

        # Numeric features
        numeric_cols = ['popularity', 'vote_average', 'runtime']
        for col in numeric_cols:
            self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
        self.df[numeric_cols] = self.df[numeric_cols].fillna(0)

        self.scaler = MinMaxScaler()
        self.numeric_vec = self.scaler.fit_transform(self.df[numeric_cols])

        # Combine all features
        self.final_features = hstack([self.overview_vec, self.genre_vec, self.numeric_vec])

    def _build_model(self):
        self.nn = NearestNeighbors(metric='cosine', algorithm='brute')
        self.nn.fit(self.final_features)

    def recommend(self, title, n=5):
        title = title.lower()
        matches = self.df[self.df['title'].str.lower() == title]
        if matches.empty:
            return ["Movie not found."]
        idx = matches.index[0]
        vec = self.final_features.getrow(idx)
        distances, indices = self.nn.kneighbors(vec, n_neighbors=n+1)
        return self.df.iloc[indices[0][1:]]['title'].tolist()


In [12]:
recommender = HybridRecommender('updated_movie_metadeta.csv')

<class 'str'>


In [14]:
movie_name = "Interstellar"
recommendations = recommender.recommend(movie_name, n=5)

print("Recommended Movies:")
for movie in recommendations:
    print("-", movie)

Recommended Movies:
- A.I. Artificial Intelligence
- Silent Running
- Hard to Be a God
- Crumbs
- The Martian
