# Training

In [None]:
%pip install numpy==1.26.4
%pip install scikit-surprise

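Install the pinned dependencies above (NumPy 1.26.4 and scikit-surprise), then run the cell below to train, evaluate, and save the SVD recommender.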


In [None]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split
import pickle

class PureSVDRecommender:
    """Movie recommender built on Surprise's ``SVD`` algorithm.

    The recommender is trained on a ratings table with ``userId``,
    ``movieId`` and ``rating`` columns.  After fitting it can rank unseen
    movies for a user by predicted rating and attach a short explanation
    derived from cosine similarity between item factor vectors.

    Attributes set by :meth:`fit`:
        model: the fitted Surprise ``SVD`` instance.
        trainset: the Surprise trainset used for raw-id -> inner-id mapping.
        ratings_df: a copy of the ratings frame passed to :meth:`fit`.
        metrics_: ``{"rmse": ..., "mae": ...}`` measured on the hold-out split.
    """

    def __init__(self, rating_scale=(0.5, 5.0), random_state=30):
        """Create an unfitted recommender.

        Args:
            rating_scale: ``(min, max)`` rating bounds handed to Surprise's
                ``Reader``.
            random_state: seed passed to the ``SVD`` model.
        """
        self.reader = Reader(rating_scale=rating_scale)
        self.random_state = random_state
        self.model = None
        self.ratings_df = None
        self.trainset = None
        self.metrics_ = {}

    def fit(self, ratings_df: pd.DataFrame, test_size=0.2) -> "PureSVDRecommender":
        """Train the SVD model and record RMSE and MAE on a hold-out split.

        Args:
            ratings_df: frame containing at least the columns ``userId``,
                ``movieId`` and ``rating``.
            test_size: share of the ratings held out for evaluation.

        Returns:
            ``self``, so the call can be chained.
        """
        # Keep a private copy for the watched/liked lookups used at
        # recommendation time.
        self.ratings_df = ratings_df.copy()

        # Convert to Surprise format
        data = Dataset.load_from_df(
            ratings_df[["userId", "movieId", "rating"]],
            self.reader,
        )

        # NOTE: the split seed is fixed at 42 and is independent of
        # ``self.random_state``, which only seeds the model itself.
        trainset, testset = train_test_split(
            data, test_size=test_size, random_state=42
        )

        model = SVD(
            n_factors=200,
            n_epochs=200,
            lr_all=0.005,
            reg_all=0.05,  # stronger regularization
            random_state=self.random_state,
        )
        model.fit(trainset)

        self.model = model
        # The trainset is kept for latent-space lookups (raw id -> inner id).
        self.trainset = trainset

        # Evaluate on the held-out ratings
        predictions = model.test(testset)
        self.metrics_ = {
            "rmse": accuracy.rmse(predictions, verbose=False),
            "mae": accuracy.mae(predictions, verbose=False),
        }

        return self

    def save(self, filepath):
        """Pickle the whole recommender (model, trainset, ratings) to *filepath*."""
        with open(filepath, "wb") as f:
            pickle.dump(self, f)

    @staticmethod
    def load(filepath):
        """Unpickle and return the object stored at *filepath*.

        SECURITY: unpickling can execute arbitrary code.  Only load files
        that come from a trusted source.
        """
        with open(filepath, "rb") as f:
            return pickle.load(f)

    def get_metrics(self) -> dict:
        """Return a copy of the evaluation metrics recorded by :meth:`fit`."""
        # A copy keeps callers from mutating the stored metrics by accident.
        return dict(self.metrics_)

    def _require_fitted(self):
        """Raise ``RuntimeError`` if the recommender has not been fitted."""
        if self.model is None or self.trainset is None or self.ratings_df is None:
            raise RuntimeError(
                "PureSVDRecommender is not fitted; call fit() or load() first."
            )

    @staticmethod
    def _matching_titles(movies_df: pd.DataFrame, movie_id):
        """Return the array of ``title`` values whose ``movieId`` equals *movie_id*."""
        return movies_df.loc[movies_df["movieId"] == movie_id, "title"].values

    def _get_watched_movies(self, user_id) -> set:
        """Return the set of movie ids that *user_id* has rated."""
        return set(
            self.ratings_df[self.ratings_df["userId"] == user_id]["movieId"]
        )

    def _get_liked_movies(self, user_id, min_rating=4.0) -> set:
        """Return the movie ids *user_id* rated at least *min_rating*."""
        return set(
            self.ratings_df[
                (self.ratings_df["userId"] == user_id) &
                (self.ratings_df["rating"] >= min_rating)
            ]["movieId"]
        )

    # ============================
    # FIND SIMILAR LIKED MOVIES IN LATENT SPACE
    # ============================

    def _get_similar_liked_movies(
        self, user_id, rec_movie_id, movies_df, top_k=2, liked_movies=None
    ) -> list:
        """Return titles of the user's liked movies closest to a recommendation.

        Closeness is cosine similarity between the item factor vectors
        (``model.qi``) of the recommended movie and each liked movie.

        Args:
            user_id: raw user id.
            rec_movie_id: raw id of the recommended movie.
            movies_df: frame with ``movieId`` and ``title`` columns.
            top_k: maximum number of titles to return.
            liked_movies: optional precomputed collection of the user's liked
                movie ids; when omitted it is derived from the stored ratings.

        Returns:
            Up to *top_k* titles, most similar first.  Empty when the
            recommended movie is not part of the trainset.

        Raises:
            RuntimeError: if the recommender has not been fitted.
        """
        self._require_fitted()

        # Map raw movieId -> Surprise inner id; items absent from the trainset
        # have no factor vector to compare against.
        try:
            rec_inner_id = self.trainset.to_inner_iid(rec_movie_id)
        except ValueError:
            return []

        if liked_movies is None:
            liked_movies = self._get_liked_movies(user_id)

        rec_vec = self.model.qi[rec_inner_id]
        rec_norm = np.linalg.norm(rec_vec)  # loop-invariant, computed once

        scored = []
        for movie_id in liked_movies:
            try:
                inner_id = self.trainset.to_inner_iid(movie_id)
            except ValueError:
                # Liked movie ended up outside the trainset: skip it.
                continue

            vec = self.model.qi[inner_id]

            # Cosine similarity; the epsilon guards against a zero norm.
            sim = np.dot(rec_vec, vec) / (
                rec_norm * np.linalg.norm(vec) + 1e-9
            )
            scored.append((movie_id, sim))

        # Most similar first (stable, so ties keep their iteration order).
        scored.sort(key=lambda pair: pair[1], reverse=True)

        # Convert movieIds -> titles, skipping ids missing from movies_df.
        similar_titles = []
        for movie_id, _ in scored[:top_k]:
            matches = self._matching_titles(movies_df, movie_id)
            if len(matches) > 0:
                similar_titles.append(matches[0])

        return similar_titles

    # ============================
    # RECOMMEND METHOD (DYNAMIC EXPLANATION)
    # ============================

    def recommend(self, user_id, movies_df, n=10) -> list:
        """Return the top-*n* unseen movies for *user_id* by predicted rating.

        Args:
            user_id: raw user id.
            movies_df: frame with ``movieId`` and ``title`` columns; its movie
                ids minus the ones the user already rated form the candidates.
            n: number of recommendations; values ``<= 0`` yield an empty list.

        Returns:
            A list of ``(movie_id, title, score, explanation)`` tuples sorted
            by descending predicted rating, with ``score`` rounded to three
            decimals.

        Raises:
            RuntimeError: if the recommender has not been fitted.
        """
        self._require_fitted()

        if n <= 0:
            return []

        watched = self._get_watched_movies(user_id)
        # Computed once here rather than once per recommended movie.
        liked = self._get_liked_movies(user_id)

        # Candidate movies = all movies NOT watched
        candidates = list(set(movies_df["movieId"]) - watched)

        # Predict ratings
        predict = self.model.predict
        preds = [(m, predict(user_id, m).est) for m in candidates]

        # Rank by predicted rating
        top_n = sorted(preds, key=lambda x: x[1], reverse=True)[:n]

        results = []
        for movie_id, score in top_n:
            title = self._matching_titles(movies_df, movie_id)[0]

            similar_liked = self._get_similar_liked_movies(
                user_id, movie_id, movies_df, top_k=2, liked_movies=liked
            )

            if similar_liked:
                explanation = (
                    f"Recommended because it is similar to movies you liked such as "
                    f"{' and '.join(similar_liked)}."
                )
            else:
                explanation = (
                    "Recommended based on patterns in your past movie ratings."
                )

            results.append((movie_id, title, round(score, 3), explanation))

        return results
# Read the ratings and the movie metadata from the /content directory
ratings_file = pd.read_csv("/content/ratings.csv")
movies_file = pd.read_csv("/content/movies.csv")

# Build the recommender and fit it in one step (fit() returns the instance)
recommender = PureSVDRecommender().fit(ratings_file)

# Persist the fitted recommender
recommender.save("/content/pure_svd_recommender.pkl")
print("Model saved to pure_svd_recommender.pkl")

# Report the hold-out error metrics
print("Model Accuracy:")
print(recommender.get_metrics())

# Top-10 recommendations for a sample user
user_id = 1
recommendations = recommender.recommend(user_id=user_id, movies_df=movies_file, n=10)

for i, rec in enumerate(recommendations, start=1):
    # Each entry is (movie_id, title, score, explanation); the id is not printed.
    _, title, score, reason = rec
    print(f"{i}. {title} (score: {score})")
    print("-", reason)
    print()
