# Step 1: Install & Import Required Libraries

In [None]:

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import train_test_split
import ipywidgets as widgets
from IPython.display import display

# For reproducibility: seed NumPy's global random number generator so the
# np.random calls used further down (e.g. when generating the synthetic
# ratings) produce the same values on every top-to-bottom run.
np.random.seed(42)


# Step 2: Load Dataset (or generate synthetic if not available)

In [None]:

try:
    # Attempt to download the MovieLens "ml-latest-small" dataset.
    # The imports live inside the try so that a missing `requests` package
    # also falls through to the synthetic data below.
    import io
    import zipfile

    import requests

    url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
    # (connect, read) timeouts: without them an unreachable host can block
    # the notebook indefinitely.
    r = requests.get(url, timeout=(5, 30))
    # Turn HTTP error responses into an exception here instead of trying to
    # parse an error page as a zip archive.
    r.raise_for_status()
    # The context manager closes the in-memory archive once both CSVs are read.
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        ratings = pd.read_csv(z.open('ml-latest-small/ratings.csv'))
        movies = pd.read_csv(z.open('ml-latest-small/movies.csv'))
except Exception as e:
    # Deliberately broad: any failure (no network, missing package, bad
    # archive, ...) should fall back to synthetic data -- but say why, so the
    # fallback is never silent.
    print(f"Could not load MovieLens ({e!r}); generating a synthetic dataset instead.")

    # Synthetic dataset: 50 users, 100 movies, each user rates 20 distinct
    # movies with an integer rating from 1 to 5.
    user_ids = np.arange(1, 51)
    movie_ids = np.arange(1, 101)
    ratings_data = []
    for u in user_ids:
        rated_movies = np.random.choice(movie_ids, size=20, replace=False)
        for m in rated_movies:
            # randint's upper bound is exclusive, so this yields 1..5.
            ratings_data.append((u, m, np.random.randint(1, 6)))
    ratings = pd.DataFrame(ratings_data, columns=["userId", "movieId", "rating"])
    movies = pd.DataFrame({
        "movieId": movie_ids,
        "title": [f"Movie {i}" for i in movie_ids]
    })

ratings.head(), movies.head()


# Step 3: Create User–Item Matrix

In [None]:

# Pivot the long ratings table into a wide matrix: one row per user, one
# column per movie, the rating as the cell value. User/movie pairs without a
# rating are filled with 0.
user_item_matrix = pd.pivot_table(
    ratings,
    values='rating',
    index='userId',
    columns='movieId',
).fillna(0)
user_item_matrix.head()


# Step 4: Popularity Baseline

In [None]:

# Mean rating per movie, and the ids of the ten highest-rated movies (best first).
# NOTE(review): a plain mean has no minimum-rating-count threshold, so a movie
# with a single top rating can rank first.
movie_mean_ratings = ratings.groupby('movieId')['rating'].mean()
top_movies = movie_mean_ratings.sort_values(ascending=False).head(10).index
# `isin` alone returns the rows in `movies` order and loses the ranking, so
# attach each movie's mean rating and sort by it before displaying.
(
    movies[movies['movieId'].isin(top_movies)]
    .assign(mean_rating=lambda df: df['movieId'].map(movie_mean_ratings))
    .sort_values('mean_rating', ascending=False)
)


# Step 5: Item-based Collaborative Filtering (Cosine Similarity)

In [None]:

# Transpose so that each movie becomes a row, then compute the cosine
# similarity between every pair of movie rating vectors.
item_similarity = cosine_similarity(user_item_matrix.transpose())
# Label both axes with the movie ids so similarities can be looked up by id.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)

def recommend_item_based(movie_id, top_n=5):
    """Return the ``top_n`` movies most similar to ``movie_id``, best first.

    Args:
        movie_id: Id of the query movie; must be a column of
            ``item_similarity_df``.
        top_n: Maximum number of similar movies to return.

    Returns:
        The matching rows of ``movies``, ordered from most to least similar.

    Raises:
        KeyError: If ``movie_id`` is not present in ``item_similarity_df``.
    """
    # Remove the query movie by label. Skipping "the first sorted entry"
    # instead is wrong whenever another movie ties with it (similarity 1.0)
    # or the query movie has an all-zero rating vector.
    scores = item_similarity_df[movie_id].drop(labels=movie_id)
    best_ids = scores.sort_values(ascending=False, kind="stable").head(top_n).index

    # `isin` yields rows in `movies` order, so re-sort them by similarity rank.
    rank = {mid: pos for pos, mid in enumerate(best_ids)}
    matches = movies[movies['movieId'].isin(best_ids)]
    order = matches['movieId'].map(rank).to_numpy().argsort(kind="stable")
    return matches.iloc[order]

# Demo: recommendations for the first movie column of the user-item matrix.
recommend_item_based(movie_id=user_item_matrix.columns[0])


# Step 6: Matrix Factorization with TruncatedSVD

In [None]:

# Factorize the user-item matrix into (at most) 20 latent components.
# - n_components is capped at the number of movies so a small matrix does not
#   make TruncatedSVD raise.
# - random_state pins the randomized solver, so re-running this cell gives the
#   same factors regardless of how much global RNG state was consumed before.
svd = TruncatedSVD(
    n_components=min(20, user_item_matrix.shape[1]),
    random_state=42,
)
latent_matrix = svd.fit_transform(user_item_matrix)
# Project the latent user factors back into movie space: a low-rank
# approximation of the rating matrix that also has values for unrated cells.
reconstructed = np.dot(latent_matrix, svd.components_)

reconstructed_df = pd.DataFrame(reconstructed, index=user_item_matrix.index, columns=user_item_matrix.columns)

def recommend_svd(user_id, top_n=5):
    """Return the ``top_n`` unrated movies with the highest reconstructed score.

    Args:
        user_id: Id of the user; must be a row label of ``reconstructed_df``.
        top_n: Maximum number of movies to return.

    Returns:
        The matching rows of ``movies``, ordered from highest to lowest
        reconstructed score.

    Raises:
        KeyError: If ``user_id`` has no row in ``reconstructed_df``.
    """
    if user_id not in reconstructed_df.index:
        raise KeyError(f"user_id {user_id!r} has no row in the reconstructed rating matrix")

    user_ratings = reconstructed_df.loc[user_id]
    already_rated = ratings.loc[ratings['userId'] == user_id, 'movieId']
    # Exclude what the user has already rated; errors='ignore' skips rated
    # movie ids that have no column in the matrix instead of raising.
    recommendations = (
        user_ratings.drop(labels=already_rated.unique(), errors='ignore')
        .sort_values(ascending=False)
        .head(top_n)
    )

    # `isin` yields rows in `movies` order, so re-sort them by score rank.
    rank = {mid: pos for pos, mid in enumerate(recommendations.index)}
    matches = movies[movies['movieId'].isin(recommendations.index)]
    order = matches['movieId'].map(rank).to_numpy().argsort(kind="stable")
    return matches.iloc[order]

# Demo: SVD-based recommendations for the user with id 1.
recommend_svd(user_id=1)


# Step 7: Interactive Widget

In [None]:

def interactive_recommender(user_id):
    """Show the SVD-based recommendations for ``user_id`` in the cell output."""
    recommended = recommend_svd(user_id)
    display(recommended)

# Derive the slider bounds and its initial position from the user ids that
# actually exist in the matrix instead of assuming the ids start at 1.
# NOTE: ids strictly between min and max that are missing from the index will
# still make the recommender raise KeyError when selected.
user_selector = widgets.IntSlider(
    min=int(user_item_matrix.index.min()),
    max=int(user_item_matrix.index.max()),
    step=1,
    value=int(user_item_matrix.index.min()),
)
# The trailing semicolon suppresses the notebook's echo of interact's return value.
widgets.interact(interactive_recommender, user_id=user_selector);
