In [5]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error
from math import sqrt

# 1. Load data
# The rest of this script reads the userId, movieId and rating columns from
# ratings.csv and the movieId and title columns from movies.csv.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# 2. Filter popular items and active users (threshold >20 ratings)
# Both id sets are computed from the unfiltered ratings first; the row filter
# is then applied once, keeping rows whose user AND movie pass the threshold.
active_users = ratings['userId'].value_counts().loc[lambda counts: counts > 20].index
popular_movies = ratings['movieId'].value_counts().loc[lambda counts: counts > 20].index
is_active_user = ratings['userId'].isin(active_users)
is_popular_movie = ratings['movieId'].isin(popular_movies)
ratings = ratings.loc[is_active_user & is_popular_movie]

# 3. Pivot to user–item matrix
# Rows are userId, columns are movieId, cells are the rating.
mat = ratings.pivot(index='userId', columns='movieId', values='rating')
# Missing (user, movie) pairs become 0; later steps treat 0 as "not rated".
mat = mat.fillna(0)

# 4. Apply Truncated SVD
# Factorise the zero-filled matrix into 20 components and multiply the two
# factors back together to get a dense score for every (user, movie) cell.
svd = TruncatedSVD(n_components=20, random_state=42)
U = svd.fit_transform(mat)
Vt = svd.components_
predicted = U @ Vt
# Re-attach the user/movie labels so predictions can be looked up by id.
pred_df = pd.DataFrame(data=predicted, index=mat.index, columns=mat.columns)

# 5. Recommend top-5 movies for a sample user
def recommend(user_id, top_n=5):
    """Return the highest-scoring movies that ``user_id`` has not rated.

    Reads the module-level ``mat`` (user-item ratings, 0 meaning "not rated"),
    ``pred_df`` (reconstructed scores with the same labels) and ``movies``
    (``movieId`` to ``title`` metadata).

    Args:
        user_id: A label present in ``mat.index``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A DataFrame indexed by movieId with columns ``title`` and
        ``predicted_rating``, ordered from highest to lowest score. A movie
        with no row in ``movies`` keeps its score and gets a missing title.

    Raises:
        KeyError: If ``user_id`` is not a row of ``mat``.
    """
    if user_id not in mat.index:
        raise KeyError(f"user_id {user_id!r} is not in the user-item matrix")

    user_ratings = mat.loc[user_id]
    user_preds = pred_df.loc[user_id]

    # 0 is the fill value for "no rating", so these are the unseen movies.
    unseen = user_preds[user_ratings == 0]
    top = unseen.sort_values(ascending=False).head(top_n)

    # Map movieId to title. drop_duplicates keeps the lookup one-to-one and
    # reindex yields NaN (instead of raising) for ids missing from `movies`.
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title']
    titles = title_by_id.reindex(top.index)

    return pd.DataFrame({'title': titles, 'predicted_rating': top.to_numpy()})

# Demo: recommend for the first user (first row label) of the matrix.
sample_user = mat.index[0]
recs = recommend(sample_user)
header = f"\n🎯 Recommendations for User {sample_user}:\n"
# print() joins its two arguments with a space, so the table's first line is
# preceded by one extra space.
print(header, recs.to_string(index=False))

# 6. Compute RMSE
# Only cells holding a real rating are scored; zeros are the fill value for
# missing ratings. These are the same entries the SVD was fitted on, so the
# figure is a reconstruction error, not a held-out estimate.
actual = mat.values
estimated = pred_df.values
mask = actual != 0
mse = mean_squared_error(actual[mask], estimated[mask])
rmse = sqrt(mse)
print(f"\n📈 RMSE: {rmse:.4f}")



🎯 Recommendations for User 1:
                             title  predicted_rating
                  Die Hard (1988)          4.186452
   Godfather: Part II, The (1974)          3.954716
            Godfather, The (1972)          3.830065
Terminator 2: Judgment Day (1991)          3.623493
                    Aliens (1986)          3.008915

📈 RMSE: 2.1535
