# Recommender System Lab (New Movies Dataset)

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import normalize

In [None]:
# Load the ratings table. Later cells read its 'user_id', 'movie_title' and
# 'rating' columns (and 'genre' when it is present).
ratings_path = 'movies.csv'
df = pd.read_csv(ratings_path)
df.head(n=5)

In [None]:
# Build the user x movie rating matrix: one row per user_id, one column per
# movie_title. (user, movie) pairs without a rating come out as NaN.
user_item = df.pivot_table(
    values='rating',
    index='user_id',
    columns='movie_title',
)
user_item

In [None]:
# User-user cosine similarity. Missing ratings are replaced by 0 first so that
# every user row is a fully populated numeric vector.
dense_ratings = user_item.fillna(value=0)
filled = dense_ratings.to_numpy()
sim = cosine_similarity(X=filled)
sim.shape

In [None]:
# Example: recommend for user 1
# User-based collaborative filtering: score every movie as the
# similarity-weighted average of the other users' ratings, then list up to
# three of the best-scoring movies the user has not rated yet.
user_index = user_item.index.get_loc(1)

# Copy the row: sim[user_index] is a view, so writing into it in place would
# also modify the shared `sim` matrix used by other cells.
sims = sim[user_index].copy()
# Exclude the user from their own neighbourhood with a zero weight. A -inf
# weight would poison every prediction: -inf * 0 is NaN in the numerator and
# the denominator becomes inf, so all scores would end up NaN.
sims[user_index] = 0.0

ratings = user_item.to_numpy()
ratings_filled = np.nan_to_num(ratings, nan=0)

# Column vector of neighbour weights, broadcast across the movie columns.
w = sims.reshape(-1, 1)
# NOTE: unrated entries count as a rating of 0 and the denominator sums the
# weights of all neighbours, so sparsely rated movies are pulled towards 0.
# The small epsilon avoids a division by zero when all weights are 0.
preds = (w * ratings_filled).sum(axis=0) / (np.abs(w).sum(axis=0) + 1e-8)

# Never recommend something the user has already rated.
already_rated = ~np.isnan(ratings[user_index])
preds[already_rated] = -np.inf

movie_titles = list(user_item.columns)
# argsort is ascending: take the last three and reverse for best-first order.
top_idx = np.argsort(preds)[-3:][::-1]
# Drop masked (already rated) entries that slipped into the top three when
# fewer than three unseen movies exist.
[(movie_titles[i], preds[i]) for i in top_idx if np.isfinite(preds[i])]

In [None]:
# Content-based filtering for user 1: build a TF-IDF genre profile from the
# movies the user rated at or above their own mean rating, then rank the
# movies they have not rated by cosine similarity to that profile.
content_recs = []  # stays empty without a 'genre' column or rows for the user
if 'genre' in df.columns:
    # regex=False: '|' is the regex alternation operator, so replace it as a
    # literal separator regardless of the installed pandas default.
    df['genre_text'] = df['genre'].astype(str).str.replace('|', ' ', regex=False)
    tfidf = TfidfVectorizer()
    item_tfidf = tfidf.fit_transform(df['genre_text'])

    # Rows of item_tfidf are positional, so select them with positional
    # boolean masks rather than with df's index labels.
    user_mask = (df['user_id'] == 1).to_numpy()
    rows_u = df[user_mask]
    if not rows_u.empty:
        thr = rows_u['rating'].mean()
        liked_mask = user_mask & (df['rating'] >= thr).to_numpy()
        if not liked_mask.any():
            # No rating reaches the threshold (e.g. all ratings are NaN):
            # fall back to everything the user rated.
            liked_mask = user_mask
        liked = df[liked_mask]

        # A sparse .mean() can return np.matrix, which scikit-learn's input
        # validation may reject; convert to a plain (1, n_features) ndarray.
        liked_rows = item_tfidf[np.flatnonzero(liked_mask)]
        user_profile = np.asarray(liked_rows.mean(axis=0)).reshape(1, -1)
        U = normalize(user_profile)
        I = normalize(item_tfidf)
        # Both sides are L2-normalised, so the product is the cosine
        # similarity. sparse @ dense is already dense (no .toarray()).
        sims = np.asarray(I @ U.T).ravel()

        rated_titles = set(rows_u['movie_title'])
        # df holds one row per rating, so a title can repeat; the dict keeps
        # a single score per title.
        candidates = {
            title: s
            for title, s in zip(df['movie_title'], sims)
            if title not in rated_titles
        }
        content_recs = sorted(candidates.items(), key=lambda x: x[1], reverse=True)[:3]

# A notebook only echoes a top-level trailing expression, so display the
# ranked (title, score) pairs here rather than inside the `if` body.
content_recs