# In [4]:
#  Required libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD

# ---------------------------------------------------------------------------
# Data preparation
# ---------------------------------------------------------------------------
# Ratings file: tab-separated, read without a header row using the names below.
ratings_cols = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv(
    'u.data',
    sep='\t',
    names=ratings_cols,
    encoding='latin-1',
)

# Movie file: pipe-separated. Only the first two columns (positions 0 and 1,
# i.e. item_id and title) are loaded; the remaining names are placeholders.
movies_cols = ['item_id', 'title', *range(22)]
movies = pd.read_csv(
    'u.item',
    sep='|',
    names=movies_cols,
    usecols=[0, 1],
    encoding='latin-1',
)

# Attach the movie title to every rating row (inner join on item_id).
df = ratings.merge(movies, on='item_id')

# Hold out 20% of the rating rows for evaluation; the seed keeps the split
# reproducible.
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Users x titles matrix built from the training rows only. Cells without a
# rating are NaN here; the *_filled copy replaces them with 0 so that cosine
# similarity can be computed.
user_item_matrix = train_data.pivot_table(
    values='rating',
    index='user_id',
    columns='title',
)
user_item_matrix_filled = user_item_matrix.fillna(0)

# Pairwise user-user cosine similarity, labelled by user_id on both axes.
user_similarity = cosine_similarity(user_item_matrix_filled)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

#  Get top-k similar users
def get_top_similar_users(user_id, k=5):
    """Return the ``k`` users most similar to ``user_id``.

    Args:
        user_id: Label of the user to look up in ``user_similarity_df``.
        k: Maximum number of neighbours to return.

    Returns:
        A Series mapping neighbour user ids to their similarity score, sorted
        from most to least similar, with ``user_id`` itself removed. An empty
        float Series is returned when ``user_id`` is not in the similarity
        table.
    """
    if user_id in user_similarity_df.index:
        scores = user_similarity_df[user_id]
        # A user is always maximally similar to themselves, so leave them out.
        others = scores.drop(index=user_id, errors='ignore')
        ranked = others.sort_values(ascending=False)
        return ranked.head(k)
    return pd.Series(dtype=float)

#  Recommend top-N movies
def recommend_movies(user_id, k=5, n=5):
    """Recommend up to ``n`` movies that ``user_id`` has not rated yet.

    Every movie the user has no rating for in ``user_item_matrix`` is scored
    with the similarity-weighted mean of the ratings given by the user's ``k``
    nearest neighbours, counting only the neighbours who rated that movie.
    Movies with no usable neighbour rating (total weight not above zero) are
    never recommended.

    Args:
        user_id: Label of the user in ``user_item_matrix``.
        k: Number of similar users to consult.
        n: Maximum number of titles to return.

    Returns:
        A list of movie titles ordered by descending score; equal scores keep
        the column order of ``user_item_matrix``. Empty if the user is unknown
        or nothing can be scored.
    """
    if user_id not in user_item_matrix.index:
        return []

    top_users = get_top_similar_users(user_id, k)
    user_ratings = user_item_matrix.loc[user_id]
    unseen_movies = user_ratings[user_ratings.isna()].index

    # Slice the neighbours' rows once instead of doing a label lookup for
    # every (movie, neighbour) pair; shape is (neighbours, unseen movies).
    neighbour_ratings = user_item_matrix.loc[
        top_users.index, unseen_movies
    ].to_numpy(dtype=float)
    weights = top_users.to_numpy(dtype=float)[:, np.newaxis]
    rated = ~np.isnan(neighbour_ratings)

    # Missing ratings contribute neither to the numerator nor to the weight
    # total, so each movie is averaged over its actual raters only.
    weighted_sum = (np.where(rated, neighbour_ratings, 0.0) * weights).sum(axis=0)
    weight_total = (rated * weights).sum(axis=0)

    scorable = weight_total > 0
    scores = weighted_sum[scorable] / weight_total[scorable]
    candidates = unseen_movies[scorable]

    # A stable sort on the negated scores ranks high-to-low while leaving
    # tied movies in their original column order.
    ranking = np.argsort(-scores, kind='stable')
    return list(candidates[ranking][:n])

#  Precision@K Evaluation
def precision_at_k(test_data, k=5):
    """Compute Precision@K of ``recommend_movies`` over held-out ratings.

    For each user in ``test_data`` that also appears in ``user_item_matrix``,
    ``k`` movies are recommended and a hit is counted for every recommended
    title the user has a row for in ``test_data``. Users absent from the
    training matrix are skipped.

    Note that ``k`` is passed to ``recommend_movies`` both as the neighbour
    count and as the list length, and that every evaluated user adds ``k`` to
    the denominator even if fewer than ``k`` titles were recommended.

    Args:
        test_data: DataFrame with at least ``user_id`` and ``title`` columns.
        k: Number of recommendations per user.

    Returns:
        Total hits divided by ``k`` times the number of evaluated users, or 0
        when no user could be evaluated.
    """
    hits, total = 0, 0
    known_users = user_item_matrix.index
    # Group once instead of re-filtering the whole frame for every user;
    # sort=False keeps users in order of first appearance.
    for user_id, titles in test_data.groupby('user_id', sort=False)['title']:
        if user_id not in known_users:
            continue
        recommended = recommend_movies(user_id, k=k, n=k)
        hits += len(set(titles) & set(recommended))
        total += k
    return hits / total if total > 0 else 0

# Matrix Factorization (SVD-based)
def svd_recommend_movies(user_id, n=5, n_components=20, random_state=42):
    """Recommend up to ``n`` unrated movies for ``user_id`` using truncated SVD.

    The zero-filled user-item matrix is factorised with ``TruncatedSVD`` and
    the user's row is reconstructed from the latent factors; unrated movies
    with the highest reconstructed value are returned.

    Args:
        user_id: Label of the user in ``user_item_matrix``.
        n: Maximum number of titles to return.
        n_components: Number of latent factors kept by the decomposition.
        random_state: Seed forwarded to ``TruncatedSVD``. The solver is
            randomised by default, so a fixed seed is needed for repeated
            calls to return the same recommendations.

    Returns:
        A list of movie titles ordered by descending predicted score, or an
        empty list when ``user_id`` is not in the training matrix (matching
        ``recommend_movies``).
    """
    # Unknown users have no row to reconstruct; bail out before fitting.
    if user_id not in user_item_matrix.index:
        return []

    matrix = user_item_matrix.fillna(0)
    svd = TruncatedSVD(n_components=n_components, random_state=random_state)
    user_features = svd.fit_transform(matrix)
    movie_features = svd.components_

    # Only this user's predictions are needed, so reconstruct a single row
    # rather than the full users x movies matrix.
    row = matrix.index.get_loc(user_id)
    predicted = pd.Series(user_features[row] @ movie_features, index=matrix.columns)

    # Read "already rated" from the unfilled matrix so it does not depend on
    # the 0 used as the fill value.
    seen = user_item_matrix.columns[user_item_matrix.loc[user_id].notna()]
    recommendations = predicted.drop(seen).sort_values(ascending=False).head(n)
    return list(recommendations.index)

#  Report results: evaluate the user-based recommender on the held-out split,
#  then show sample recommendations from both approaches for user 1.
precision_at_5 = precision_at_k(test_data, k=5)
print(" Precision@5:", precision_at_5)

user_based_recs = recommend_movies(user_id=1, k=5, n=5)
print(" User-Based Recommendations for User 1:", user_based_recs)

svd_recs = svd_recommend_movies(user_id=1, n=5)
print(" SVD Recommendations for User 1:", svd_recs)


# Output pasted from a run of the cell above:
#  Precision@5: 0.07382978723404256
#  User-Based Recommendations for User 1: ['Titanic (1997)', 'Alien: Resurrection (1997)', 'Angels and Insects (1995)', 'Bitter Moon (1992)', 'Casablanca (1942)']
#  SVD Recommendations for User 1: ['Fish Called Wanda, A (1988)', 'Trainspotting (1996)', 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)', 'Heathers (1989)', 'Reservoir Dogs (1992)']
