In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
from scipy.sparse.linalg import svds


In [3]:
# Load the tab-separated ratings file (it has no header row, so column names
# are supplied here) and discard the timestamp column, which is not used.
ratings = pd.read_csv(
    'u.data',
    sep='\t',
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
).drop(columns='timestamp')


In [4]:
# Reshape the long ratings table into a user x movie grid: one row per
# user_id, one column per movie_id. (user, movie) pairs without a rating
# come out as NaN.
user_item_matrix = ratings.pivot(
    index='user_id',
    columns='movie_id',
    values='rating',
)


In [5]:
# Pairwise cosine similarity between user rows. Missing ratings are replaced
# by 0 before the comparison, so "not rated" is treated as a zero rating.
user_similarity = cosine_similarity(user_item_matrix.fillna(0))

# Label both axes with user ids so similarities can be looked up by id.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)


In [6]:
def recommend_movies(user_id, k=5):
    """Recommend up to ``k`` unseen movies using user-based collaborative filtering.

    Each movie the target user has not rated is scored with the sum, over all
    *other* users who rated it, of ``rating * similarity(target, other)``.
    The score is a raw weighted sum (it is not divided by the total
    similarity), so it is only meaningful for ranking, not as a predicted
    rating.

    Parameters
    ----------
    user_id
        Label of the target user in ``user_item_matrix`` / ``user_similarity_df``.
    k : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Scores indexed by movie id, sorted from highest to lowest, at most
        ``k`` entries. Movies that no other user has rated are omitted.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in the similarity or rating tables.
    """
    # Exclude the target user by label. Relying on "the first entry after
    # sorting" breaks when another user ties for the highest similarity.
    neighbour_weights = user_similarity_df[user_id].drop(user_id)

    target_ratings = user_item_matrix.loc[user_id]
    unseen_movies = target_ratings.index[target_ratings.isna()]

    # Ratings the other users gave to the movies the target has not rated.
    neighbour_ratings = user_item_matrix.loc[neighbour_weights.index, unseen_movies]

    weighted_ratings = (
        neighbour_ratings
        .mul(neighbour_weights, axis=0)   # scale each user's row by their similarity
        .sum(axis=0, min_count=1)         # NaN (not 0) for movies nobody else rated
        .dropna()
        .astype(float)
        .rename_axis(None)                # keep the result's index unnamed
    )

    return weighted_ratings.sort_values(ascending=False).head(k)


In [7]:
def precision_at_k(user_id, k=5, test_data=None):
    """Fraction of the top-``k`` user-based recommendations found in ``test_data``.

    Parameters
    ----------
    user_id
        User to evaluate; passed through to ``recommend_movies``.
    k : int, default 5
        Number of recommendations requested. It is also the denominator, so
        the result is hits / k even when fewer than ``k`` movies are returned.
    test_data : pandas.DataFrame
        Held-out interactions with at least ``user_id`` and ``movie_id``
        columns. Every row for the user counts as relevant; no rating
        threshold is applied.

    Returns
    -------
    float
        Value in [0, 1].

    Raises
    ------
    ValueError
        If ``test_data`` is not supplied or ``k`` is not positive.
    """
    # NOTE(review): recommend_movies only returns movies the user has NOT
    # rated in user_item_matrix. If that matrix was built from data that
    # includes the test rows, no hit is possible and this always returns 0.
    # Confirm the matrix is built from the training split only.
    if test_data is None:
        raise ValueError("test_data is required to compute precision@k")
    if k <= 0:
        raise ValueError("k must be a positive integer")

    recommended = set(recommend_movies(user_id, k).index)
    actual = set(test_data.loc[test_data['user_id'] == user_id, 'movie_id'])
    return len(recommended & actual) / k


In [8]:
# Pairwise cosine similarity between movies: the matrix is transposed so each
# row is one movie's ratings across users, with missing ratings filled as 0.
item_similarity = cosine_similarity(user_item_matrix.T.fillna(0))

# Label both axes with movie ids so similarities can be looked up by id.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)

def item_based_recommend(user_id, k=5):
    """Recommend up to ``k`` unseen movies using item-based collaborative filtering.

    Every candidate movie is scored with the sum, over the movies the user
    has rated, of ``similarity(candidate, rated_movie) * rating``. The score
    is a raw weighted sum, so it is useful for ranking only.

    Parameters
    ----------
    user_id
        Label of the target user in ``user_item_matrix``.
    k : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Scores indexed by movie id, sorted from highest to lowest, at most
        ``k`` entries. Movies the user has already rated are excluded. Empty
        if the user has no ratings.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in ``user_item_matrix``.
    """
    rated = user_item_matrix.loc[user_id].dropna()
    if rated.empty:
        # Nothing to propagate from; matches the result of accumulating zero terms.
        return pd.Series(dtype=float)

    # One matrix-vector product: (all movies x rated movies) . (user's ratings).
    scores = item_similarity_df[rated.index].dot(rated)

    return scores.drop(rated.index).sort_values(ascending=False).head(k)


In [9]:
# Dense rating array with unrated entries set to 0.
R = user_item_matrix.fillna(0).to_numpy()

# Per-user mean, subtracted row-wise before factorising.
# NOTE(review): the mean is taken over every column of the zero-filled array,
# so unrated movies count as 0 and pull it well below the user's average
# given rating. Confirm this is intended.
user_ratings_mean = R.mean(axis=1)
R_demeaned = R - user_ratings_mean[:, np.newaxis]

# Truncated SVD keeping 20 singular values; svds returns them as a 1-D array,
# so they are expanded into a diagonal matrix for the reconstruction below.
U, sigma, Vt = svds(R_demeaned, k=20)
sigma = np.diag(sigma)

# Low-rank reconstruction with the per-user mean added back.
predicted_ratings = U @ sigma @ Vt + user_ratings_mean[:, np.newaxis]
predicted_df = pd.DataFrame(
    data=predicted_ratings,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

def svd_recommend(user_id, k=5):
    """Return the ``k`` highest SVD-predicted ratings among movies the user has not rated.

    Parameters
    ----------
    user_id
        Label of the target user in ``predicted_df`` / ``user_item_matrix``.
    k : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Predicted ratings indexed by movie id, sorted from highest to lowest.
    """
    predictions = predicted_df.loc[user_id]

    # Movies with an actual rating from this user are not candidates.
    has_rating = user_item_matrix.loc[user_id].notna()
    seen_ids = has_rating.index[has_rating]

    unseen_predictions = predictions.drop(index=seen_ids)
    return unseen_predictions.sort_values(ascending=False).head(k)


In [10]:
# Demo: user-based recommendations for user 10 (the result is reused later
# when movie titles are attached).
recommended = recommend_movies(user_id=10, k=5)
print("Top 5 User-Based Recommended Movies for User 10:", recommended, sep="\n")


Top 5 User-Based Recommended Movies for User 10:
181    534.991466
172    485.604806
204    435.395734
318    434.517064
79     428.884302
dtype: float64


In [11]:
# Demo: item-based recommendations for the same user.
item_rec = item_based_recommend(user_id=10, k=5)
print("Top 5 Item-Based Recommended Movies for User 10:", item_rec, sep="\n")


Top 5 Item-Based Recommended Movies for User 10:
movie_id
204    327.715430
423    318.155628
172    317.498715
89     312.632345
79     311.699217
dtype: float64


In [12]:
# Demo: SVD-based recommendations for the same user.
svd_rec = svd_recommend(user_id=10, k=5)
print("Top 5 SVD Recommended Movies for User 10:", svd_rec, sep="\n")


Top 5 SVD Recommended Movies for User 10:
movie_id
514    3.975967
187    3.841146
318    3.830776
427    3.821485
8      3.339764
Name: 10, dtype: float64


In [14]:
# Only the first two pipe-separated fields of u.item are read: id and title.
movies = pd.read_csv(
    'u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['movie_id', 'title'],
)

# Attach titles to the user-based recommendations. reset_index() turns the
# unnamed Series index into a column called 'index' (renamed to 'movie_id'
# for the join) and the unnamed scores into a column called 0.
recommended_df = recommended.reset_index().rename(columns={'index': 'movie_id'})
recommended_with_titles = recommended_df.merge(movies, on='movie_id')
print(recommended_with_titles[['title', 0]])


                             title           0
0        Return of the Jedi (1983)  534.991466
1  Empire Strikes Back, The (1980)  485.604806
2        Back to the Future (1985)  435.395734
3          Schindler's List (1993)  434.517064
4             Fugitive, The (1993)  428.884302
