In [41]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from scipy.sparse.linalg import svds
import zipfile

with zipfile.ZipFile('ml-100k.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

ratings_path = '/content/ml-100k/u.data'
ratings = pd.read_csv(ratings_path, sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

movies_path = '/content/ml-100k/u.item'
movies = pd.read_csv(movies_path, sep='\|', encoding='latin-1', header=None, usecols=[0,1], names=['movie_id', 'title'])

ratings = ratings.merge(movies, on='movie_id')


  movies = pd.read_csv(movies_path, sep='\|', encoding='latin-1', header=None, usecols=[0,1], names=['movie_id', 'title'])
  movies = pd.read_csv(movies_path, sep='\|', encoding='latin-1', header=None, usecols=[0,1], names=['movie_id', 'title'])


In [42]:

train_data = []
test_data = []

for user_id, group in ratings.groupby('user_id'):
    train, test = train_test_split(group, test_size=0.2, random_state=42)
    train_data.append(train)
    test_data.append(test)

train_ratings = pd.concat(train_data)
test_ratings = pd.concat(test_data)


In [43]:

train_matrix = train_ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)
test_matrix = test_ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)

user_similarity = cosine_similarity(train_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=train_matrix.index, columns=train_matrix.index)

def recommend_movies(user_id, top_n=10):
    if user_id not in user_similarity_df.index:
        return pd.DataFrame(columns=['movie_id','title','predicted_rating'])

    sim_scores = user_similarity_df[user_id]
    weighted_ratings = train_matrix.T.dot(sim_scores) / (sim_scores.sum() + 1e-9)

    rated_movies = train_matrix.loc[user_id][train_matrix.loc[user_id] > 0].index
    recommendations = weighted_ratings.drop(rated_movies, errors='ignore').sort_values(ascending=False).head(top_n)

    recommended_movies = movies[movies['movie_id'].isin(recommendations.index)].copy()
    recommended_movies['predicted_rating'] = recommendations.values
    return recommended_movies.sort_values('predicted_rating', ascending=False)

In [44]:

item_similarity = cosine_similarity(train_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity, index=train_matrix.columns, columns=train_matrix.columns)

def recommend_movies_item_based(user_id, top_n=10):
    if user_id not in train_matrix.index:
        return pd.DataFrame(columns=['movie_id','title','predicted_rating'])

    user_ratings = train_matrix.loc[user_id]
    scores = item_similarity_df.dot(user_ratings) / (item_similarity_df.sum(axis=1) + 1e-9)
    scores = scores[user_ratings == 0]

    recommendations = movies[movies['movie_id'].isin(scores.sort_values(ascending=False).head(top_n).index)].copy()
    recommendations['predicted_rating'] = scores[recommendations['movie_id']].values
    return recommendations.sort_values('predicted_rating', ascending=False)


In [45]:

def svd_predictions(k=20):
    R = train_matrix.values
    user_ratings_mean = np.mean(R, axis=1)
    R_demeaned = R - user_ratings_mean.reshape(-1, 1)

    U, sigma, Vt = svds(R_demeaned, k=k)
    Sigma = np.diag(sigma)
    all_user_predicted_ratings = np.dot(np.dot(U, Sigma), Vt) + user_ratings_mean.reshape(-1, 1)
    return pd.DataFrame(all_user_predicted_ratings, columns=train_matrix.columns)

predictions_df = svd_predictions(k=20)

def recommend_movies_svd(user_id, top_n=10):
    if user_id not in train_matrix.index:
        return pd.DataFrame(columns=['movie_id','title','predicted_rating'])

    user_row_number = user_id - 1
    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending=False)
    rated_movies = train_matrix.loc[user_id][train_matrix.loc[user_id] > 0].index
    recommendations = sorted_user_predictions.drop(rated_movies, errors='ignore').head(top_n)

    recommended_movies = movies[movies['movie_id'].isin(recommendations.index)].copy()
    recommended_movies['predicted_rating'] = recommendations.values
    return recommended_movies.sort_values('predicted_rating', ascending=False)


In [46]:
def precision_recall_f1_at_k(recommend_func, user_id, k=5):
    recommended = recommend_func(user_id, top_n=k)['movie_id'].values
    if user_id not in test_matrix.index:
        return np.nan, np.nan, np.nan
    actual = test_matrix.loc[user_id]
    actual = actual[actual > 0].index.values
    if len(actual) == 0:
        return np.nan, np.nan, np.nan

    precision = len(set(recommended) & set(actual)) / k
    recall = len(set(recommended) & set(actual)) / len(actual)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-9)
    return precision, recall, f1

In [48]:

methods = {
    'User-based CF': recommend_movies,
    'Item-based CF': recommend_movies_item_based,
    'SVD': recommend_movies_svd
}

evaluation_results = {}
for name, func in methods.items():
    precisions, recalls, f1s = [], [], []
    for user_id in test_matrix.index:
        p, r, f1 = precision_recall_f1_at_k(func, user_id, k=5)
        if not np.isnan(p):
            precisions.append(p)
            recalls.append(r)
            f1s.append(f1)
    evaluation_results[name] = {
        'Precision': np.mean(precisions),
        'Recall': np.mean(recalls),
        'F1': np.mean(f1s)
    }

for name, metrics in evaluation_results.items():
    print(f"{name} - Precision: {metrics['Precision']:.4f}, Recall: {metrics['Recall']:.4f}, F1: {metrics['F1']:.4f}")




User-based CF - Precision: 0.3266, Recall: 0.1082, F1: 0.1417
Item-based CF - Precision: 0.0036, Recall: 0.0005, F1: 0.0009
SVD - Precision: 0.4365, Recall: 0.1464, F1: 0.1928


In [50]:
top_n = 10
user_ids = test_matrix.index[:1]

for user_id in user_ids:
    print(f"\nUser {user_id} Top-{top_n} Recommendations:")
    for method_name, func in methods.items():
        recs = func(user_id, top_n=top_n)[['title','predicted_rating']]
        print(f"\n{method_name}:\n", recs.reset_index(drop=True))


User 1 Top-10 Recommendations:

User-based CF:
                                     title  predicted_rating
0                        Apollo 13 (1995)          1.664144
1                        Rock, The (1996)          1.619190
2                  Terminator, The (1984)          1.555980
3          When Harry Met Sally... (1989)          1.533272
4                          Contact (1997)          1.479059
5             English Patient, The (1996)          1.474407
6                 Schindler's List (1993)          1.411240
7  One Flew Over the Cuckoo's Nest (1975)          1.377354
8       E.T. the Extra-Terrestrial (1982)          1.367056
9                       Casablanca (1942)          1.342433

Item-based CF:
                                                title  predicted_rating
0                          Santa with Muscles (1996)          1.725132
1  Ballad of Narayama, The (Narayama Bushiko) (1958)          1.307893
2                        Perfect Candidate, A (1996)         