In [11]:
from google.colab import files
# Ask the user to upload file(s) through Colab; `uploaded` maps each uploaded
# file name to that file's raw bytes.
uploaded = files.upload()

Saving archive (2).zip to archive (2).zip


In [12]:
import zipfile
import io

# Extract every uploaded ZIP archive into the current working directory.
# Iterating over `uploaded` avoids hard-coding the browser-assigned file name
# (e.g. "archive (2).zip"), which changes from one upload to the next and
# would otherwise raise a KeyError.
extracted_archives = []
for archive_name, archive_bytes in uploaded.items():
    # Skip anything that is not a ZIP file instead of failing on it.
    if not zipfile.is_zipfile(io.BytesIO(archive_bytes)):
        continue
    with zipfile.ZipFile(io.BytesIO(archive_bytes), 'r') as zip_ref:
        zip_ref.extractall()
    extracted_archives.append(archive_name)

# Fail loudly here rather than with a confusing "file not found" in a later cell.
if not extracted_archives:
    raise ValueError("No ZIP archive found among the uploaded files.")


In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

# Load Dataset: tab-separated ratings plus the first two columns (id, title)
# of the pipe-separated movie metadata file.
ratings = pd.read_csv(
    "ml-100k/u.data",
    sep="\t",
    names=["user_id", "item_id", "rating", "timestamp"],
)
movies = pd.read_csv(
    "ml-100k/u.item",
    sep="|",
    encoding="latin-1",
    header=None,
    usecols=[0, 1],
    names=["item_id", "title"],
)
data = ratings.merge(movies, on="item_id")

# Create User-Item Matrix: one row per user, one column per title, NaN where
# the user has not rated the title. The zero-filled copy is what the
# similarity computation consumes.
user_item_matrix = pd.pivot_table(
    data,
    index="user_id",
    columns="title",
    values="rating",
)
user_item_matrix_filled = user_item_matrix.fillna(value=0)

# Compute User Similarity: pairwise cosine similarity between user rows,
# wrapped in a DataFrame labelled by user_id on both axes.
user_similarity = cosine_similarity(user_item_matrix_filled)
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

# Recommend Movies Function
def recommend_movies(user_id, k=5, top_n_similar_users=10, ratings=None, similarity=None):
    """Recommend unseen movies to ``user_id`` with user-based collaborative filtering.

    Every title is scored by summing ``similarity * rating`` over the most
    similar users who rated it. The score is a ranking score, not a predicted
    rating: it is not normalised, so titles rated by many neighbours rank higher.

    Parameters
    ----------
    user_id : label of the target user in the index of ``ratings``.
    k : int, maximum number of recommendations to return.
    top_n_similar_users : int, number of nearest neighbours to aggregate.
    ratings : optional DataFrame (users x titles, NaN = not rated).
        Defaults to the notebook-level ``user_item_matrix``.
    similarity : optional DataFrame whose column ``user_id`` holds the
        similarity of every user to ``user_id``. Defaults to the
        notebook-level ``user_similarity_df``.

    Returns
    -------
    pandas.Series
        Scores indexed by title, sorted in descending order, with at most
        ``k`` entries. Titles the user already rated and titles that no
        neighbour rated are never returned, so the result can be shorter
        than ``k`` (or empty).

    Raises
    ------
    KeyError
        If ``user_id`` is not present in ``ratings`` / ``similarity``.
    """
    if ratings is None:
        ratings = user_item_matrix
    if similarity is None:
        similarity = user_similarity_df
    if user_id not in ratings.index or user_id not in similarity.columns:
        raise KeyError(f"user_id {user_id!r} not found in the rating/similarity data")

    # Nearest neighbours, excluding the user themself.
    similar_users = (
        similarity[user_id]
        .sort_values(ascending=False)
        .drop(user_id)
        .head(top_n_similar_users)
    )
    if similar_users.empty:
        return pd.Series(dtype=np.float64)

    # Weight each neighbour's ratings by their similarity and add them up per
    # title. min_count=1 keeps titles that no neighbour rated as NaN so they
    # can be discarded instead of surfacing as NaN-scored recommendations.
    neighbour_ratings = ratings.loc[similar_users.index]
    weighted_ratings = neighbour_ratings.mul(similar_users, axis=0).sum(axis=0, min_count=1)

    user_rated_movies = ratings.loc[user_id].dropna().index
    return (
        weighted_ratings
        .dropna()
        .drop(user_rated_movies, errors="ignore")
        .sort_values(ascending=False)
        .head(k)
    )


def _similarity_to_user(ratings, user_id):
    """Return a one-column DataFrame of cosine similarities of all users to ``user_id``.

    Unrated titles count as 0, mirroring the zero-filled matrix used for the
    notebook-level similarity. A user with an all-zero profile gets similarity 0.
    """
    filled = ratings.fillna(0).to_numpy(dtype=float)
    target = ratings.loc[user_id].fillna(0).to_numpy(dtype=float)
    norms = np.linalg.norm(filled, axis=1) * np.linalg.norm(target)
    norms[norms == 0] = 1.0  # avoid 0/0; the dot product is 0 for these rows anyway
    return pd.DataFrame({user_id: filled @ target / norms}, index=ratings.index)


# Precision@K
def precision_at_k(user_id, k=5, holdout_fraction=0.2, relevance_threshold=4, seed=42, ratings=None):
    """Precision@k of ``recommend_movies`` for one user, measured on held-out ratings.

    ``recommend_movies`` never returns a title the user has already rated, so
    comparing its output with the very ratings it was built from can only
    yield 0. Instead, a random sample of the user's ratings is hidden, the
    user's similarities are recomputed from the remaining ratings, and the
    recommendations are checked against the hidden ratings.

    Parameters
    ----------
    user_id : label of the user to evaluate.
    k : int, number of recommendations to request (and the denominator).
    holdout_fraction : float in (0, 1], share of the user's ratings to hide;
        at least one rating is always hidden.
    relevance_threshold : a hidden rating >= this value counts as relevant.
    seed : int, seed for the hold-out sample so the metric is reproducible.
    ratings : optional DataFrame (users x titles, NaN = not rated).
        Defaults to the notebook-level ``user_item_matrix``.

    Returns
    -------
    float
        ``hits / k``, where hits is the number of recommended titles whose
        hidden rating is relevant. 0.0 if the user has no ratings.

    Raises
    ------
    ValueError
        If ``holdout_fraction`` is outside (0, 1].
    """
    if not 0 < holdout_fraction <= 1:
        raise ValueError("holdout_fraction must be in the interval (0, 1]")
    if ratings is None:
        ratings = user_item_matrix

    user_ratings = ratings.loc[user_id].dropna()
    if user_ratings.empty:
        return 0.0

    n_holdout = min(len(user_ratings), max(1, round(len(user_ratings) * holdout_fraction)))
    held_out = user_ratings.sample(n=n_holdout, random_state=seed)

    # Hide the held-out ratings from both the rating matrix and the similarity.
    train_ratings = ratings.copy()
    train_ratings.loc[user_id, held_out.index] = np.nan
    train_similarity = _similarity_to_user(train_ratings, user_id)

    recommended = recommend_movies(
        user_id, k, ratings=train_ratings, similarity=train_similarity
    ).index
    relevant_movies = held_out[held_out >= relevance_threshold].index
    hits = len(set(recommended).intersection(relevant_movies))
    return hits / k

# Test: show the top-5 user-based recommendations for user 5, followed by
# the Precision@5 figure for the same user.
print(" Recommendations for User 5:")
user_5_recommendations = recommend_movies(user_id=5, k=5)
print(user_5_recommendations)

user_5_precision = precision_at_k(5)
print("\n Precision@5 for User 5:", user_5_precision)

# Bonus: Item-Based CF
# Transpose so that each row is a title, treat missing ratings as 0, and
# compute the pairwise cosine similarity between titles.
item_user_matrix = user_item_matrix.transpose().fillna(value=0)
item_similarity = cosine_similarity(item_user_matrix)
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=item_user_matrix.index,
    columns=item_user_matrix.index,
)

# Bonus: SVD
# Rank-20 truncated SVD of the zero-filled user-item matrix; projecting back
# with inverse_transform gives a dense matrix of predicted scores.
# random_state pins TruncatedSVD's randomized solver so that re-running the
# notebook reproduces the same predictions.
svd = TruncatedSVD(n_components=20, random_state=42)
svd_matrix = svd.fit_transform(user_item_matrix_filled)
reconstructed_matrix = svd.inverse_transform(svd_matrix)
reconstructed_df = pd.DataFrame(
    reconstructed_matrix,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

# SVD Recommendations
# Rank user 5's reconstructed scores from highest to lowest, remove every
# title the user has already rated, and keep the first five.
print("\n Top 5 Predicted Movies for User 5 (SVD):")
predicted_ratings = reconstructed_df.loc[5].sort_values(ascending=False)
already_seen = user_item_matrix.loc[5].dropna().index
top_unseen = predicted_ratings.drop(labels=already_seen).iloc[:5]
print(top_unseen)


 Recommendations for User 5:
title
Terminator, The (1984)               19.797364
Terminator 2: Judgment Day (1991)    18.346292
Army of Darkness (1993)              17.442044
Jurassic Park (1993)                 15.253635
Real Genius (1985)                   14.818266
dtype: float64

 Precision@5 for User 5: 0.0

 Top 5 Predicted Movies for User 5 (SVD):
title
Groundhog Day (1993)                      3.037099
Brazil (1985)                             3.003444
Twelve Monkeys (1995)                     2.854668
Nightmare Before Christmas, The (1993)    2.674003
Jurassic Park (1993)                      2.541868
Name: 5, dtype: float64
