In [2]:
# =============================
# Task 5: Movie Recommendation System
# =============================

from google.colab import files
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# =============================
# STEP 1: Upload Dataset
# =============================
print("Upload the 'u.data' file from the MovieLens 100K dataset (tab-separated)")
uploaded = files.upload()

# Load the dataset (tab-separated)
filename = list(uploaded.keys())[0]
df = pd.read_csv(filename, sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])
print("Dataset loaded successfully.")
print(df.head())

# =============================
# STEP 2: Create User-Item Matrix
# =============================
user_item_matrix = df.pivot_table(index='user_id', columns='movie_id', values='rating')
print("\nUser-Item Matrix shape:", user_item_matrix.shape)

# =============================
# STEP 3: Compute User Similarity
# =============================
user_similarity = pd.DataFrame(
    cosine_similarity(user_item_matrix.fillna(0)),
    index=user_item_matrix.index,
    columns=user_item_matrix.index
)
print("\nUser Similarity Matrix created.")

# =============================
# STEP 4: Function to Recommend Movies
# =============================
def recommend_movies(target_user, n_recommendations=5):
    if target_user not in user_item_matrix.index:
        raise ValueError("User ID not found in dataset.")

    similar_users = user_similarity[target_user].sort_values(ascending=False)[1:6]  # top 5 similar users
    similar_users_ratings = user_item_matrix.loc[similar_users.index]

    weighted_ratings = similar_users_ratings.T.dot(similar_users) / similar_users.sum()
    user_seen_movies = user_item_matrix.loc[target_user][user_item_matrix.loc[target_user].notna()].index

    recommendations = weighted_ratings.drop(user_seen_movies, errors='ignore').sort_values(ascending=False)
    return recommendations.head(n_recommendations)

# Example: Recommend for user_id = 10
target_user = 10
recommended_movies = recommend_movies(target_user, n_recommendations=5)
print(f"\nTop 5 movie recommendations for User {target_user}:")
print(recommended_movies)

# =============================
# STEP 5: Evaluate Using Precision@K
# =============================
def precision_at_k(target_user, k=5, threshold=3.5):
    if target_user not in user_item_matrix.index:
        raise ValueError("User ID not found in dataset.")

    recommendations = recommend_movies(target_user, k)
    actual_ratings = user_item_matrix.loc[target_user].dropna()
    relevant_items = actual_ratings[actual_ratings >= threshold].index

    if len(recommendations) == 0:
        return 0.0

    recommended_relevant = [item for item in recommendations.index if item in relevant_items]
    precision = len(recommended_relevant) / len(recommendations)
    return precision

precision_score = precision_at_k(target_user=10, k=5)
print(f"\nPrecision@5 for User {target_user}: {precision_score:.2f}")

# =============================
# STEP 6: Optional - Item-Based Collaborative Filtering
# =============================
item_similarity = pd.DataFrame(
    cosine_similarity(user_item_matrix.fillna(0).T),
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns
)
print("\nItem-based similarity matrix created successfully.")


Upload the 'u.data' file from the MovieLens 100K dataset (tab-separated)


Saving u.data to u (1).data
Dataset loaded successfully.
   user_id  movie_id  rating  timestamp
0      196       242       3  881250949
1      186       302       3  891717742
2       22       377       1  878887116
3      244        51       2  880606923
4      166       346       1  886397596

User-Item Matrix shape: (943, 1682)

User Similarity Matrix created.

Top 5 movie recommendations for User 10:
movie_id
490    4.407075
187    4.402098
523    4.211164
515    4.205871
318    4.204351
dtype: float64

Precision@5 for User 10: 0.00

Item-based similarity matrix created successfully.
