# Item-Based Collaborative Filtering Example Using Dummy Data


## 1. Data Generation


In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Toy ratings in long format: one (user_id, movie_id, rating) record per row.
user_item_rating = pd.DataFrame(
    [
        (1, 101, 5),
        (1, 102, 4),
        (1, 103, 3),
        (2, 101, 4),
        (2, 104, 5),
        (3, 102, 2),
        (3, 105, 3),
        (4, 103, 4),
    ],
    columns=["user_id", "movie_id", "rating"],
)
user_item_rating


Unnamed: 0,user_id,movie_id,rating
0,1,101,5
1,1,102,4
2,1,103,3
3,2,101,4
4,2,104,5
5,3,102,2
6,3,105,3
7,4,103,4


## 2. Create User-Item Matrix


In [7]:
# Reshape the long ratings table into a wide grid: one row per user, one
# column per movie. Duplicate (user, movie) pairs would be averaged
# (aggfunc="mean" is the pandas default, written out here); pairs with no
# rating come out as NaN and are replaced by 0.
user_item_matrix = user_item_rating.pivot_table(
    values="rating", index="user_id", columns="movie_id", aggfunc="mean"
).fillna(value=0)
user_item_matrix

movie_id,101,102,103,104,105
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5.0,4.0,3.0,0.0,0.0
2,4.0,0.0,0.0,5.0,0.0
3,0.0,2.0,0.0,0.0,3.0
4,0.0,0.0,4.0,0.0,0.0


## 3. Compute Item-Item Similarity Matrix


In [10]:
# Transposing puts movies on the rows, so cosine_similarity compares each
# movie's vector of user ratings against every other movie's vector.
# The result is re-labelled with the movie ids on both axes.
item_item_similarity = pd.DataFrame(
    data=cosine_similarity(user_item_matrix.transpose()),
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)
item_item_similarity

movie_id,101,102,103,104,105
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
101,1.0,0.69843,0.468521,0.624695,0.0
102,0.69843,1.0,0.536656,0.0,0.447214
103,0.468521,0.536656,1.0,0.0,0.0
104,0.624695,0.0,0.0,1.0,0.0
105,0.0,0.447214,0.0,0.0,1.0


## 4. Generate Recommendations


In [14]:
def predict_rating(user_id, movie_id, user_item_matrix, item_similarity_matrix):
    """
    Predict rating for a user-movie pair using item-based collaborative filtering.

    Algorithm:
    1. Get all movies rated by user 'u' (a stored value > 0 counts as rated).
    2. For candidate movie 'm', look up its similarity to each of those movies.
    3. Keep only the rated movies whose similarity to 'm' is strictly positive.
    4. Return the similarity-weighted average of the user's ratings:
       Σ(similarity(m,i) * rating(u,i)) / Σ(similarity(m,i))

    Parameters
    ----------
    user_id : label
        Row label of the user in ``user_item_matrix``.
    movie_id : label
        Row label of the candidate movie in ``item_similarity_matrix``.
    user_item_matrix : pd.DataFrame
        Users on the rows, movies on the columns. Entries that are not
        greater than 0 are treated as "not rated".
    item_similarity_matrix : pd.DataFrame
        Movie-by-movie similarity scores, labelled by movie on both axes.

    Returns
    -------
    float
        The predicted rating, or 0.0 when none of the user's rated movies
        has a positive similarity to the candidate.

    Raises
    ------
    KeyError
        If ``user_id`` or ``movie_id`` is not a label of the corresponding
        matrix (raised by the ``.loc`` lookups).
    """
    # Step 1: Get all movies rated by the user
    user_ratings = user_item_matrix.loc[user_id]
    rated_movies = user_ratings[user_ratings > 0]

    # Step 2: Get similarity scores between candidate movie and all movies rated by user
    movie_similarities = item_similarity_matrix.loc[movie_id, rated_movies.index]

    # Step 3: Filter out movies with zero or negative similarity
    movie_similarities = movie_similarities[movie_similarities > 0]

    # No usable neighbours: return a float so the return type is the same on
    # every path (the weighted average below is always a float).
    if movie_similarities.empty:
        return 0.0

    # Step 4: Calculate weighted average with vectorized pandas reductions
    neighbour_ratings = rated_movies.loc[movie_similarities.index]
    weighted_sum = (movie_similarities * neighbour_ratings).sum()
    similarity_sum = movie_similarities.sum()

    return float(weighted_sum / similarity_sum)

In [15]:
# Test: Predict rating for User 1, Movie 104
user_id = 1
movie_id = 104

predicted = predict_rating(user_id, movie_id, user_item_matrix, item_item_similarity)

# Movies this user has rated (entries > 0), selected once and reused below.
user_rated = user_item_matrix.loc[user_id]
user_rated = user_rated[user_rated > 0]

print(f"Predicted rating for User {user_id} and Movie {movie_id}: {predicted:.3f}")

# Show the calculation breakdown
print(f"\nUser {user_id} rated movies: {user_rated.to_dict()}")
print(f"\nSimilarity breakdown for Movie {movie_id}:")
for rated_movie, rating in user_rated.items():
    # Similarity between the candidate movie and this already-rated movie.
    sim = item_item_similarity.loc[movie_id, rated_movie]
    print(
        f"  Movie {rated_movie}: sim={sim:.3f}, rating={rating}, contribution={sim * rating:.3f}"
    )

Predicted rating for User 1 and Movie 104: 5.000

User 1 rated movies: {101: 5.0, 102: 4.0, 103: 3.0}

Similarity breakdown for Movie 104:
  Movie 101: sim=0.625, rating=5.0, contribution=3.123
  Movie 102: sim=0.000, rating=4.0, contribution=0.000
  Movie 103: sim=0.000, rating=3.0, contribution=0.000


In [16]:
def recommend_movies(user_id, user_item_matrix, item_similarity_matrix, top_n=5):
    """
    Recommend top N movies for a user.

    Every movie the user has not rated (entry not greater than 0 in the
    user's row) is scored with ``predict_rating``; the highest-scoring
    ``top_n`` movies are returned.

    Parameters
    ----------
    user_id : label
        Row label of the user in ``user_item_matrix``.
    user_item_matrix : pd.DataFrame
        Users on the rows, movies on the columns.
    item_similarity_matrix : pd.DataFrame
        Movie-by-movie similarity scores, passed through to ``predict_rating``.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pd.DataFrame
        Columns ``movie_id`` and ``predicted_rating``, sorted by
        ``predicted_rating`` descending, with a fresh 0..k-1 index. The frame
        is empty (but still has both columns) when the user has already rated
        every movie.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row label of ``user_item_matrix``.
    """
    all_movies = user_item_matrix.columns
    user_ratings = user_item_matrix.loc[user_id]
    rated_movies = user_ratings[user_ratings > 0].index
    candidate_movies = [m for m in all_movies if m not in rated_movies]

    predictions = [
        {
            "movie_id": movie_id,
            "predicted_rating": predict_rating(
                user_id, movie_id, user_item_matrix, item_similarity_matrix
            ),
        }
        for movie_id in candidate_movies
    ]

    # Name the columns explicitly: with no candidates, pd.DataFrame([]) would
    # have no columns at all and the sort below would raise KeyError.
    recommendations = pd.DataFrame(
        predictions, columns=["movie_id", "predicted_rating"]
    )
    recommendations = recommendations.sort_values(
        "predicted_rating", ascending=False
    ).head(top_n)
    return recommendations.reset_index(drop=True)


# Get recommendations for User 1
print("Top recommendations for User 1:")
print(recommend_movies(1, user_item_matrix, item_item_similarity, top_n=3))

print("\n" + "=" * 50)
print("Recommendations for all users:\n")
for user in user_item_matrix.index:
    print(f"User {user}:")
    recs = recommend_movies(user, user_item_matrix, item_item_similarity, top_n=2)
    if recs.empty:
        print("  No recommendations available")
    else:
        # itertuples keeps each column's own dtype. iterrows would merge every
        # row into a single float Series and print integer ids as e.g. "104.0".
        for rec in recs.itertuples(index=False):
            print(
                f"  Movie {rec.movie_id}: predicted rating = {rec.predicted_rating:.3f}"
            )
    print()

Top recommendations for User 1:
   movie_id  predicted_rating
0       104               5.0
1       105               4.0

Recommendations for all users:

User 1:
  Movie 104.0: predicted rating = 5.000
  Movie 105.0: predicted rating = 4.000

User 2:
  Movie 102.0: predicted rating = 4.000
  Movie 103.0: predicted rating = 4.000

User 3:
  Movie 101.0: predicted rating = 2.000
  Movie 103.0: predicted rating = 2.000

User 4:
  Movie 101.0: predicted rating = 4.000
  Movie 102.0: predicted rating = 4.000

