In [33]:
#collaborative filtering to suggest items to users based on their preferences

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [34]:
# Sample dataset

data = {
    'user_id': [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
    'item_id': [101, 102, 103, 101, 102, 104, 102, 103, 104, 101, 103, 104],
    'rating': [5, 3, 4, 4, 5, 2, 4, 3, 5, 3, 4, 4]
}

df = pd.DataFrame(data)
print(df)

    user_id  item_id  rating
0         1      101       5
1         1      102       3
2         1      103       4
3         2      101       4
4         2      102       5
5         2      104       2
6         3      102       4
7         3      103       3
8         3      104       5
9         4      101       3
10        4      103       4
11        4      104       4


In [35]:
# Create user-item matrix

user_item_matrix = df.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)
print(user_item_matrix)

item_id  101  102  103  104
user_id                    
1        5.0  3.0  4.0  0.0
2        4.0  5.0  0.0  2.0
3        0.0  4.0  3.0  5.0
4        3.0  0.0  4.0  4.0


In [36]:
# Calculate cosine similarity between users

user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)
print(user_similarity_df)

user_id         1         2         3         4
user_id                                        
1        1.000000  0.737865  0.480000  0.684675
2        0.737865  1.000000  0.632456  0.465620
3        0.480000  0.632456  1.000000  0.706762
4        0.684675  0.465620  0.706762  1.000000


In [37]:
#Predict Ratings for Unrated Items

def predict_ratings(user_item_matrix, user_similarity):
    # Mean rating for each user
    mean_user_rating = user_item_matrix.mean(axis=1)
    
    # Rating difference from the mean for each user
    ratings_diff = user_item_matrix - mean_user_rating[:, np.newaxis]
    
    # Predicting ratings based on similarity
    pred = mean_user_rating[:, np.newaxis] + user_similarity.dot(ratings_diff) / np.array([np.abs(user_similarity).sum(axis=1)]).T
    return pred

# Predict ratings for all items
predicted_ratings = predict_ratings(user_item_matrix.values, user_similarity)
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)
print(predicted_ratings_df)

item_id       101       102       103       104
user_id                                        
1        3.569673  3.088660  2.940299  2.401368
2        3.083141  3.314892  2.245719  2.356248
3        2.619490  3.170058  2.866700  3.343752
4        2.778383  2.401532  2.978984  2.841101


In [38]:
#Recommend Items to Users

def recommend_items(predicted_ratings_df, user_id, num_recommendations=2):
    # Get the user's predicted ratings
    user_ratings = predicted_ratings_df.loc[user_id]
    
    # Sort ratings in descending order
    sorted_ratings = user_ratings.sort_values(ascending=False)
    
    # Filter out items the user has already rated
    already_rated = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] > 0].index
    recommendations = sorted_ratings.drop(already_rated).head(num_recommendations)
    
    return recommendations

# Recommend 2 items for user 3
recommended_items = recommend_items(predicted_ratings_df, user_id=3, num_recommendations=2)
print("Recommended Items for User 3:")
print(recommended_items)

Recommended Items for User 3:
item_id
101    2.61949
Name: 3, dtype: float64
