# Collaborative Filtering

In [5]:
import pandas as pd
import numpy as np

# Sample user-item ratings
data = {
    'user_id': [1, 1, 1, 2, 2, 3, 3, 3, 4, 4],
    'item_id': [101, 102, 103, 101, 104, 102, 103, 104, 101, 103],
    'rating': [5, 3, 2, 4, 5, 2, 4, 1, 4, 3]
}

df = pd.DataFrame(data)
print(df)


   user_id  item_id  rating
0        1      101       5
1        1      102       3
2        1      103       2
3        2      101       4
4        2      104       5
5        3      102       2
6        3      103       4
7        3      104       1
8        4      101       4
9        4      103       3


In [6]:
# Create a User-Item Matrix:

user_item_matrix = df.pivot_table(index='user_id', columns='item_id', values='rating')
print(user_item_matrix)


item_id  101  102  103  104
user_id                    
1        5.0  3.0  2.0  NaN
2        4.0  NaN  NaN  5.0
3        NaN  2.0  4.0  1.0
4        4.0  NaN  3.0  NaN


Compute Item-Item Similarity:

In [8]:
from sklearn.metrics.pairwise import cosine_similarity

# Fill NaN values with 0
user_item_matrix.fillna(0, inplace=True)

# Compute cosine similarity between items
item_similarity = cosine_similarity(user_item_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

print(item_similarity_df)


item_id       101       102       103       104
item_id                                        
101      1.000000  0.551039  0.541111  0.519524
102      0.551039  1.000000  0.721037  0.108786
103      0.541111  0.721037  1.000000  0.145671
104      0.519524  0.108786  0.145671  1.000000


Generate Recommendations

In [9]:
def recommend_items(user_id, user_item_matrix, item_similarity_df, num_recommendations=3):
    user_ratings = user_item_matrix.loc[user_id].values.reshape(1, -1)
    similarity_scores = np.dot(user_ratings, item_similarity_df.values)
    similarity_scores_df = pd.DataFrame(similarity_scores, columns=user_item_matrix.columns)

    # Remove items already rated by the user
    rated_items = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] > 0].index
    similarity_scores_df.drop(columns=rated_items, inplace=True)

    # Get top N recommendations
    recommendations = similarity_scores_df.T.sort_values(by=0, ascending=False).head(num_recommendations)
    return recommendations.index

# Example recommendation for user 1
recommended_items = recommend_items(1, user_item_matrix, item_similarity_df)
print("Recommended items for user 1:", recommended_items)


Recommended items for user 1: Index([104], dtype='int64', name='item_id')


# Content-Based Filtering

In [10]:
# Sample item features
item_features = {
    'item_id': [101, 102, 103, 104],
    'genre': ['Action', 'Comedy', 'Action', 'Drama'],
    'length': [120, 90, 110, 140]
}

item_df = pd.DataFrame(item_features)
print(item_df)


   item_id   genre  length
0      101  Action     120
1      102  Comedy      90
2      103  Action     110
3      104   Drama     140


Create a Feature Matrix:

In [11]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode categorical features
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(item_df[['genre']]).toarray()

# Combine with numerical features
feature_matrix = np.hstack([encoded_features, item_df[['length']].values])

print("Feature matrix:\n", feature_matrix)


Feature matrix:
 [[  1.   0.   0. 120.]
 [  0.   1.   0.  90.]
 [  1.   0.   0. 110.]
 [  0.   0.   1. 140.]]


Compute Item Similarity:

In [12]:
# Compute cosine similarity between items based on features
item_similarity_content = cosine_similarity(feature_matrix)
item_similarity_content_df = pd.DataFrame(item_similarity_content, index=item_df['item_id'], columns=item_df['item_id'])

print(item_similarity_content_df)


item_id       101       102       103       104
item_id                                        
101      1.000000  0.999904  1.000000  0.999940
102      0.999904  1.000000  0.999897  0.999913
103      1.000000  0.999897  1.000000  0.999933
104      0.999940  0.999913  0.999933  1.000000


Generate Content-Based Recommendations:

In [13]:
def recommend_items_content(item_id, item_similarity_content_df, num_recommendations=3):
    similarity_scores = item_similarity_content_df.loc[item_id]
    similar_items = similarity_scores.sort_values(ascending=False).head(num_recommendations + 1).index[1:]
    return similar_items

# Example content-based recommendation for item 101
recommended_items_content = recommend_items_content(101, item_similarity_content_df)
print("Content-based recommended items similar to item 101:", recommended_items_content)


Content-based recommended items similar to item 101: Index([103, 104, 102], dtype='int64', name='item_id')
