In [19]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


In [20]:
# Rescale the raw Time_Spent column with a default MinMaxScaler (range [0, 1])
# and store the result next to the original column.
scaler = MinMaxScaler()
clickstream_data['Time_Spent_Normalized'] = scaler.fit_transform(
    clickstream_data[['Time_Spent']]
)

# Pivot to one row per user and one column per served article. Repeated
# (user, article) pairs are averaged; pairs that never occur are filled with 0.
user_item_matrix = clickstream_data.pivot_table(
    index='UserId',
    columns='ArticleId_served',
    values='Time_Spent_Normalized',
    aggfunc='mean',
    fill_value=0,
)

# Show the last few rows as a sanity check
print("User-Item Matrix:")
print(user_item_matrix.tail())

User-Item Matrix:
ArticleId_served  1     2     4     5         6     9     10    11    12    \
UserId                                                                       
996                0.0   0.0   0.0   0.0  0.000000   0.0   0.0   0.0   0.0   
997                0.0   0.0   0.0   0.0  0.146667   0.0   0.0   0.0   0.0   
998                0.0   0.0   0.0   0.0  0.000000   0.0   0.0   0.0   0.0   
999                0.0   0.0   0.0   0.0  0.000000   0.0   0.0   0.0   0.0   
1000               0.0   0.0   0.0   0.0  0.000000   0.0   0.0   0.0   0.0   

ArticleId_served  14    ...  2889  2890  2892  2893  2894  2895  2896  2897  \
UserId                  ...                                                   
996                0.0  ...   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
997                0.0  ...   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
998                0.0  ...   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
999                0.0  ...   0.0   0.0 

Item-based collaborative filtering

In [21]:
from sklearn.metrics.pairwise import cosine_similarity

# Transpose so that each row is one article's vector over all users, then take
# pairwise cosine similarity between those rows.
item_similarity_matrix = cosine_similarity(user_item_matrix.transpose())

# Re-attach the article labels on both axes so the table can be indexed by article.
item_similarity_df = pd.DataFrame(
    item_similarity_matrix,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)

print("Item-Item Similarity Matrix:")
print(item_similarity_df)


Item-Item Similarity Matrix:
ArticleId_served  1     2     4         5     6     9     10    11    12    \
ArticleId_served                                                             
1                  1.0   0.0   0.0  0.000000   0.0   0.0   0.0   0.0   0.0   
2                  0.0   1.0   0.0  0.000000   0.0   0.0   0.0   0.0   0.0   
4                  0.0   0.0   1.0  0.000000   0.0   0.0   0.0   0.0   0.0   
5                  0.0   0.0   0.0  1.000000   0.0   0.0   0.0   0.0   0.0   
6                  0.0   0.0   0.0  0.000000   1.0   0.0   0.0   0.0   0.0   
...                ...   ...   ...       ...   ...   ...   ...   ...   ...   
2895               0.0   0.0   0.0  0.000000   0.0   0.0   0.0   0.0   0.0   
2896               0.0   0.0   0.0  0.002486   0.0   0.0   0.0   0.0   0.0   
2897               0.0   0.0   0.0  0.000000   0.0   0.0   0.0   0.0   0.0   
2898               0.0   0.0   0.0  0.308143   0.0   0.0   0.0   0.0   0.0   
2899               0.0   0.0   0.0 

In [22]:
def get_item_based_recommendations(user_id, user_item_matrix, item_similarity_df, num_recommendations=10):
    """Recommend unseen items for one user with item-based collaborative filtering.

    Every candidate item ``i`` is scored as the similarity-weighted average of
    the values the user already has::

        score(i) = sum_j sim(i, j) * value(user, j) / sum_j sim(i, j)

    where ``j`` runs over the items with a value > 0 in the user's row.

    Parameters
    ----------
    user_id : label
        Row label of the user in ``user_item_matrix``.
    user_item_matrix : pandas.DataFrame
        Users as rows, items as columns. Values > 0 are treated as interactions.
    item_similarity_df : pandas.DataFrame
        Item-by-item similarity table; its column labels must include the
        column labels of ``user_item_matrix``.
    num_recommendations : int, default 10
        Maximum number of items to return.

    Returns
    -------
    pandas.Series
        Scores indexed by item label, highest first. Items whose score is
        undefined (zero total similarity to everything the user has seen) are
        omitted, so fewer than ``num_recommendations`` entries may come back.
        The Series is empty when the user has no interactions.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row label of ``user_item_matrix``.
    """
    user_row = user_item_matrix.loc[user_id]
    user_interactions = user_row[user_row > 0]

    # Cold start: with nothing to compare against every score would be 0/0.
    if user_interactions.empty:
        return pd.Series(index=item_similarity_df.index[:0], dtype="float64")

    # Similarities of every item (rows) to the items this user has seen (columns).
    # Sliced once and reused for both the numerator and the denominator.
    similarity_to_seen = item_similarity_df[user_interactions.index]

    weighted_sum = similarity_to_seen.dot(user_interactions)
    similarity_total = similarity_to_seen.sum(axis=1)

    # Mask zero denominators so unrelated items become NaN (never inf) and can
    # be dropped, instead of surfacing as NaN-scored "recommendations".
    scores = weighted_sum.div(similarity_total.where(similarity_total != 0)).dropna()

    # NOTE(review): because this is a weighted average, a candidate that is
    # similar to exactly one seen item receives that item's value no matter how
    # weak the similarity is, which produces large groups of tied scores.

    # Exclude items the user has already interacted with
    scores = scores[~scores.index.isin(user_interactions.index)]

    return scores.nlargest(num_recommendations)

# Demo: item-based recommendations for a single user
user_id = 1  # label of the user to score
recommendations = get_item_based_recommendations(
    user_id=user_id,
    user_item_matrix=user_item_matrix,
    item_similarity_df=item_similarity_df,
)
print(f"Top recommendations for user {user_id}:")
print(recommendations)


Top recommendations for user 1:
ArticleId_served
315     0.946667
501     0.946667
1251    0.946667
1497    0.946667
1596    0.946667
2297    0.946667
2662    0.946667
2727    0.946667
81      0.946667
142     0.946667
dtype: float64


User-based collaborative filtering

In [23]:
# Rows of user_item_matrix are users, so pairwise cosine similarity between
# rows gives a user-by-user table.
user_similarity_matrix = cosine_similarity(user_item_matrix)

# Label both axes with the user ids so the table can be indexed by user.
user_similarity_df = pd.DataFrame(
    user_similarity_matrix,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

print("User Similarity Matrix:")
print(user_similarity_df)


User Similarity Matrix:
UserId      1     2         3     4         5         6         7     8     \
UserId                                                                       
1       1.000000   0.0  0.020947   0.0  0.000000  0.000000  0.000000   0.0   
2       0.000000   1.0  0.000000   0.0  0.000000  0.000000  0.000000   0.0   
3       0.020947   0.0  1.000000   0.0  0.000000  0.000000  0.000000   0.0   
4       0.000000   0.0  0.000000   1.0  0.000000  0.000000  0.000000   0.0   
5       0.000000   0.0  0.000000   0.0  1.000000  0.014815  0.016729   0.0   
...          ...   ...       ...   ...       ...       ...       ...   ...   
996     0.000000   0.0  0.002325   0.0  0.000000  0.000000  0.000000   0.0   
997     0.000000   0.0  0.000000   0.0  0.000000  0.000000  0.000000   0.0   
998     0.019667   0.0  0.000000   0.0  0.000000  0.000000  0.000000   0.0   
999     0.000000   0.0  0.004259   0.0  0.020753  0.000000  0.000000   0.0   
1000    0.000000   0.0  0.000000   0.0  

In [24]:
def get_user_based_recommendations(user_id, user_item_matrix, user_similarity_df, num_recommendations=10):
    """Recommend unseen items for one user with user-based collaborative filtering.

    Each item is scored as the similarity-weighted average of the values all
    *other* users have for it::

        score(i) = sum_v sim(user, v) * value(v, i) / sum_v sim(user, v)

    Parameters
    ----------
    user_id : label
        Label of the target user; must be a column of ``user_similarity_df``
        and a row of ``user_item_matrix``.
    user_item_matrix : pandas.DataFrame
        Users as rows, items as columns. Values > 0 are treated as interactions.
    user_similarity_df : pandas.DataFrame
        User-by-user similarity table labelled with the same user labels as
        the rows of ``user_item_matrix``.
    num_recommendations : int, default 10
        Maximum number of items to return.

    Returns
    -------
    pandas.Series
        Scores indexed by item label, highest first, excluding items the
        target user already has a value > 0 for. Empty when the similarities
        to all other users sum to zero (e.g. a user with no interactions),
        because the weighted average is undefined in that case.

    Raises
    ------
    KeyError
        If ``user_id`` is missing from either table.
    """
    # Get the similarities for the given user
    user_similarities = user_similarity_df[user_id]

    # Remove the target user by label. Dropping "the first entry after sorting"
    # is only right when the self-similarity sorts strictly first, which fails
    # on ties, on float rounding, and when the self-similarity is 0.
    neighbour_weights = user_similarities.drop(user_id)

    # Without any similar neighbour the normalisation below would be 0/0.
    total_weight = neighbour_weights.sum()
    if total_weight == 0:
        return pd.Series(index=user_item_matrix.columns[:0], dtype="float64")

    # Similarity-weighted sum of the neighbours' rows, normalised by total weight
    neighbour_rows = user_item_matrix.loc[neighbour_weights.index]
    item_scores = neighbour_rows.T.dot(neighbour_weights) / total_weight

    # Exclude items the user has already interacted with
    own_row = user_item_matrix.loc[user_id]
    already_seen = own_row.index[own_row > 0]
    item_scores = item_scores[~item_scores.index.isin(already_seen)]

    return item_scores.nlargest(num_recommendations)

# Demo: user-based recommendations for the same example user
user_id = 1
recommendations = get_user_based_recommendations(
    user_id=user_id,
    user_item_matrix=user_item_matrix,
    user_similarity_df=user_similarity_df,
)
print(f"Top recommendations for user {user_id}:")
print(recommendations)


Top recommendations for user 1:
ArticleId_served
2048    0.035003
2675    0.031331
547     0.029615
2024    0.028241
1672    0.027955
2558    0.027404
1362    0.026210
1403    0.025366
1873    0.025112
1592    0.024888
dtype: float64
