In [11]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Read the raw interaction records from disk
df = pd.read_csv('supermarket_data.csv')

# Hold out part of the records for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): test_size=0.75 leaves only 25% of the rows for training -- confirm this is intended.
train, test = train_test_split(
    df,
    test_size=0.75,
    random_state=42,
)

# User x product table built from the training rows only. Cells with no
# training record are filled with 0; repeated (user, product) rows are combined
# by pivot_table's default aggregation.
matrix = train.pivot_table(
    values='Interaction Type',
    index='User ID',
    columns='Product Name',
    fill_value=0,
)

# Pairwise cosine similarity between user rows, labelled by user ID on both axes
user_ids = matrix.index
user_similarity = pd.DataFrame(
    cosine_similarity(matrix),
    index=user_ids,
    columns=user_ids,
)

def collaborative_filtering(user_id, n_recommendations=3, n_neighbors=None,
                            interactions=None, similarity=None):
    """
    User-based collaborative filtering recommendation algorithm

    Scores every product the target user has a 0 entry for as the
    similarity-weighted average of the most similar users' entries.

    Parameters:
    user_id: The ID of the target user (a label of the user-item matrix)
    n_recommendations (int): The number of recommendations to return (default is 3)
    n_neighbors (int or None): How many most-similar users to average over.
        None (default) uses n_recommendations, matching the earlier behaviour.
    interactions (DataFrame or None): User-item matrix (users as index, products
        as columns). None (default) uses the notebook-level `matrix`.
    similarity (DataFrame or None): User-user similarity matrix labelled by user
        ID on both axes. None (default) uses the notebook-level `user_similarity`.

    Returns:
    A list of (product, predicted interaction score) tuples, highest score first,
    with at most n_recommendations entries. The list is empty when nothing can be
    predicted (no neighbours, all neighbour similarities sum to 0, or the user has
    no 0-valued products).

    Raises:
    KeyError: if user_id is not present in the similarity or interaction matrix
    """
    if interactions is None:
        interactions = matrix
    if similarity is None:
        similarity = user_similarity
    if n_neighbors is None:
        n_neighbors = n_recommendations

    if user_id not in similarity.columns or user_id not in interactions.index:
        raise KeyError(f"user_id {user_id!r} is not present in the user-item matrix")

    if n_recommendations <= 0 or n_neighbors <= 0:
        return []

    # Remove the target user by label instead of assuming they sort first:
    # ties at the top (or an all-zero row, whose self-similarity is 0) would
    # otherwise let the user act as their own neighbour.
    neighbour_sims = (
        similarity[user_id]
        .drop(labels=[user_id], errors="ignore")
        .sort_values(ascending=False)
        .head(n_neighbors)
    )

    # np.average raises ZeroDivisionError when the weights sum to zero, so
    # treat "no informative neighbours" as "no recommendations".
    if neighbour_sims.empty or neighbour_sims.sum() == 0:
        return []

    # Products the target user has not interacted with (0 entries in their row)
    target_row = interactions.loc[user_id]
    candidates = target_row[target_row == 0].index
    if len(candidates) == 0:
        return []

    # Slice the neighbour rows once and average all candidate columns together
    neighbour_rows = interactions.loc[neighbour_sims.index, candidates]
    predicted = np.average(
        neighbour_rows.to_numpy(dtype=float),
        axis=0,
        weights=neighbour_sims.to_numpy(dtype=float),
    )

    # sorted() is stable, so equally scored products keep their column order
    ranked = sorted(zip(candidates, predicted), key=lambda pair: pair[1], reverse=True)
    return [(product, float(score)) for product, score in ranked[:n_recommendations]]

# Recommend three products for user 100 and show the (product, score) pairs
target_user = 100
recommended_products = collaborative_filtering(user_id=target_user, n_recommendations=3)
print(recommended_products)


[('kitchen utensil', 3.332519651665075), ('photo/film', 2.9975128198114396), ('curd', 2.6724547087120474)]


In [24]:
from sklearn.metrics import average_precision_score

# Evaluate the recommendations for one user with average precision over the top k.
eval_user_id = 100
k = 10

# Request exactly k recommendations so the evaluated list, k, and the printed
# label all agree (previously 10 items were requested, k was 13, and the label said 3).
recommended_products = collaborative_filtering(user_id=eval_user_id, n_recommendations=k)

# Get the actual products interacted with by the user anywhere in the full dataset
actual_products = list(df.loc[df['User ID'] == eval_user_id, 'Product Name'].unique())
print(actual_products)
# Set copy for constant-time membership checks; the list above keeps its print order
actual_lookup = set(actual_products)

# Relevance labels and predicted scores for the top-k recommendations
top_k = recommended_products[:k]
recommended_k = [product for product, _ in top_k]
print(recommended_k)
y_true = [int(product in actual_lookup) for product in recommended_k]
print(y_true)
y_scores = [score for _, score in top_k]

# Average precision is ill-defined when no recommended item is relevant (and
# average_precision_score cannot take an empty list), so report 0.0 in that case.
if any(y_true):
    average_precision = average_precision_score(y_true, y_scores)
else:
    average_precision = 0.0

# This is a single user's AP, not a mean over users, so label it AP@k rather than MAP
print(f"AP@{k} score for user_id {eval_user_id}:", average_precision)


['preservation products', 'mayonnaise', 'bathroom cleaner', 'butter milk', 'tidbits', 'liver loaf', 'candy', 'dog food', 'Instant food products', 'beef', 'dental care', 'pastry', 'ketchup', 'rice', 'frozen fish', 'specialty fat', 'frankfurter', 'house keeping products', 'curd cheese', 'frozen fruits', 'white bread', 'meat', 'soap', 'canned fish', 'spices', 'pudding powder', 'frozen potato products', 'nuts/prunes', 'cake bar', 'cream cheese ', 'sliced cheese', 'onions', 'liqueur', 'make up remover', 'softener', 'rum', 'male cosmetics', 'sausage', 'instant coffee', 'detergent', 'specialty bar', 'vinegar', 'frozen chicken', 'soups', 'domestic eggs', 'cling film/bags', 'frozen vegetables', 'kitchen towels', 'salty snack', 'bags', 'sparkling wine', 'hard cheese', 'pork', 'toilet cleaner', 'artif. sweetener', 'organic sausage', 'specialty cheese', 'cream', 'rubbing alcohol', 'shopping bags', 'meat spreads', 'chocolate', 'canned beer', 'pasta', 'popcorn', 'chicken', 'ham', 'sugar', 'dish clea

