Importing the necessary libraries.

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.metrics import average_precision_score

Below we load the supermarket data from the 'supermarket_data.csv' file, split it into train and test sets and pivot the train set to create a user-item matrix, as well as calculating a user-user similarity matrix using the cosine similarity of the user-item matrix.

We then define a function, 'collaborative_filtering', which is the function that will provide recommendations for our users, in this case one user. The function takes the target user's ID and the number of reccomendations to return as input parameters, and returns a list of tuples (3) which contain the recommended products and their predicted introduction score (as defined in the data creation ipynb). The function uses the similarity scores of the target user with other users (top n most similar users) also taking into account the products that the target user has not interacted with in order to calculate the interaction score for the uninteracted products. Finally, we print the sorted list of recommended products for the target user with ID 100 based on the predictions made by the function.

In [11]:
df = pd.read_csv('supermarket_data.csv')

train, test = train_test_split(df, test_size = 0.75, random_state = 42)

matrix = train.pivot_table(index = 'User ID', columns = 'Product Name', values = 'Interaction Type', fill_value = 0)

user_similarity = pd.DataFrame(cosine_similarity(matrix), index = matrix.index, columns = matrix.index)


def collaborative_filtering(user_id, n_recommendations = 3):
    """
    Collaborative filtering recommendation algorithm
    
    Parameters:
    user_id (int): The ID of the target user
    n_recommendations (int): The number of recommendations to return (default is 3)
    
    Returns:
    A list of tuples containing the recommended products and their predicted interaction score
    """

    sim_scores = user_similarity[user_id].sort_values(ascending = False)
    
    top_users = sim_scores.iloc[1:n_recommendations+1].index
    
    uninteracted_products = matrix.loc[user_id][matrix.loc[user_id] == 0].index
    
    interaction_scores = []
    
    for product in uninteracted_products:
        interaction_score = np.average(matrix.loc[top_users][product], weights = sim_scores[top_users])
        interaction_scores.append((product, interaction_score))
    
    sorted_scores = sorted(interaction_scores, key=lambda x: x[1], reverse = True)
    
    return sorted_scores[:n_recommendations]


recommended_products = collaborative_filtering(user_id = 100, n_recommendations = 3)
print(recommended_products)

[('kitchen utensil', 3.332519651665075), ('photo/film', 2.9975128198114396), ('curd', 2.6724547087120474)]


Here we calculate the Mean Average Precision at k (MAP@k) score for a target user with ID 100. The code first generates recommendations for user_id 100 based on the 'collaborative_filtering' function, which we store in the 'recommended_products' variable. We then get the actual products that the user with ID 100 interacted with from the test data, which are stored in the 'actual_products' variable. Finally, the MAP@k score is calculated using the 'average_precision_score' function from Sklearn, where it is then printed, giving us an indiccation of the model's effectiveness. 

We have yet to fix a way of evaluating our model and comparing it to others, as can be seen from the poor output.

In [24]:
recommended_products = collaborative_filtering(user_id = 100, n_recommendations = 10)


actual_products = list(df.loc[df['User ID'] == 100]['Product Name'].unique())
print(actual_products)

k = 13
recommended_k = list(p[0] for p in recommended_products[:k])
print(recommended_k)

y_true = [int(p in actual_products) for p in recommended_k]
print(y_true)

y_scores = [p[1] for p in recommended_products[:k]]
average_precision = average_precision_score(y_true, y_scores)

print("MAP@3 score for user_id 100:", average_precision)

['preservation products', 'mayonnaise', 'bathroom cleaner', 'butter milk', 'tidbits', 'liver loaf', 'candy', 'dog food', 'Instant food products', 'beef', 'dental care', 'pastry', 'ketchup', 'rice', 'frozen fish', 'specialty fat', 'frankfurter', 'house keeping products', 'curd cheese', 'frozen fruits', 'white bread', 'meat', 'soap', 'canned fish', 'spices', 'pudding powder', 'frozen potato products', 'nuts/prunes', 'cake bar', 'cream cheese ', 'sliced cheese', 'onions', 'liqueur', 'make up remover', 'softener', 'rum', 'male cosmetics', 'sausage', 'instant coffee', 'detergent', 'specialty bar', 'vinegar', 'frozen chicken', 'soups', 'domestic eggs', 'cling film/bags', 'frozen vegetables', 'kitchen towels', 'salty snack', 'bags', 'sparkling wine', 'hard cheese', 'pork', 'toilet cleaner', 'artif. sweetener', 'organic sausage', 'specialty cheese', 'cream', 'rubbing alcohol', 'shopping bags', 'meat spreads', 'chocolate', 'canned beer', 'pasta', 'popcorn', 'chicken', 'ham', 'sugar', 'dish clea



The following is our new attemt to calculate MAP, NDCG, Precision@K, and Recall@K as was done with the SAR model.

In [None]:
from sklearn.metrics import average_precision_score, ndcg_score
import numpy as np

def evaluate(recommended_products, test_data, K):

    relevant_products = test_data.groupby('User ID')['Product Name'].apply(list).reset_index(name = 'relevant')

    MAP = average_precision_score(relevant_products['relevant'], [r[0] for r in recommended_products], average = 'macro')
    NDCG = ndcg_score([relevant_products['relevant'].tolist()], [r[0] for r in recommended_products], k = K)

    precision = []
    recall = []
    for user_id in relevant_products['User ID']:
        relevant = set(relevant_products[relevant_products['User ID']==user_id]['relevant'].iloc[0])
        recommended = set([r[0] for r in recommended_products if r[1]>=np.mean([r[1] for r in recommended_products])])
        relevant_and_recommended = relevant.intersection(recommended)
        precision.append(len(relevant_and_recommended)/K)
        recall.append(len(relevant_and_recommended)/len(relevant))

    Precision_at_K = np.mean(precision)
    Recall_at_K = np.mean(recall)

    return {'MAP': MAP, 'NDCG': NDCG, 'Precision@K': Precision_at_K, 'Recall@K': Recall_at_K}

test_data = test[['User ID', 'Product Name', 'Interaction Type']]
recommended_products = collaborative_filtering(user_id = 100, n_recommendations = 3)

metrics = evaluate(recommended_products, test_data, K = 3)
print(metrics)