Get Preference Order

In [1]:
from itertools import combinations

MIN_NUMBER = 1

def all_subsets_ordered(input_dict):
    # Extract keys from the dictionary and convert them into a list
    input_list = list(input_dict.keys())
    
    subsets = []
    # Generate all combinations for each possible size starting from 2 to include only subsets of size greater than 1
    for r in range(2, len(input_list) + 1):
        for combo in combinations(input_list, r):
            subsets.append(list(combo))
    
    # Sort subsets by the size (descending) and then lexicographically to maintain initial order
    subsets.sort(key=lambda x: (-len(x), x))
    return subsets


Get Tags List and User List

In [3]:
import pandas as pd
import copy

# Load the recipes and interactions data
recipes = pd.read_csv("/Users/alexanderleonidas/Documents/Maastricht University/AI Masters/Research Project LLM for Group RS/archive/cleaned_recipes_with_country.csv")
interactions = pd.read_csv("/Users/alexanderleonidas/Documents/Maastricht University/AI Masters/Research Project LLM for Group RS/archive/RAW_interactions.csv")

columns = ['time_tags', 'country_tags', 'dietary_tags', 'special_tags', 'ingredients_tags']

# Extract unique tags from the dataset
unique_tags = set()
for name in columns:
    for tag_list in recipes[name]:
        # Correcting parsing based on your description that tags appear like a Python list in string format
        tag_list = tag_list.strip('[]').replace("'", "").split(', ')
        for tag in tag_list:
            cleaned_tag = tag.strip()  # Remove any leading/trailing spaces
            if cleaned_tag:  # Ensure the tag is not empty
                unique_tags.add(cleaned_tag)

# Extract unique users from the dataset
users = interactions['user_id'].unique()

ImportError: dlopen(/Users/alexanderleonidas/miniconda3/lib/python3.11/lib-dynload/cmath.cpython-311-darwin.so, 0x0002): symbol not found in flat namespace '__PyModule_Add'

In [3]:
number_user_ratings = {}
for user in interactions['user_id']:
    if user not in number_user_ratings:
        number_user_ratings[user] = 0
    number_user_ratings[user] += 1

# sort by descending order
number_user_ratings = dict(sorted(number_user_ratings.items(), key=lambda item: item[1], reverse=True))

# get top 200 users
top_users = dict(list(number_user_ratings.items())[200:400])

In [4]:
# interactions for only the top 200 users with the most recipe reviews
filtered_interactions = interactions[interactions['user_id'].isin(top_users)]
filtered_users = filtered_interactions['user_id'].unique()

Make User Profiles

In [5]:
# Create an empty dictionary to store the tags for each user
user_tags = {user: {} for user in filtered_users}
number_appeared = {user: {} for user in filtered_users}

# Iterate over the interactions data to update the user_tags dictionary
for name in columns:
    for index, row in filtered_interactions.iterrows():
        user_id = row['user_id']
        recipe_id = row['recipe_id']
        rating = row['rating']

        # Filter the recipes DataFrame to find the tags for the current recipe_id
        recipe_row = recipes[recipes['id'] == recipe_id]
        
        # Check if the recipe_row is not empty
        if not recipe_row.empty:
            recipe_tags = recipe_row[name].values[0]
            recipe_tags = recipe_tags.strip('[]').replace("'", "").split(', ')

            # Process each tag and update user_tags and number_appeared dictionaries
            for tag in recipe_tags:
                cleaned_tag = tag.strip()
                if cleaned_tag:
                    if cleaned_tag not in user_tags[user_id]:
                        user_tags[user_id][cleaned_tag] = 0
                        number_appeared[user_id][cleaned_tag] = 0
                    user_tags[user_id][cleaned_tag] += rating
                    number_appeared[user_id][cleaned_tag] += 1

# Calculate the average rating for each tag
for user in user_tags:
    for tag in user_tags[user]:
        if number_appeared[user][tag] > 0:
            user_tags[user][tag] /= number_appeared[user][tag]


In [7]:
import random

# Set maximum group size
max_group_size = 5
group_preferences = []

# Get the list of users
users_list = list(user_tags.keys())

# Shuffle the list to ensure randomness
random.shuffle(users_list)

# Divide the shuffled list into groups of 5
for i in range(0, len(users_list), max_group_size):
    group = users_list[i:i + max_group_size]
    
    # Collect preferences for the entire group
    group_pref = []
    for user in group:
        user_preferences = {tag: rating for tag, rating in user_tags[user].items()}
        group_pref.append(user_preferences)
    
    # Append the group's preferences to group_preferences
    group_preferences.append(group_pref)



In [10]:
import recommender

average = []
least_misery = []
most_pleasure = []

for group in group_preferences:
    average.append(recommender.average_recommendation(group))
    least_misery.append(recommender.least_misery_recommendation(group))
    most_pleasure.append(recommender.most_pleasure_recommendation(group))

print(average[2])
print(least_misery[2])
print(most_pleasure[2])

aggregations = average, least_misery, most_pleasure

[('kwanzaa', 5.0), ('japanese', 5.0), ('vietnamese', 5.0), ('native-american', 5.0), ('creole', 5.0), ('lebanese', 5.0), ('chicken-livers', 5.0), ('pork-loins', 5.0), ('stuffings-dressings', 5.0), ('cheesecake', 5.0), ('crab', 5.0), ('pork-sausage', 5.0), ('passover', 5.0), ('rosh-hashana', 5.0), ('stocks', 5.0), ('ontario', 5.0), ('biscotti', 5.0), ('steam', 5.0), ('candy', 5.0), ('"wylers chicken bouillon cubes"', 5.0)]
[('kwanzaa', 5.0), ('japanese', 5.0), ('vietnamese', 5.0), ('native-american', 5.0), ('creole', 5.0), ('lebanese', 5.0), ('chicken-livers', 5.0), ('pork-loins', 5.0), ('stuffings-dressings', 5.0), ('cheesecake', 5.0), ('crab', 5.0), ('pork-sausage', 5.0), ('passover', 5.0), ('rosh-hashana', 5.0), ('stocks', 5.0), ('ontario', 5.0), ('biscotti', 5.0), ('steam', 5.0), ('candy', 5.0), ('"wylers chicken bouillon cubes"', 5.0)]
[('4-hours-or-less', 5.0), ('30-minutes-or-less', 5.0), ('1-day-or-more', 5.0), ('canadian', 5.0), ('british-columbian', 5.0), ('mexican', 5.0), ('e

In [50]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# Convert the recipe tags to a DataFrame
recipe_tag_matrix = []
recipe_ids = []
recipe_names = []

for index, row in recipes.iterrows():
    for name in columns:
        recipe_id = row['id']
        recipe_tags = row[name].strip('[]').replace("'", "").split(', ')
        recipe_tags = [tag.strip() for tag in recipe_tags if tag.strip()]
        tag_vector = {tag: 1 for tag in recipe_tags}
        recipe_tag_matrix.append(tag_vector)
        recipe_ids.append(recipe_id)
        recipe_names.append(row['name'])

recipe_tag_df = pd.DataFrame(recipe_tag_matrix).fillna(0)

# Compute similarity scores between group tags and recipes
def compute_similarity(group_vector, recipe_matrix):
    group_vector = np.array(group_vector).reshape(1, -1)
    similarity_scores = cosine_similarity(group_vector, recipe_matrix)
    return similarity_scores.flatten()

aggregations = dict(aggregations)

recommendations = [[],[],[]]
for i, method in enumerate(aggregations):
    for group in method:
        group_preferences = dict(group)
        # Generate recommendations for the group based on similarity
        group_vector = [group_preferences.get(tag, 0) for tag in recipe_tag_df.columns]
        similarity_scores = compute_similarity(group_vector, recipe_tag_df.to_numpy())
        recommended_indices = np.argsort(similarity_scores)[::-1]  # Sort in descending order
        recommended_recipes = [recipe_names[idx] for idx in recommended_indices]
        recommendations[i].append(recommended_recipes[:10])

# Print top 10 recommendations for the group
print(f"Group recommendations: {recommended_recipes[:10]}")

KeyboardInterrupt: 