Get Preference Order

In [2]:
from itertools import combinations

MIN_NUMBER = 1

def all_subsets_ordered(input_dict):
    """Enumerate every subset of the dictionary's keys that has at least two members.

    Args:
        input_dict: Mapping whose keys are the items to combine; the values
            are not looked at.

    Returns:
        A list of lists of keys. Larger subsets come first; subsets of the
        same size are ordered by ordinary list comparison of their keys.
    """
    keys = list(input_dict.keys())

    # Sizes run from 2 up to the full key count, so singletons and the empty set are skipped
    candidate_sizes = range(2, len(keys) + 1)
    groups = [
        list(members)
        for size in candidate_sizes
        for members in combinations(keys, size)
    ]

    # Negating the length puts the biggest subsets first; the list itself breaks ties
    return sorted(groups, key=lambda group: (-len(group), group))


Get Tags List and User List

In [3]:
import pandas as pd
import copy

# Read the recipe catalogue and the user/recipe interaction log from the working directory
recipes = pd.read_csv("RAW_recipes.csv")
interactions = pd.read_csv("RAW_interactions.csv")

# Each 'tags' cell is the text of a Python list (e.g. "['a', 'b']"). Peel off the
# brackets and quotes, split on ", ", and keep every distinct non-empty tag.
unique_tags = {
    piece.strip()
    for raw_tags in recipes['tags']
    for piece in raw_tags.strip('[]').replace("'", "").split(', ')
    if piece.strip()
}

# Distinct user ids, in order of first appearance in the interaction log
users = interactions['user_id'].unique()

Make User Profiles

In [4]:
# user_tags[user][tag] accumulates the sum of ratings the user gave to recipes
# carrying that tag; it is converted into the per-tag average at the end of this cell.
user_tags = {user: {} for user in users}
# number_appeared[user][tag] counts how many ratings went into that sum.
number_appeared = {user: {} for user in users}

# Parse every recipe's tag string once and index the result by recipe id.
# Filtering the whole recipes frame for each interaction rescans every recipe
# per rating, which makes the cell quadratic in the size of the data.
recipe_tags_by_id = {}
for recipe_id, raw_tags in zip(recipes['id'], recipes['tags']):
    if recipe_id in recipe_tags_by_id:
        # Keep the first row for a repeated id, as `.values[0]` on a filtered frame would
        continue
    recipe_tags_by_id[recipe_id] = [
        piece.strip()
        for piece in raw_tags.strip('[]').replace("'", "").split(', ')
        if piece.strip()
    ]

# Walk only the three columns that are needed instead of materialising a
# Series per row with iterrows().
for user_id, recipe_id, rating in zip(
    interactions['user_id'], interactions['recipe_id'], interactions['rating']
):
    rating_totals = user_tags[user_id]
    rating_counts = number_appeared[user_id]
    # A recipe id missing from `recipes` raises KeyError naming that id
    for cleaned_tag in recipe_tags_by_id[recipe_id]:
        rating_totals[cleaned_tag] = rating_totals.get(cleaned_tag, 0) + rating
        rating_counts[cleaned_tag] = rating_counts.get(cleaned_tag, 0) + 1

# Get Average rating for each tag.
# Every tag present in user_tags was counted at least once above, so the
# division is always defined.
for user, rating_totals in user_tags.items():
    rating_counts = number_appeared[user]
    for tag in rating_totals:
        rating_totals[tag] /= rating_counts[tag]




In [8]:
import recommender


# One preference dict (tag -> average rating) per user; each dict is a shallow
# copy so the recommender never holds a reference to the profiles in user_tags.
group_preferences = [dict(profile) for profile in user_tags.values()]

# Run the three aggregation strategies in turn and show each result.
# `recommendations` ends up holding the output of the last one (most pleasure).
for strategy in (
    recommender.average_recommendation,
    recommender.least_misery_recommendation,
    recommender.most_pleasure_recommendation,
):
    recommendations = strategy(group_preferences)
    print(recommendations)

[('breakfast-casseroles', 5.0), ('breakfast-potatoes', 5.0), ('pork-loins-roast', 5.0), ('side-dishes-beans', 5.0), ('beans-side-dishes', 5.0), ('irish-st-patricks-day', 5.0), ('desserts-fruit', 5.0), ('desserts-easy', 5.0), ('baked-beans', 5.0), ('middle-eastern-main-dish', 5.0)]
[('breakfast-casseroles', 5.0), ('breakfast-potatoes', 5.0), ('pork-loins-roast', 5.0), ('side-dishes-beans', 5.0), ('beans-side-dishes', 5.0), ('irish-st-patricks-day', 5.0), ('desserts-fruit', 5.0), ('desserts-easy', 5.0), ('baked-beans', 5.0), ('middle-eastern-main-dish', 5.0)]
[('weeknight', 5.0), ('time-to-make', 5.0), ('course', 5.0), ('main-ingredient', 5.0), ('preparation', 5.0), ('occasion', 5.0), ('soups-stews', 5.0), ('beans', 5.0), ('vegetables', 5.0), ('easy', 5.0)]


In [11]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# Recipe ids and names, positionally aligned with the rows of recipe_tag_df below
recipe_ids = recipes['id'].tolist()
recipe_names = recipes['name'].tolist()

# One {tag: 1} dict per recipe, built from the list-like text in the 'tags' column
recipe_tag_matrix = [
    {
        label: 1
        for label in (piece.strip() for piece in raw_tags.strip('[]').replace("'", "").split(', '))
        if label
    }
    for raw_tags in recipes['tags']
]

# Rows are recipes, columns are tags; a tag a recipe does not carry becomes 0
recipe_tag_df = pd.DataFrame(recipe_tag_matrix).fillna(0)

# Compute similarity scores between group tags and recipes
def compute_similarity(group_vector, recipe_matrix):
    """Score every row of ``recipe_matrix`` against a single group vector.

    Args:
        group_vector: Sequence of numbers; it is reshaped into one row, so its
            length must equal the number of columns in ``recipe_matrix``.
        recipe_matrix: 2-D array-like with one row per recipe.

    Returns:
        A 1-D array holding the cosine similarity between the group vector and
        each row of ``recipe_matrix``, in row order.
    """
    # cosine_similarity works on 2-D inputs, so present the vector as a one-row matrix
    query_row = np.array(group_vector).reshape(1, -1)
    return cosine_similarity(query_row, recipe_matrix).flatten()

# Turn the (tag, score) pairs from the last recommender call into a lookup table
recommendations = dict(recommendations)

# Lay the group's tag scores out in the matrix's column order; tags the group
# has no score for contribute 0
group_vector = list(map(lambda tag: recommendations.get(tag, 0), recipe_tag_df.columns))
similarity_scores = compute_similarity(group_vector, recipe_tag_df.values)

# argsort is ascending, so flip it to rank the most similar recipes first
recommended_indices = np.flip(np.argsort(similarity_scores))
recommended_recipes = list(map(recipe_names.__getitem__, recommended_indices))

# Show the ten best-matching recipe names for the group
top_matches = recommended_recipes[:10]
print(f"Group recommendations: {top_matches}")

Group recommendations: ['white bean   green chile pepper soup', 'ten can soup', 'sausage  lentil and kale soup', 'winter vegetable and bean soup with pesto', 'two bean toss', 'creamy beanie weenie soup', 'lima bean and cabbage soup', 'slimastroni soup', 'sopranos  escarole   beans soup', 'herbed split pea soup']
