In [2]:
import pandas as pd
import numpy as np
import os
from typing import DefaultDict

PATH = "../data/processed/final_csvs/"

def load_data():
    dataset_dictionary = DefaultDict(pd.DataFrame)
    datasets = ["Meal.csv", "Category.csv", "Cuisines.csv", "Ingredient.csv", "CategoryToMeal.csv", "Recipe.csv", "CuisineToMeal.csv", "RecipeIngredient.csv"]
    for data in datasets:
        datafile = pd.read_csv(f'{PATH}{data}')
        name = data.split(".")[0].lower()
        dataset_dictionary[name] = datafile
    return dataset_dictionary


datasets = load_data()

In [3]:
meal = datasets["meal"][["meal_id", "name", "description"]]
category = datasets["category"]
category_to_meal = datasets["categorytomeal"]
cuisine = datasets["cuisines"]
ingredient = datasets["ingredient"]
cusines_to_meal = datasets["cuisinetomeal"]
recipeingredient = datasets["recipeingredient"]
recipe = datasets["recipe"][["recipe_id", "meal_id"]]

meal_df = meal.merge(cusines_to_meal, on="meal_id") \
    .merge(category_to_meal, on="meal_id") \
        .merge(recipe, on="meal_id") \
            .merge(recipeingredient, on="recipe_id") \
                .merge(ingredient, on="ingredient_id") \
                    .merge(cuisine, on="cuisines_id") \
                        .merge(category, on="category_id")

print(meal_df.columns)

Index(['meal_id', 'name', 'description', 'cuisines_id', 'category_id',
       'recipe_id', 'quantity', 'weight', 'ingredient_id', 'alternativeOfId',
       'measurement_index', 'main_ingredient', 'cuisines', 'category'],
      dtype='object')


In [4]:
from IPython.display import display

meal_df = meal_df[["meal_id", "name", "description", "category", "cuisines", "main_ingredient"]]
print(display(meal_df.head(4)))

Unnamed: 0,meal_id,name,description,category,cuisines,main_ingredient
0,a6213618-a4b0-4b70-a132-a0cf4763bd02,Plantain Pancakes with Spicy Maple Syrup and P...,These delicious plantain pancakes are topped o...,Breakfast,West African,plantains
1,a6213618-a4b0-4b70-a132-a0cf4763bd02,Plantain Pancakes with Spicy Maple Syrup and P...,These delicious plantain pancakes are topped o...,Breakfast,West African,all purpose flour
2,a6213618-a4b0-4b70-a132-a0cf4763bd02,Plantain Pancakes with Spicy Maple Syrup and P...,These delicious plantain pancakes are topped o...,Breakfast,West African,corn meal
3,a6213618-a4b0-4b70-a132-a0cf4763bd02,Plantain Pancakes with Spicy Maple Syrup and P...,These delicious plantain pancakes are topped o...,Breakfast,West African,baking soda


None


In [None]:
meals_data = meal_df.copy()
meals_data = meals_data.fillna("")

# Group and aggregate
grouped_meals_data = meals_data.groupby("meal_id").agg({
    "name": "first",  # Take the first name for each meal
    "description": "first",
    "category": lambda x: list(set(x)),  # Get unique categories as a list
    "cuisines": lambda x: list(set(x)),  # Get unique cuisines as a list
    "main_ingredient": lambda x: list(set(x))  # Get unique ingredients as a list
}).reset_index()

grouped_meals_data = grouped_meals_data.drop_duplicates(subset=["meal_id"])
len(grouped_meals_data)

33

In [6]:
display(grouped_meals_data.head(4))

Unnamed: 0,meal_id,name,description,category,cuisines,main_ingredient
0,088136b1-ae27-4d02-a928-d5c89358d525,Plantain Bread Recipe,This delicious vegan plantain bread made with ...,[Breakfast],"[African, European, American, Tropical]","[coconut oil, baking soda, vanilla, Sugar, sal..."
1,24c9b8de-5f13-4a4f-aad2-2bb165df890a,Nigerian Sausage Rolls with Chicken and Pancet...,While traditional Nigerian sausage rolls are m...,"[Appetizer, Snack]","[British, Nigerian]","[Bread flour, water, Pancetta, salt, Sugar, On..."
2,2ba7aba9-0be6-4c4c-8a1f-f7c5c2947b80,Afang Soup Recipe,"Afang soup, eaten by Efik and Ibibio people of...","[Main Course, Side Dish]","[Nigerian, Cameroonian]","[cayenne pepper, palm oil, Goat meat, Apple sn..."
3,334442c6-348c-4826-ae79-cf784b351583,Garden Egg Stew (Nigerian Eggplant Sauce) Recipe,Garden Egg Stew (also known as aubergine stew ...,"[Main Course, Dinner, Lunch]",[West African],"[Plum tomatoes, palm oil, peanut, dried shrimp..."


In [7]:
grouped_meals_data["cuisines"] = grouped_meals_data["cuisines"].apply(lambda x: " ".join(x))
grouped_meals_data["category"] = grouped_meals_data["category"].apply(lambda x: " ".join(x))
grouped_meals_data["main_ingredient"] = grouped_meals_data["main_ingredient"].apply(lambda x: " ".join(x))

display(grouped_meals_data.head(4))

Unnamed: 0,meal_id,name,description,category,cuisines,main_ingredient
0,088136b1-ae27-4d02-a928-d5c89358d525,Plantain Bread Recipe,This delicious vegan plantain bread made with ...,Breakfast,African European American Tropical,coconut oil baking soda vanilla Sugar salt Alm...
1,24c9b8de-5f13-4a4f-aad2-2bb165df890a,Nigerian Sausage Rolls with Chicken and Pancet...,While traditional Nigerian sausage rolls are m...,Appetizer Snack,British Nigerian,Bread flour water Pancetta salt Sugar Onion po...
2,2ba7aba9-0be6-4c4c-8a1f-f7c5c2947b80,Afang Soup Recipe,"Afang soup, eaten by Efik and Ibibio people of...",Main Course Side Dish,Nigerian Cameroonian,cayenne pepper palm oil Goat meat Apple snails...
3,334442c6-348c-4826-ae79-cf784b351583,Garden Egg Stew (Nigerian Eggplant Sauce) Recipe,Garden Egg Stew (also known as aubergine stew ...,Main Course Dinner Lunch,West African,Plum tomatoes palm oil peanut dried shrimp smo...


In [8]:
data = grouped_meals_data.copy()

data["features"] = data["name"] +  " " + data["description"] + " " + data["category"] + " " + data["cuisines"] + " " + data["main_ingredient"]
data = data[["meal_id", "features"]]
data.head(4)

Unnamed: 0,meal_id,features
0,088136b1-ae27-4d02-a928-d5c89358d525,Plantain Bread Recipe This delicious vegan pla...
1,24c9b8de-5f13-4a4f-aad2-2bb165df890a,Nigerian Sausage Rolls with Chicken and Pancet...
2,2ba7aba9-0be6-4c4c-8a1f-f7c5c2947b80,"Afang Soup Recipe Afang soup, eaten by Efik an..."
3,334442c6-348c-4826-ae79-cf784b351583,Garden Egg Stew (Nigerian Eggplant Sauce) Reci...


In [9]:
data.to_csv("../data/processed/final_csvs/meal_features.csv", index=False)

In [10]:
# ! pip install nltk tqdm datasets

In [11]:

import nltk
from tqdm import tqdm
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from datasets import Dataset
import re

# nltk.download('stopwords')
# nltk.download('punkt_tab')
# nltk.download('wordnet')


In [12]:
data = pd.read_csv("../data/processed/final_csvs/meal_features.csv")
dataset = Dataset.from_pandas(data)
print(dataset[0])

{'meal_id': '088136b1-ae27-4d02-a928-d5c89358d525', 'features': 'Plantain Bread Recipe This delicious vegan plantain bread made with sweet plantains contains no eggs or dairy, tastes better than banana bread and is certain to appeal to even non-vegans like me! Breakfast African European American Tropical coconut oil baking soda vanilla Sugar salt Almond Milk whole wheat flour plantain egg replacer powder apple cider vinegar baking powder all purpose flour'}


In [13]:
stop_words = set(stopwords.words('english'))

def clean_text(dataset):
    text = dataset["features"]
    text = text.lower()
    text = word_tokenize(text)
    text = [word for word in text if word.isalnum()]
    text = [word for word in text if re.match(r'^[a-zA-Z0-9]+$', word)]
    text = [word for word in text if word not in stop_words]
    text = [WordNetLemmatizer().lemmatize(word) for word in text]
    dataset["features"] = " ".join(text)
    return dataset

dataset = dataset.map(clean_text, batched=False)
print(dataset)


Map:   0%|          | 0/33 [00:00<?, ? examples/s]

Dataset({
    features: ['meal_id', 'features'],
    num_rows: 33
})


In [15]:
dataset.save_to_disk("../data/processed/dataset")

Saving the dataset (0/1 shards):   0%|          | 0/33 [00:00<?, ? examples/s]

In [16]:
# ! pip install spacy gensim
# ! python3 -m spacy download en_core_web_sm

In [17]:
sentence_array = dataset["features"]
# sentence_array = sentence_array[:, np.newaxis]
# sentence_array = [sent.split() for sentence in sentence_array for sent in sentence]

In [66]:
## Implementing tf-idf vectorizer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

vectorizer = TfidfVectorizer(ngram_range=(1, 3), min_df=1, analyzer='word', stop_words='english')
meal_tfidf_matrix = vectorizer.fit_transform(raw_documents=sentence_array)
matrix_LK = linear_kernel(meal_tfidf_matrix, meal_tfidf_matrix)

In [132]:
# Prepare user data

indexes = pd.Series(data["meal_id"])

user_dict = {
    "category": ["breakfast"],
    "cuisines": ["nigerian"],
    "intollerance": ["milk"],
    "allergies": ["pepper"],
    "preferences": ["vegetarian"]
}

user_data = pd.DataFrame([user_dict])

cols = user_data.columns

for col in cols:
    if isinstance(user_data[col][0], list):
        user_data[col] = user_data[col].apply(lambda x: " ".join(x))
    else:
        user_data[col] = user_data[col]

user_data["features"] = user_data["category"] + " " + user_data["cuisines"] + " " + user_data["preferences"]
user_data = user_data[["features"]]
user_data = Dataset.from_pandas(user_data)
user_data = user_data.map(clean_text, batched=False)
print(user_data)


Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Dataset({
    features: ['features'],
    num_rows: 1
})


In [133]:
user_content = user_data["features"]
user_tfidf_matrix = vectorizer.transform(user_content)
user_LK = cosine_similarity(user_tfidf_matrix, meal_tfidf_matrix).flatten()

In [134]:
user_dict

{'category': ['breakfast'],
 'cuisines': ['nigerian'],
 'intollerance': ['milk'],
 'allergies': ['pepper'],
 'preferences': ['vegetarian']}

In [136]:
user_intolerances = set(user_dict["intollerance"][0].split())
user_allergies = set(user_dict["allergies"][0].split())

def contains_restricted(ingredients, restricted_set):
    """Check if any restricted item is present in the ingredients."""
    ingredients_set = set(ingredients.split())  # Assuming ingredients are space-separated
    return bool(ingredients_set & restricted_set)

filtered_meals = grouped_meals_data[~grouped_meals_data["main_ingredient"].apply(contains_restricted, args=(user_intolerances | user_allergies,))]
meal_rec = pd.DataFrame({"meal_id": indexes, "cosine_similarity": user_LK})
meal_rec = meal_rec.sort_values(by="cosine_similarity", ascending=False)

meal_rec = meal_rec.merge(filtered_meals, on="meal_id")
meal_rec


Unnamed: 0,meal_id,cosine_similarity,name,description,category,cuisines,main_ingredient
0,456099e2-0242-4462-8a50-49398ade9cf6,0.18794,Fonio Porridge/Acha Pudding: A Super-Grain Bre...,"Fonio is a gluten-free, nutritious, super grai...",Breakfast,West African Senegalese Nigerian,Fonio maple syrup salt Fruit of Choice Almond ...
1,475bc30d-ed44-40f0-b269-db797df2f5b4,0.09948,Nigerian Akara Recipe,"Akara (Accara) are tasty, deep fried, Nigerian...",Breakfast Lunch Main Course Brunch Dinner,West African Indian Nigerian,black eyed peas scotch bonnet red onion salt O...
2,61a63ff5-82ad-4d9a-b683-95f7a6f7db41,0.063584,Nigerian Tomato Stew (Nigerian Red Stew) Recipe,This Nigerian red stew recipe features my favo...,Main Course Dinner Lunch,Nigerian,Plum tomatoes ginger Curry powder Scotch bonne...
3,088136b1-ae27-4d02-a928-d5c89358d525,0.053085,Plantain Bread Recipe,This delicious vegan plantain bread made with ...,Breakfast,African European American Tropical,coconut oil baking soda vanilla Sugar salt Alm...
4,8a136ed1-2798-4971-b825-9f05a0767046,0.047404,Nigerian Chapman Drink Recipe,"Nigeria's favorite non-alcoholic cocktail, thi...",Drinks,Nigerian,cucumber limeade orange juice lemon grapefruit...
5,97186000-4e0a-4cf0-bea6-14764512c816,0.043425,Groundnut Soup Recipe (Spicy Nigerian Peanut S...,Groundnut soup (also called peanut stew) is a ...,Main Course,Nigerian,dried shrimp powder palm oil scotch bonnet Goa...
6,e987ff61-7647-4f08-9fc2-2837692680ed,0.032582,How to Make Ofada Stew (Nigerian Ofada Sauce r...,"In this ofada sauce recipe, I share how to mak...",Main Course,West African Nigerian,Iru palm oil Goat meat salt Scotch bonnet pepp...
7,beffc339-9f81-442c-8eb7-e2af48c5f232,0.02384,Seafood Beniseed Soup,"Beniseed or sesame seed, whichever name you ch...",Main Course,Nigerian,Plum tomatoes palm oil shrimp bullion salt sea...
8,6ea2ead0-1dd7-47f0-af16-4aa24d84e4cc,0.015694,An Egusi Stew Alternative: Nigerian Almond Stew,I thought since Egusi stew requires melon seed...,,,palm oil habanero almonds broth salt flaked sm...
9,4b772492-5cad-43bc-b3c1-15c468dc3087,0.014718,Recipe for a Stable and Flaky Meat Pie Crust,"After countless trials, I can finally present ...",Snack,British Global Nigerian,All-purpose flour Bread flour water Sugar butt...
