In [13]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch

In [2]:
# Load dataset
df = pd.read_csv("/home/minhas/cgn-dp-24-1/culinary_compass/data/recipes.csv")

In [3]:
df.head()

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,...,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions
0,38,Low-Fat Berry Blue Frozen Dessert,1533,Dancer,PT24H,PT45M,PT24H45M,1999-08-09T21:46:00Z,Make and share this Low-Fat Berry Blue Frozen ...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,1.3,8.0,29.8,37.1,3.6,30.2,3.2,4.0,,"c(""Toss 2 cups berries with sugar."", ""Let stan..."
1,39,Biryani,1567,elly9812,PT25M,PT4H,PT4H25M,1999-08-29T13:12:00Z,Make and share this Biryani recipe from Food.com.,"c(""https://img.sndimg.com/food/image/upload/w_...",...,16.6,372.8,368.4,84.4,9.0,20.4,63.4,6.0,,"c(""Soak saffron in warm milk for 5 minutes and..."
2,40,Best Lemonade,1566,Stephen Little,PT5M,PT30M,PT35M,1999-09-05T19:52:00Z,This is from one of my first Good House Keepi...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,0.0,0.0,1.8,81.5,0.4,77.2,0.3,4.0,,"c(""Into a 1 quart Jar with tight fitting lid, ..."
3,41,Carina's Tofu-Vegetable Kebabs,1586,Cyclopz,PT20M,PT24H,PT24H20M,1999-09-03T14:54:00Z,This dish is best prepared a day in advance to...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,2.0,4 kebabs,"c(""Drain the tofu, carefully squeezing out exc..."
4,42,Cabbage Soup,1538,Duckie067,PT30M,PT20M,PT50M,1999-09-19T06:19:00Z,Make and share this Cabbage Soup recipe from F...,"""https://img.sndimg.com/food/image/upload/w_55...",...,0.1,0.0,959.3,25.1,4.8,17.7,4.3,4.0,,"c(""Mix everything together and bring to a boil..."


In [5]:
# Initialize Sentence Transformer Model

model = SentenceTransformer("all-MiniLM-L6-v2")

In [20]:
df2=df.iloc[1:5000]

In [21]:
# Step 3: Compute embeddings for ingredients
df2["IngredientEmbedding"] = df2["RecipeIngredientParts"].apply(lambda x: model.encode(str(x), convert_to_tensor=True).tolist())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["IngredientEmbedding"] = df2["RecipeIngredientParts"].apply(lambda x: model.encode(str(x), convert_to_tensor=True).tolist())


In [22]:

# Step 4: Save embeddings to a new file
df2.to_csv("recipes_with_embeddings.csv", index=False)

In [23]:
# Step 5: Load dataset with precomputed embeddings
df = pd.read_csv("recipes_with_embeddings.csv")

In [24]:
df["IngredientEmbedding"] = df["IngredientEmbedding"].apply(lambda x: torch.tensor(eval(x)))

In [25]:
# Step 6: Define recommendation function
def recommend_recipes(nutrients, ingredients):
    """Recommend recipes based on user nutrients and ingredients"""
    
    # Encode input ingredients
    input_ingredient_embedding = model.encode(" ".join(ingredients), convert_to_tensor=True)

    # Compute cosine similarity
    ingredient_similarities = util.pytorch_cos_sim(torch.stack(df["IngredientEmbedding"].tolist()), input_ingredient_embedding).squeeze().numpy()

    # Normalize nutrient similarity
    nutrient_columns = ["Calories", "FatContent", "SaturatedFatContent", "CholesterolContent", 
                        "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]
    
    df_nutrients = df[nutrient_columns].fillna(0)
    nutrient_array = df_nutrients.to_numpy()

    # Compute Euclidean distance
    input_nutrient_array = np.array([nutrients[col] for col in nutrient_columns]).reshape(1, -1)
    nutrient_distances = np.linalg.norm(nutrient_array - input_nutrient_array, axis=1)
    nutrient_similarities = 1 / (1 + nutrient_distances)

    # Compute final scores
    final_scores = (0.4 * nutrient_similarities) + (0.6 * ingredient_similarities)

    # Rank recipes and return top results
    df["SimilarityScore"] = final_scores
    top_recipes = df.sort_values(by="SimilarityScore", ascending=False).head(5)

    return top_recipes[["Name", "Images", "RecipeInstructions"]].to_dict(orient="records")

In [30]:
# Step 7: User input for testing
user_nutrients = {
    "Calories": 500, "FatContent": 20, "SaturatedFatContent": 5,
    "CholesterolContent": 10, "SodiumContent": 500, "CarbohydrateContent": 50,
    "FiberContent": 10, "SugarContent": 10, "ProteinContent": 30
}

user_ingredients = ["Mushrooms, morel, raw", "Lemon grass (citronella), raw","Seaweed, irishmoss, raw",
                    "Tofu, raw, regular,", "Vegetable"]


# Step 8: Get recommendations
recommendations = recommend_recipes(user_nutrients, user_ingredients)

# Step 9: Display results
for recipe in recommendations:
    print(f"Name: {recipe['Name']}\nImage: {recipe['Images']}\nInstructions: {recipe['RecipeInstructions']}\n{'-'*50}")

Name: Mushroom Risotto Patties
Image: character(0)
Instructions: c("Bring stock to the boil in a saucepan and leave to simmer gently.", "In another pan, cook chopped onion and garlic with the mushrooms in 60g butter until softened.", "Add the rice, stirring with a wooden spoon for a few minutes, than add about 1/2 cup of simmering stock.", "Season.", "Cook the mixture, stirring, until the rice has absorbed the stock, then stir in another 1/2 cup of stock.", "Continue this way until the rice is tender and all the stock is used, about 20 minutes.", "Remove the pan from the heat and stir in the remaining butter and Parmesan.", 
"Cool the mixture, then shape into 4 - 6 round shape patties.", "They will become quite firm.", "Dust with flour and fry in olive oil in a large frying pan on both sides for about 5 minutes each side, until golden.", "Serve with fresh tomato sauce and whatever vegetables takes your fancy.", "Chop the tomatoes into small chunks.", "Place in a large bowl, add garlic,