In [23]:
import numpy as np
import pandas as pd
import json
import pickle
from tqdm.notebook import tqdm
from datasets import load_from_disk
from sentence_transformers import SentenceTransformer, util

In [24]:
# SECURITY: pickle.load can execute arbitrary code; only load 'embeddings.pkl' from a trusted source.
with open('embeddings.pkl', 'rb') as f:
    # The following cell reports shape (2171142, 512); presumably one row per training recipe — confirm.
    embeds = pickle.load(f)

In [25]:
embeds.shape

(2171142, 512)

In [26]:
# Load the dataset that was saved to disk under 'full/'.
data = load_from_disk('full/')

# NOTE(review): this unpacking assumes `data` holds exactly three splits, stored in
# train / validation / test order — confirm, or index the splits by name instead.
train, val, test = data.values()

In [27]:
train[0]

{'title': 'Chocolate Fudge(Foolproof)  ',
 'ingredients': ['18 oz. semi-sweet chocolate chips',
  '1 can Eagle Brand milk',
  'dash of salt',
  '1 1/2 tsp. vanilla',
  '1/2 c. chopped nuts'],
 'directions': ['Melt chocolate and milk in saucepan on low heat.',
  'Remove from heat; add other ingredients.',
  'Chill 2 hours; cut into squares.'],
 'link': 'www.cookbooks.com/Recipe-Details.aspx?id=419450',
 'source': 'Gathered',
 'NER': ['semi-sweet chocolate chips', 'milk', 'salt', 'vanilla', 'nuts']}

In [28]:
def merge_columns(example):
    """Attach the text used for embedding to a single recipe row.

    The text is ``"<title> ; <NER joined by ','> ; <directions joined by '.'>"``.

    Args:
        example: Mapping with a 'title' (str or None), a 'NER' list of strings
            and a 'directions' list of strings.

    Returns:
        The same mapping, modified in place: 'embedding_text' is added and a
        missing title is replaced by the placeholder string 'NaN'.
    """
    title = example['title']
    if title is None:
        # Untitled row: show its ingredient entities, then fall back to a placeholder title.
        print(example['NER'])
        title = 'NaN'
        example['title'] = title

    sections = (
        title,
        ','.join(example['NER']),
        '.'.join(example['directions']),
    )
    example['embedding_text'] = ' ; '.join(sections)
    return example

# Add the 'embedding_text' column to every training row, using 7 worker processes.
# This rebinds `train`, so later cells see the mapped dataset.
train = train.map(merge_columns, num_proc=7)

In [29]:
# Load the sentence-embedding model stored in the local 'embed_model/' directory.
model = SentenceTransformer.load('embed_model/')

In [30]:
# Encode one comma-joined ingredient list as a query; the string equals the NER list of train[0].
sample = model.encode("semi-sweet chocolate chips,milk,salt,vanilla,nuts", batch_size=4)

In [31]:
# semantic_search returns one list of hits per query; [0] picks the hits for the single query `sample`.
# Despite the variable name, each entry is a {'corpus_id': ..., 'score': ...} dict, not a bare index.
top_n_indices = util.semantic_search(sample, embeds, top_k=5)[0]
top_n_indices

[{'corpus_id': 1111146, 'score': 0.7530437111854553},
 {'corpus_id': 314313, 'score': 0.7435571551322937},
 {'corpus_id': 103858, 'score': 0.7363246083259583},
 {'corpus_id': 403842, 'score': 0.7362555265426636},
 {'corpus_id': 152620, 'score': 0.7346318364143372}]

In [32]:
# Map each hit's corpus row id to its score.
indexes = {hit['corpus_id']: hit['score'] for hit in top_n_indices}

In [33]:
from collections import Counter
from tqdm import tqdm
# Tally how often each normalised (lower-cased, stripped) NER ingredient occurs in `train`.
ctr = Counter()

for row in tqdm(train):
    ctr.update(item.lower().strip() for item in row["NER"])

100%|██████████| 2171142/2171142 [01:34<00:00, 23067.26it/s]


In [34]:
# Distinct normalised ingredient names, in the order they were first counted.
check = list(ctr)

In [84]:
# Take row 51 as a worked example. NOTE(review): .copy() is shallow, so nested lists stay shared with the source row dict.
sample_recipe = train[51].copy()


In [73]:
sample_recipe

{'title': 'Mississippi Mud Pie',
 'ingredients': ['2 c. sugar',
  '2 sticks margarine',
  '1 1/2 c. plain flour',
  '4 eggs',
  '1 c. chopped pecans',
  '3 tsp. vanilla',
  '1/3 c. cocoa (or less)',
  '1/4 tsp. salt',
  '1/2 (10 oz.) pkg. Kraft miniature marshmallows'],
 'directions': ['Cream the sugar and butter.',
  'Add eggs and mix well.',
  'Sift flour, cocoa and salt together.',
  'Add to creamed mixture.',
  'Mix well.',
  'Add vanilla and nuts.']}

In [82]:
# Accumulator for formatted recipe strings; re-run this cell to reset it before formatting again.
formatted_recipe = []

In [85]:
def format_recipe(recipe):
    """Render a recipe dictionary as a plain-text recipe card.

    Args:
        recipe: Mapping with a 'title' string, an 'ingredients' list of strings
            and a 'directions' list of strings.

    Returns:
        A string with a "Recipe Title:" line, an "INGREDIENTS" section holding
        one "* " bullet per ingredient, and a "COOKING INSTRUCTIONS:" section
        holding one numbered "Step N:" entry per direction.
    """
    title_words = recipe['title'].strip().split()
    ingredient_items = recipe['ingredients']
    steps = recipe['directions']

    # str.capitalize upper-cases the first letter and lower-cases the rest of each word.
    heading = ' '.join(map(str.capitalize, title_words))

    bullet_lines = [f"* {item.strip()}" for item in ingredient_items]

    step_entries = []
    for number, text in enumerate(steps, start=1):
        step_entries.append(f"Step {number}: \n {text.strip()}\n")

    # Sections are separated from each other by exactly one blank line.
    sections = [
        f"Recipe Title: {heading}",
        "INGREDIENTS",
        "\n".join(bullet_lines),
        "COOKING INSTRUCTIONS:",
        "\n".join(step_entries),
    ]
    return "\n\n".join(sections)

# Recipe to format: the sample row selected in an earlier cell.
recipe = sample_recipe

# Append the formatted text to the notebook-level `formatted_recipe` list.
# NOTE(review): every re-run of this cell appends again, so entries accumulate until the list is reset.
formatted_recipe.append(format_recipe(recipe))

# Print every accumulated recipe, separated by a dashed divider line.

print('\n ---------------- \n'.join(formatted_recipe))

Recipe Title: Mississippi Mud Pie

INGREDIENTS

* 2 c. sugar
* 2 sticks margarine
* 1 1/2 c. plain flour
* 4 eggs
* 1 c. chopped pecans
* 3 tsp. vanilla
* 1/3 c. cocoa (or less)
* 1/4 tsp. salt
* 1/2 (10 oz.) pkg. Kraft miniature marshmallows

COOKING INSTRUCTIONS:

Step 1: 
 Cream the sugar and butter.

Step 2: 
 Add eggs and mix well.

Step 3: 
 Sift flour, cocoa and salt together.

Step 4: 
 Add to creamed mixture.

Step 5: 
 Mix well.

Step 6: 
 Add vanilla and nuts.


Recipe Title: Caramel Pumpkin Pie

INGREDIENTS

* 1 12 cups all-purpose flour
* 58 cup cold unsalted butter, cut into 1/2-inch cubes
* 14 teaspoon salt
* 4 -5 tablespoons ice water
* 1 cup sugar
* 13 cup water
* 2 cups heavy cream
* 1 (15 ounce) can solid-pack pumpkin (not pie filling)
* 1 14 teaspoons ground ginger
* 1 14 teaspoons ground cinnamon
* 14 teaspoon freshly grated nutmeg
* 1 pinch ground cloves
* 12 teaspoon salt
* 4 large eggs, lightly beaten

COOKING INSTRUCTIONS:

Step 1: 
 Make the crust: Blend toget

In [36]:
# Embed a single ingredient phrase to use as a query vector.
query_embed = model.encode('white chocolate chips')

In [37]:
# Rank the ingredients by frequency once and slice that ranking, so both lists
# stay in most-common-first order and come out identical on every run. Passing
# the names through a set would throw the ranking away and leave them in an
# arbitrary, run-dependent order.
# The keys of `ctr` are already lower-cased and unique (they are normalised when
# counted), so no further normalisation or de-duplication is needed here.
top_1000 = [name for name, _count in ctr.most_common(1000)]
first_500 = top_1000[:500]
next_500 = top_1000[500:1000]

In [87]:
# Example of the target recipe layout (title line, INGREDIENTS bullets, numbered
# COOKING INSTRUCTIONS steps, then tips). It is written to config.json under 'recipe_format'.
recipe_format = '''
Recipe Title: Zesty Lemon Chicken with Roasted Garlic and Thyme

INGREDIENTS

* 1 pound boneless, skinless chicken breasts
* 1 lemon, zested and juiced
* 2 tablespoons olive oil
* 1 teaspoon dried thyme
* 1 head of garlic, cloves separated and peeled
* Salt and pepper to taste

COOKING INSTRUCTIONS:

Step 1: Prepare the Chicken

Preheat oven to 400°F (200°C).
In a bowl, combine the chicken breasts, lemon zest and juice, olive oil, thyme, and a generous amount of salt and pepper.
Stir to coat the chicken evenly.

Step 2: Roast the Garlic

Place the garlic cloves on a small baking sheet.
Drizzle with olive oil and sprinkle with a pinch of salt.
Roast in the oven for 15-20 minutes, or until golden brown and tender.

Step 3: Cook the Chicken

Transfer the chicken to a baking dish.
Arrange the roasted garlic cloves around the chicken.
Bake in the preheated oven for 25-30 minutes, or until the chicken is cooked through and no longer pink inside.

Step 4: Serve

Remove the chicken from the oven and let it rest for 5 minutes before slicing.
Serve the chicken with the roasted garlic cloves and lemon slices.
Garnish with fresh thyme leaves for extra flavor.
Tips:

For a crispy skin, broil the chicken for the last 5 minutes of cooking.
Add a side of roasted vegetables or a bed of rice for a complete meal.
Use a food processor to quickly zest the lemon.
If you don't have dried thyme, you can use 2 teaspoons of fresh thyme leaves.
'''

In [88]:
# Persist the ingredient vocabularies and the recipe layout example to config.json.
with open('config.json', 'w') as config_file:
    config_payload = {
        'first_500': first_500,
        'next_500': next_500,
        'all_ingredients': list(ctr),
        'recipe_format': recipe_format,
    }
    json.dump(config_payload, config_file)

In [3]:
# Read the saved configuration back to check what was written.
with open('config.json') as f:
    config = json.load(f)
# NOTE(review): the cell that writes config.json also stores 'recipe_format'; if that key is
# absent from the displayed keys, this output is stale — re-run after writing the file.
config.keys()

dict_keys(['first_500', 'next_500', 'all_ingredients'])

In [6]:
from fuzzywuzzy import fuzz
from itertools import combinations

In [47]:
pd.DataFrame({'0':train[0],'1':train[10]}).T

Unnamed: 0,title,ingredients,directions,link,source,NER,embedding_text
0,Chocolate Fudge(Foolproof),"[18 oz. semi-sweet chocolate chips, 1 can Eagl...",[Melt chocolate and milk in saucepan on low he...,www.cookbooks.com/Recipe-Details.aspx?id=419450,Gathered,"[semi-sweet chocolate chips, milk, salt, vanil...",Chocolate Fudge(Foolproof) ; semi-sweet choc...
1,Pumpkin Pie Squares,"[1 pkg. yellow cake mix (reserve 1 c.), 1/2 c....","[Combine cake mix, 1/2 c. melted margarine and...",www.cookbooks.com/Recipe-Details.aspx?id=46954,Gathered,"[yellow cake, margarine, egg, pumpkin, brown s...","Pumpkin Pie Squares ; yellow cake,margarine,eg..."


In [143]:
len(config['all_ingredients'])

195541

In [79]:
f"{train[0]['directions']}"

"['Melt chocolate and milk in saucepan on low heat.', 'Remove from heat; add other ingredients.', 'Chill 2 hours; cut into squares.']"