In [1]:
import json as json
import pprint as pp
from opensearchpy import OpenSearch
import os
import pickle
import search
import index
import recipe_parser as rp
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

## Index Setup

In [2]:
host = 'api.novasearch.org'
port = 443
user = 'user201' # Add your user name here.
password = 'Lrr1531' # Add your user password here. For testing only. Don't store credentials in code. 
index_name = user

# Create the client with SSL/TLS enabled, but hostname verification disabled.
client = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_compress = True, # enables gzip compression for request bodies
    http_auth = (user, password),
    url_prefix = 'opensearch',
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False
)

### Index Deletion


In [3]:
index.delete_index(client, index_name)

{'acknowledged': True}


### Index Creation

In [4]:
index.create_index(client, index_name)


Creating index:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'user201'}


## Recipes Setup

In [5]:
# check if a pickle file exists

if os.path.exists('recipe_titles.pkl'):
    titles = pickle.load(open('recipe_titles.pkl', 'rb'))
else:
    titles = rp.get_recipe_titles()

if os.path.exists('recipe_descs.pkl'):
    descs = pickle.load(open('recipe_descs.pkl', 'rb'))
else:
    descs = rp.get_recipe_descs()



### Index Recipes

In [6]:
index.index_document(client, index_name, rp.get_recipes())

created: How To Make Chicken Parmesan
created: How to Make Pesto
created: How To Make Corn Tortillas From Scratch
created: How To Make Elote (Mexican Street Corn)
created: How To Make Macarons
created: How To Make Meringue
created: How To Make Handmade Pasta
created: How to Make Perfect Polenta
created: How To Make Miso Soup
created: How to Make Perfect Guacamole
created: How to Cook Trout
created: How to Cook a Turkey
created: How to Cook: Boiled Eggs
created: How To Cook A Perfect Risotto
created: How to Cook: Crispy Tofu
created: How to Cook: Brown Rice
created: How To Cook Salmon in the Oven
created: How To Cook Brown Rice
created: How To Cook Filet Mignon
created: How To Cook Lentils on the Stove
created: Sylvia's World Famous Talked About Spareribs
created: Holiday Eggnog
created: Holiday Pizza
created: Holiday Salad
created: Holiday Seafood Pot
created: Holiday Chicken Salad
created: Holiday milk punch
created: Holiday Manhattan
created: Snickerdoodles I
created: Holiday Pumpkin

# Text-based Search

### Simple query search and response containing title and description

In [7]:
query = "chicken marsala"
search.search_titleTxt(client, index_name, query)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'G-kKto4B7xTIhwxmzbjd',
                    '_index': 'user201',
                    '_score': 5.7071795,
                    '_source': {'description': 'Our step-by-step recipe for '
                                               'classic chicken Marsala, a '
                                               'delicious yet surprisingly '
                                               'easy one-pot chicken dinner '
                                               'with all the Italian flavor '
                                               'you crave. ',
                                'title': 'How To Make Chicken Marsala at Home'},
                    '_type': '_doc'},
                   {'_id': 'cekKto4B7xTIhwxmqbfl',
                    '_index': 'user201',
                    '_score': 3.420907,
                    '_source': {'description': None,
                          

### Search Recipes with duration

In [8]:
recipe = "Doughnut"
search.search_titleTotalTime(client, index_name, recipe)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'KukKto4B7xTIhwxmm7dL',
                    '_index': 'user201',
                    '_score': 6.5513535,
                    '_source': {'duration': 45, 'title': 'Doughnut Tree'},
                    '_type': '_doc'},
                   {'_id': 'eOkKto4B7xTIhwxm4LjA',
                    '_index': 'user201',
                    '_score': 5.894712,
                    '_source': {'duration': 10,
                                'title': 'Doughnut Strawberry Shortcake'},
                    '_type': '_doc'}],
          'max_score': 6.5513535,
          'total': {'relation': 'eq', 'value': 2}},
 'timed_out': False,
 'took': 3}


### Search Recipes by duration

In [9]:
max_time = 60
search.search_recipeByTime(client, index_name, max_time)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'LukKto4B7xTIhwxmnLdG',
                    '_index': 'user201',
                    '_score': 1.0,
                    '_source': {'duration': 55,
                                'title': 'Chicken with Shallots, Prunes, and '
                                         'Armagnac'},
                    '_type': '_doc'},
                   {'_id': 'NekKto4B7xTIhwxmnber',
                    '_index': 'user201',
                    '_score': 1.0,
                    '_source': {'duration': 30,
                                'title': 'Cauliflower-Chorizo Burritos'},
                    '_type': '_doc'},
                   {'_id': 'NukKto4B7xTIhwxmnbfg',
                    '_index': 'user201',
                    '_score': 1.0,
                    '_source': {'duration': 30,
                                'title': 'Chorizo Breakfast Burrito'},
                    '_type': '_doc

### Text-Based Search using term queries

In [10]:
query = "yogurt"
search.search_titleTxt_terms(client, index_name, query)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'LukKto4B7xTIhwxm0biT',
                    '_index': 'user201',
                    '_score': 2.8551745,
                    '_source': {'description': 'The answer is skewers and '
                                               'yogurt, my friends. Curious? '
                                               "Let's light up the grill and "
                                               'literally put shrimp on the '
                                               'barbie. (Yep, I went there.)',
                                'title': 'How To Grill Juicy, Flavorful '
                                         'Shrimp'},
                    '_type': '_doc'}],
          'max_score': 2.8551745,
          'total': {'relation': 'eq', 'value': 1}},
 'timed_out': False,
 'took': 3}


### Text-Based Search using boolean queries


In [11]:
query = "chocolate"
search.search_titleIngredients_bool(client, index_name, query)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'UOkKto4B7xTIhwxmo7cA',
                    '_index': 'user201',
                    '_score': 10.00655,
                    '_source': {'ingredients': [None,
                                                'chocolate',
                                                None,
                                                None,
                                                None,
                                                None,
                                                None],
                                'title': 'Bat and Cat Doughnuts'},
                    '_type': '_doc'},
                   {'_id': 'l-kKto4B7xTIhwxm5rj5',
                    '_index': 'user201',
                    '_score': 10.00655,
                    '_source': {'ingredients': ['egg',
                                                'chocolate',
                                          

In [12]:
print(titles)

['How To Make Chicken Parmesan', 'How to Make Pesto', 'How To Make Corn Tortillas From Scratch', 'How To Make Elote (Mexican Street Corn)', 'How To Make Macarons', 'How To Make Meringue', 'How To Make Handmade Pasta', 'How to Make Perfect Polenta', 'How To Make Miso Soup', 'How to Make Perfect Guacamole', 'How to Cook Trout', 'How to Cook a Turkey', 'How to Cook: Boiled Eggs', 'How To Cook A Perfect Risotto', 'How to Cook: Crispy Tofu', 'How to Cook: Brown Rice', 'How To Cook Salmon in the Oven', 'How To Cook Brown Rice', 'How To Cook Filet Mignon', 'How To Cook Lentils on the Stove', "Sylvia's World Famous Talked About Spareribs", 'Holiday Eggnog', 'Holiday Pizza', 'Holiday Salad', 'Holiday Seafood Pot', 'Holiday Chicken Salad', 'Holiday milk punch', 'Holiday Manhattan', 'Snickerdoodles I', 'Holiday Pumpkin Bread', 'Yellow Curry Shrimp', 'How to Make a Choco Taco', 'How To Make Lemonade', 'How To Make Tomato Purée', "My New Roots' Life-Changing Loaf of Bread", 'Beer Can Chicken', 'Bee

# Encoding - Dual Encoders

In [13]:
#Mean Pooling - Take average of all tokens
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output.last_hidden_state #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

#Encode text
def encode(texts):
    # Tokenize sentences
    encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')

    # Compute token embeddings
    with torch.no_grad():
        model_output = model(**encoded_input, return_dict=True)

    # Perform pooling
    embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

    # Normalize embeddings
    embeddings = F.normalize(embeddings, p=2, dim=1)
    
    return embeddings


# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/msmarco-distilbert-base-v2")
model = AutoModel.from_pretrained("sentence-transformers/msmarco-distilbert-base-v2")





tokenizer_config.json:   0%|          | 0.00/440 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/545 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

### Index the embeddings

In [14]:
titles_emb = encode(titles)


with open('title_embeddings.pickle', 'wb') as f:
    pickle.dump(titles_emb, f)
    

index.index_titleEmbeddings(client, index_name, titles)


created: How To Make Chicken Parmesan
created: How to Make Pesto
created: How To Make Corn Tortillas From Scratch
created: How To Make Elote (Mexican Street Corn)
created: How To Make Macarons
created: How To Make Meringue
created: How To Make Handmade Pasta
created: How to Make Perfect Polenta
created: How To Make Miso Soup
created: How to Make Perfect Guacamole
created: How to Cook Trout
created: How to Cook a Turkey
created: How to Cook: Boiled Eggs
created: How To Cook A Perfect Risotto
created: How to Cook: Crispy Tofu
created: How to Cook: Brown Rice
created: How To Cook Salmon in the Oven
created: How To Cook Brown Rice
created: How To Cook Filet Mignon
created: How To Cook Lentils on the Stove
created: Sylvia's World Famous Talked About Spareribs
created: Holiday Eggnog
created: Holiday Pizza
created: Holiday Salad
created: Holiday Seafood Pot
created: Holiday Chicken Salad
created: Holiday milk punch
created: Holiday Manhattan
created: Snickerdoodles I
created: Holiday Pumpkin

In [15]:
descs_emb = encode(descs)

with open('desc_embeddings.pickle', 'wb') as f:
   pickle.dump(descs_emb, f)

index.index_descEmbeddings(client, index_name, descs)

created: Master the classic dish of chicken Parmesan by starting with the chicken, choosing a marinara sauce you love, and using a trio of cheese. 
created: Spread it on sandwiches, toss it with pasta, or treat yourself a single happy spoonful, but definitely absolutely positively make pesto any chance you get.
created: Corn tortillas are made with just two ingredients: masa harina and water. What could be easier? Here's how to make tortillas for the best-ever tacos!
created: NYFL94QYZJ
created: AOYYV08HZL
created: With a crisp outer shell, slightly chewy center, and a subtle sweetness, baked meringue is a melt-in-your-mouth delight.
created: 71GSPAQ5HE
created: NXI4W9IQ4X
created: ZZ1RSSQBJO
created: The BEST guacamole! So easy to make with ripe avocados, salt, serrano chiles, cilantro and lime. Garnish with red radishes or jicama. Serve with tortilla chips. Watch how to make guacamole - it's easy!
created: GSL04VKVQV
created: PFVS27LNF6
created: Nobody likes overcooked, rubbery break

### Embedding Title Search

### Title embedding

In [16]:
query = "cake"
query_emb = encode(query)

search.search_titleEmbedding(client, index_name, query_emb)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'WekLto4B7xTIhwxm67sc',
                    '_index': 'user201',
                    '_score': 1.7172348,
                    '_source': {'title': 'My Birthday Cake'},
                    '_type': '_doc'},
                   {'_id': 'w-kMto4B7xTIhwxmPrxs',
                    '_index': 'user201',
                    '_score': 1.6898841,
                    '_source': {'title': "A Chocolate Cake That's Got It All"},
                    '_type': '_doc'},
                   {'_id': 'NekMto4B7xTIhwxmHrw3',
                    '_index': 'user201',
                    '_score': 1.6858522,
                    '_source': {'title': 'Christmas Cake Cookies'},
                    '_type': '_doc'},
                   {'_id': 'c-kMto4B7xTIhwxmLLyo',
                    '_index': 'user201',
                    '_score': 1.6794477,
                    '_source': {'title': 'Chocolate Chip 

### Description embedding

In [17]:
query = "chicken marsala"
query_emb = encode(query)

search.search_title_descEmbedding(client, index_name, query_emb)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': '3-kOto4B7xTIhwxmYL98',
                    '_index': 'user201',
                    '_score': 1.6438727,
                    '_source': {'title': 'Our step-by-step recipe for classic '
                                         'chicken Marsala, a delicious yet '
                                         'surprisingly easy one-pot chicken '
                                         'dinner with all the Italian flavor '
                                         'you crave. '},
                    '_type': '_doc'},
                   {'_id': 'zekOto4B7xTIhwxmIb44',
                    '_index': 'user201',
                    '_score': 1.3787875,
                    '_source': {'title': 'Chinese-style sweet and sour '
                                         'chicken, stir-fried with bell '
                                         'peppers and pineapple chunks.'},
                