In [1]:
import json as json
import pprint as pp
from opensearchpy import OpenSearch
import os
import pickle
import search
import index
import recipe_parser as rp
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

## Index Setup

In [3]:
host = 'api.novasearch.org'
port = 443
user = 'user201' # Add your user name here.
password = 'Lrr1531' # Add your user password here. For testing only. Don't store credentials in code. 
index_name = user

# Create the client with SSL/TLS enabled, but hostname verification disabled.
client = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_compress = True, # enables gzip compression for request bodies
    http_auth = (user, password),
    url_prefix = 'opensearch',
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False
)

### Index Creation

In [4]:
index.create_index(client, index_name)


Creating index:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'user201'}


### Index Deletion


In [3]:
index.delete_index(client, index_name)

{'acknowledged': True}


## Recipes Setup

In [4]:
# check if a pickle file exists

if os.path.exists('recipe_titles.pkl'):
    titles = pickle.load(open('recipe_titles.pkl', 'rb'))
else:
    titles = rp.get_recipe_titles()

if os.path.exists('recipe_descs.pkl'):
    descs = pickle.load(open('recipe_descs.pkl', 'rb'))
else:
    descs = rp.get_recipe_descs()



### Index Recipes

In [6]:
index.index_document(client, index_name, rp.get_recipes())

created: How To Make Chicken Parmesan
created: How to Make Pesto
created: How To Make Corn Tortillas From Scratch
created: How To Make Elote (Mexican Street Corn)
created: How To Make Macarons
created: How To Make Meringue
created: How To Make Handmade Pasta
created: How to Make Perfect Polenta
created: How To Make Miso Soup
created: How to Make Perfect Guacamole
created: How to Cook Trout
created: How to Cook a Turkey
created: How to Cook: Boiled Eggs
created: How To Cook A Perfect Risotto
created: How to Cook: Crispy Tofu
created: How to Cook: Brown Rice
created: How To Cook Salmon in the Oven
created: How To Cook Brown Rice
created: How To Cook Filet Mignon
created: How To Cook Lentils on the Stove
created: Sylvia's World Famous Talked About Spareribs
created: Holiday Eggnog
created: Holiday Pizza
created: Holiday Salad
created: Holiday Seafood Pot
created: Holiday Chicken Salad
created: Holiday milk punch
created: Holiday Manhattan
created: Snickerdoodles I
created: Holiday Pumpkin

# Text-based Search

### Simple query search and response containing title and description

In [7]:
query = "chicken marsala"
search.search_titleTxt(client, index_name, query)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'Eum0tI4B7xTIhwxmzKB5',
                    '_index': 'user201',
                    '_score': 5.932764,
                    '_source': {'description': 'Our step-by-step recipe for '
                                               'classic chicken Marsala, a '
                                               'delicious yet surprisingly '
                                               'easy one-pot chicken dinner '
                                               'with all the Italian flavor '
                                               'you crave. ',
                                'title': 'How To Make Chicken Marsala at Home'},
                    '_type': '_doc'},
                   {'_id': 'q-m0tI4B7xTIhwxm8aB4',
                    '_index': 'user201',
                    '_score': 3.5119915,
                    '_source': {'description': None,
                          

### Search Recipes with ingredient

In [8]:
recipe = "Doughnut"
search.search_titleTotalTime(client, index_name, recipe)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'Hum0tI4B7xTIhwxmkZ_e',
                    '_index': 'user201',
                    '_score': 6.642566,
                    '_source': {'duration': 45, 'title': 'Doughnut Tree'},
                    '_type': '_doc'},
                   {'_id': 'b-m0tI4B7xTIhwxm46AO',
                    '_index': 'user201',
                    '_score': 5.7062964,
                    '_source': {'duration': 10,
                                'title': 'Doughnut Strawberry Shortcake'},
                    '_type': '_doc'}],
          'max_score': 6.642566,
          'total': {'relation': 'eq', 'value': 2}},
 'timed_out': False,
 'took': 3}


In [5]:
max_time = 60
search.search_recipeByTime(client, index_name, max_time)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'hem0tI4B7xTIhwxmbZ6F',
                    '_index': 'user201',
                    '_score': 1.0,
                    '_source': {'duration': 15,
                                'title': 'How to Cook: Boiled Eggs'},
                    '_type': '_doc'},
                   {'_id': 'kOm0tI4B7xTIhwxmcJ4y',
                    '_index': 'user201',
                    '_score': 1.0,
                    '_source': {'duration': 15, 'title': 'Holiday Salad'},
                    '_type': '_doc'},
                   {'_id': 'k-m0tI4B7xTIhwxmcJ7_',
                    '_index': 'user201',
                    '_score': 1.0,
                    '_source': {'duration': 15, 'title': 'Holiday milk punch'},
                    '_type': '_doc'},
                   {'_id': 'num0tI4B7xTIhwxmc56E',
                    '_index': 'user201',
                    '_score': 1.0,
                  

### Text-Based Search using term queries

In [10]:
query = "yogurt"
search.search_titleTxt_terms(client, index_name, query)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'Jem0tI4B7xTIhwxm0aAS',
                    '_index': 'user201',
                    '_score': 2.937347,
                    '_source': {'description': 'The answer is skewers and '
                                               'yogurt, my friends. Curious? '
                                               "Let's light up the grill and "
                                               'literally put shrimp on the '
                                               'barbie. (Yep, I went there.)',
                                'title': 'How To Grill Juicy, Flavorful '
                                         'Shrimp'},
                    '_type': '_doc'}],
          'max_score': 2.937347,
          'total': {'relation': 'eq', 'value': 1}},
 'timed_out': False,
 'took': 3}


### Text-Based Search using boolean queries


In [11]:
query = "chocolate"
search.search_titleIngredients_bool(client, index_name, query)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'Rem0tI4B7xTIhwxmm59r',
                    '_index': 'user201',
                    '_score': 6.759122,
                    '_source': {'ingredients': [None,
                                                'flour',
                                                'sugar',
                                                None,
                                                'egg',
                                                'egg',
                                                'milk',
                                                'vanilla extract',
                                                None,
                                                'orange',
                                                'salt',
                                                'butter',
                                                'oil',
                                             

In [13]:
print(titles)

['How To Make Chicken Parmesan', 'How to Make Pesto', 'How To Make Corn Tortillas From Scratch', 'How To Make Elote (Mexican Street Corn)', 'How To Make Macarons', 'How To Make Meringue', 'How To Make Handmade Pasta', 'How to Make Perfect Polenta', 'How To Make Miso Soup', 'How to Make Perfect Guacamole', 'How to Cook Trout', 'How to Cook a Turkey', 'How to Cook: Boiled Eggs', 'How To Cook A Perfect Risotto', 'How to Cook: Crispy Tofu', 'How to Cook: Brown Rice', 'How To Cook Salmon in the Oven', 'How To Cook Brown Rice', 'How To Cook Filet Mignon', 'How To Cook Lentils on the Stove', "Sylvia's World Famous Talked About Spareribs", 'Holiday Eggnog', 'Holiday Pizza', 'Holiday Salad', 'Holiday Seafood Pot', 'Holiday Chicken Salad', 'Holiday milk punch', 'Holiday Manhattan', 'Snickerdoodles I', 'Holiday Pumpkin Bread', 'Yellow Curry Shrimp', 'How to Make a Choco Taco', 'How To Make Lemonade', 'How To Make Tomato Purée', "My New Roots' Life-Changing Loaf of Bread", 'Beer Can Chicken', 'Bee

# Encoding - Dual Encoders

In [6]:
#Mean Pooling - Take average of all tokens
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output.last_hidden_state #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

#Encode text
def encode(texts):
    # Tokenize sentences
    encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')

    # Compute token embeddings
    with torch.no_grad():
        model_output = model(**encoded_input, return_dict=True)

    # Perform pooling
    embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

    # Normalize embeddings
    embeddings = F.normalize(embeddings, p=2, dim=1)
    
    return embeddings


# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/msmarco-distilbert-base-v2")
model = AutoModel.from_pretrained("sentence-transformers/msmarco-distilbert-base-v2")





### Index the embeddings

In [13]:
titles_emb = encode(titles)


with open('title_embeddings.pickle', 'wb') as f:
    pickle.dump(titles_emb, f)
    

index.index_titleEmbeddings(client, index_name, titles)


created: How To Make Chicken Parmesan
created: How to Make Pesto
created: How To Make Corn Tortillas From Scratch
created: How To Make Elote (Mexican Street Corn)
created: How To Make Macarons
created: How To Make Meringue
created: How To Make Handmade Pasta
created: How to Make Perfect Polenta
created: How To Make Miso Soup
created: How to Make Perfect Guacamole
created: How to Cook Trout
created: How to Cook a Turkey
created: How to Cook: Boiled Eggs
created: How To Cook A Perfect Risotto
created: How to Cook: Crispy Tofu
created: How to Cook: Brown Rice
created: How To Cook Salmon in the Oven
created: How To Cook Brown Rice
created: How To Cook Filet Mignon
created: How To Cook Lentils on the Stove
created: Sylvia's World Famous Talked About Spareribs
created: Holiday Eggnog
created: Holiday Pizza
created: Holiday Salad
created: Holiday Seafood Pot
created: Holiday Chicken Salad
created: Holiday milk punch
created: Holiday Manhattan
created: Snickerdoodles I
created: Holiday Pumpkin

In [None]:
descs_emb = encode(descs)

with open('desc_embeddings.pickle', 'wb') as f:
   pickle.dump(descs_emb, f)

index.index_descEmbeddings(client, index_name, descs)

### Embedding Title Search

### Title embedding

In [8]:
query = "cake"
query_emb = encode(query)

search.search_titleEmbedding(client, index_name, query_emb)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': 'Wum2tI4B7xTIhwxmV6OQ',
                    '_index': 'user201',
                    '_score': 1.7172348,
                    '_source': {'title': 'My Birthday Cake'},
                    '_type': '_doc'},
                   {'_id': 'xOm2tI4B7xTIhwxms6Sd',
                    '_index': 'user201',
                    '_score': 1.689884,
                    '_source': {'title': "A Chocolate Cake That's Got It All"},
                    '_type': '_doc'},
                   {'_id': 'Num2tI4B7xTIhwxmkKQ8',
                    '_index': 'user201',
                    '_score': 1.6858522,
                    '_source': {'title': 'Christmas Cake Cookies'},
                    '_type': '_doc'},
                   {'_id': 'dOm2tI4B7xTIhwxmn6S1',
                    '_index': 'user201',
                    '_score': 1.6794477,
                    '_source': {'title': 'Chocolate Chip C

### Description embedding

In [11]:
query = "chicken marsala"
query_emb = encode(query)

search.search_title_descEmbedding(client, index_name, query_emb)


Search results:
{'_shards': {'failed': 0, 'skipped': 0, 'successful': 4, 'total': 4},
 'hits': {'hits': [{'_id': '4unHtI4B7xTIhwxmeas4',
                    '_index': 'user201',
                    '_score': 1.643873,
                    '_source': {'title': 'Our step-by-step recipe for classic '
                                         'chicken Marsala, a delicious yet '
                                         'surprisingly easy one-pot chicken '
                                         'dinner with all the Italian flavor '
                                         'you crave. '},
                    '_type': '_doc'},
                   {'_id': '0OnHtI4B7xTIhwxmLqqR',
                    '_index': 'user201',
                    '_score': 1.3787874,
                    '_source': {'title': 'Chinese-style sweet and sour '
                                         'chicken, stir-fried with bell '
                                         'peppers and pineapple chunks.'},
                 