# NutriNudge

## PIP installs

In [None]:
%pip install langchain_community
%pip install langchain
%pip install accelerate
%pip install qdrant_client
%pip install transformers

In [2]:
%pip install gradio

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


## ChatBot

In [16]:
# Imports
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
from accelerate import Accelerator
from qdrant_client import QdrantClient
from qdrant_client.http import models
import json
import time

accelerator = Accelerator()

allergens_set = {'Soybeans', 'Eggs', 'Celery', 'Pine nuts', 'Peanuts', 'Almonds', 'Shellfish', 'Pork', 'Nuts', 'Anchovies',
                 'Mustard', 'Milk', 'Coconut', 'Strawberries', 'Alcohol', 'Chicken', 'Ghee', 'Fish', 'Cocoa', 'Wheat', 'Oats', 'Dairy', 'Rice'}

# Importing config file
CONFIG = None
with open(".\\..\\config.json") as f:
    CONFIG = json.load(f)
print("[-] Loaded configurations")

# Tokenizer Details
# model_name = CONFIG["token-model-name"]
model_kwargs = {"device": 'cuda'}
encode_kwargs = {"normalize_embeddings": False}

embeddings = HuggingFaceBgeEmbeddings(
    model_kwargs=model_kwargs,
    # model_name = model_name,
    encode_kwargs=encode_kwargs
)

print("[-] Embedding model initialised")

hist = ""

url = CONFIG["vector-db-url"]
api_key = CONFIG["vector-api-key"]
collection_name = CONFIG["vector-collection-name"]

client = QdrantClient(
    url=url,
    api_key=api_key,
    # prefer_grpc=True
)

print("[-] Qdrant Vector Database client started")

db = Qdrant(
    client=client,
    embeddings=embeddings,
    collection_name=collection_name
)

print("[-] Vector embeddings obtained")


def demo_vector_select():
    query = "List the products you have..."
    docs = db.similarity_search_with_score(query=query, k=5)
    doc, score = docs[0]
    print("##########################################################")
    print({"score": score, "content": doc.page_content, "metadata": doc.metadata})
    print("##########################################################")


cpt = """
<s>
[INST]
## System
You are an AI assistant for NutriNudge, an online retail shop specializing in food products. Your role is to offer a personalized shopping experience by suggesting suitable products for users, taking into account their specific allergies. You must filter out products that contain any allergens listed by the user.
The user specific allergens are given in "User Allergies" section. The products present are provided in the "Product Info" section.

## Product Info: 
{context}

------------------------------------------------------------------
## User Allergies:
$user_allergies$

Below provided is the list of you previous conversations, use it to give personilized reponses.
## Chat History:
$chat_history$
------------------------------------------------------------------
## Question: 
{question}

Only return the helpful answer below and nothing else.
[/INST]
## Helpful answer:
</s>
"""


def update_prompt(user_allergies):
    global cpt
    cpt = cpt.replace("$chat_history$", hist)
    print("[-] History added to prompt")
    cpt = cpt.replace("$user_allergies$", str(user_allergies))
    print("[-] Allergies intimated to model")
    prompt = PromptTemplate(template=cpt,
                            input_variables=['context', 'question'])
    print("[-] Prompt formatted")
    return prompt


def retrieval_qa_chain(llm, prompt, db, allergens):
    qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                           chain_type='stuff',
                                           retriever=db.as_retriever(
                                               search_kwargs={'k': 10,
                                                              "filter": models.Filter(
                                                                  must_not=[
                                                                      models.FieldCondition(
                                                                          key="Ingredients",
                                                                          match=models.MatchAny(
                                                                              any=allergens,
                                                                          )
                                                                      )
                                                                  ]
                                                              ), }),
                                           return_source_documents=True,
                                           chain_type_kwargs={
                                               'prompt': prompt},
                                           )
    print("[-] QA chain initialised")

    return qa_chain


def load_llm():
    conf = {"max_new_tokens": 1024, "top_k": 5, "top_p": 0.80,
            "context_length": 5096, "gpu_layers": 10}
    llm = CTransformers(
        model=CONFIG["model-link"],
        model_type=CONFIG["model-name"],
        temperature=0.3,
        config=conf
    )
    llm, conf = accelerator.prepare(llm, conf)
    # llm = AutoModelForCausalLM.from_pretrained(CONFIG["model-link"], model_type="mistral", gpu_layers=0, config=conf, local_files_only=True)
    print("[-] LLM loaded")
    return llm


def qa_bot(user_allergies):
    llm = load_llm()
    qa_prompt = update_prompt(user_allergies)
    qa = retrieval_qa_chain(llm, qa_prompt, db, user_allergies)
    print("[-] Chatbot is online")
    return qa


def final_result(query, user_allergies):
    global hist
    qa_result = qa_bot(user_allergies)
    print("[.] Generating recommendations for the query: "+query)
    res = qa_result.invoke({'query': query})
    print("[.] Response obtained")
    answer = res["result"]
    sources = res["source_documents"]
    with open("res.json", "w") as f:
        f.write(str(res))
    with open("answer.json", "w") as f:
        f.write(str(answer))
    with open("sources.json", "w") as f:
        print("sources:\n", sources)
        f.write(str(sources))
    hist += query + ":" + answer + "\n"
    rfcnc = "Look at the below suggested results..."
    components = []
    if sources:
        for source in sources:
            components.append({
                "title": source.metadata["Food Product"],
                "ingredients": source.metadata["Ingredients"]
            })
    else:
        rfcnc = ""
    response = answer+"\n"+rfcnc
    return {"response": response, "components": components}


def search_product(prompt, user_allergies):
    global cpt
    st = time.time()
    res = final_result(prompt, eval(user_allergies))
    ed = time.time()
    print("&&&&"*10)
    print(cpt)
    print("===="*10)
    print(res)
    print("####"*10)
    print("Time Taken:", ed-st, "secs")
    print("!!!!"*10)
    print("[->] Responding...", res)
    print("[$$] Time Taken:", ed-st, "secs")
    return res


# if __name__ == "__main__":
#     # demo_vector_select()
#     search_product("i want cookie", ["almond", "chocolate"])
#     search_product("i want lemon", [])
#     search_product("Hey do you guys offer refund?", ["chocolate"])


[-] Loaded configurations
[-] Embedding model initialised
[-] Qdrant Vector Database client started
[-] Vector embeddings obtained


In [18]:
chat = gr.Interface(fn=search_product, inputs=["text", "text"], outputs=["text"])
chat.launch(share=True)

Running on local URL:  http://127.0.0.1:7862

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




## Recipe

In [3]:
from transformers import FlaxAutoModelForSeq2SeqLM
from transformers import AutoTokenizer

MODEL_NAME_OR_PATH = "flax-community/t5-recipe-generation"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
model = FlaxAutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME_OR_PATH)

prefix = "items: "
# generation_kwargs = {
#     "max_length": 512,
#     "min_length": 64,
#     "no_repeat_ngram_size": 3,
#     "early_stopping": True,
#     "num_beams": 5,
#     "length_penalty": 1.5,
# }
generation_kwargs = {
    "max_length": 512,
    "min_length": 64,
    "no_repeat_ngram_size": 3,
    "do_sample": True,
    "top_k": 60,
    "top_p": 0.95
}


special_tokens = tokenizer.all_special_tokens
tokens_map = {
    "<sep>": "--",
    "<section>": "\n"
}


def skip_special_tokens(text, special_tokens):
    for token in special_tokens:
        text = text.replace(token, "")

    return text


def target_postprocessing(texts, special_tokens):
    if not isinstance(texts, list):
        texts = [texts]

    new_texts = []
    for text in texts:
        text = skip_special_tokens(text, special_tokens)

        for k, v in tokens_map.items():
            text = text.replace(k, v)

        new_texts.append(text)

    return new_texts


def generation_function(texts):
    _inputs = texts if isinstance(texts, list) else [texts]
    inputs = [prefix + inp for inp in _inputs]
    inputs = tokenizer(
        inputs,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_tensors="jax"
    )

    input_ids = inputs.input_ids
    attention_mask = inputs.attention_mask

    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        **generation_kwargs
    )
    generated = output_ids.sequences
    generated_recipe = target_postprocessing(
        tokenizer.batch_decode(generated, skip_special_tokens=False),
        special_tokens
    )
    return generated_recipe


def get_recipe(items):
    res = []
    tmp = ""
    generated = generation_function(items)
    for text in generated:
        sections = text.split("\n")
        for section in sections:
            section = section.strip()
            if section.startswith("title:"):
                section = section.replace("title:", "")
                headline = "TITLE"
            elif section.startswith("ingredients:"):
                section = section.replace("ingredients:", "")
                headline = "INGREDIENTS"
            elif section.startswith("directions:"):
                section = section.replace("directions:", "")
                headline = "DIRECTIONS"

            if headline == "TITLE":
                tmp += f"[{headline}]: {section.strip().capitalize()}\n"
            else:
                section_info = [
                    f"  - {i+1}: {info.strip().capitalize()}" for i, info in enumerate(section.split("--"))]
                tmp += f"[{headline}]:\n"
                tmp += "\n".join(section_info)
        res.append(tmp)
    return res


# ========================================================================================================

if __name__ == "__main__":
    items = [
        "macaroni, butter, salt, bacon, milk, flour, pepper, cream corn",
        "provolone cheese, bacon, bread, ginger"
    ]
    generated = generation_function(items)
    for text in generated:
        sections = text.split("\n")
        for section in sections:
            section = section.strip()
            if section.startswith("title:"):
                section = section.replace("title:", "")
                headline = "TITLE"
            elif section.startswith("ingredients:"):
                section = section.replace("ingredients:", "")
                headline = "INGREDIENTS"
            elif section.startswith("directions:"):
                section = section.replace("directions:", "")
                headline = "DIRECTIONS"

            if headline == "TITLE":
                print(f"[{headline}]: {section.strip().capitalize()}")
            else:
                section_info = [
                    f"  - {i+1}: {info.strip().capitalize()}" for i, info in enumerate(section.split("--"))]
                print(f"[{headline}]:")
                print("\n".join(section_info))

        print("-" * 130)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Search

In [4]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import json


# Importing config file
CONFIG = None
with open(".\\..\\config.json") as f:
    CONFIG = json.load(f)
print("[-] Loaded configurations")

client = QdrantClient(url=CONFIG["vector-db-url"],
                      api_key=CONFIG["vector-api-key"])


def searchProducts(query, allergens):
    res = client.search(
        collection_name=CONFIG["vector-collection-name"],
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="Food Product",
                    match=models.MatchValue(
                        value=query,
                    ),
                ),
                models.FieldCondition(
                    key="Ingredients",
                    match=models.MatchAny(
                        any=allergens,
                        must_not=True  # This is the key change, indicating that the ingredients must not match any of the allergens
                    )
                )
            ]
        ),
        search_params=models.SearchParams(hnsw_ef=128, exact=False),
        query_vector=[0.2, 0.1, 0.9, 0.7],
        # limit=3,
        with_payload=["Food Product", "Ingredients", "Allergens"],
    )
    return res


def search_non_allergic_products(query, allergens):
    out = client.scroll(
        collection_name=CONFIG["vector-collection-name"],
        scroll_filter=models.Filter(
            # should=[],
            must=[
                models.FieldCondition(
                    key='metadata."Food Product"', match=models.MatchText(text=query)),
            ],
            must_not=[
                models.FieldCondition(key="metadata.Ingredients", match=models.MatchText(text=allergen)) for allergen in allergens
            ],
        ),
        with_payload=['metadata."Food Product"',
                      "metadata.Ingredients", "metadata.Allergens"],
    )
    res = []
    for i in out[0]:
        ele = i.payload["metadata"]
        if ele not in res:
            res.append(ele)
    return res


if __name__ == "__main__":
    print(search_non_allergic_products("cookie", ["chocolate", "almond"]))


[-] Loaded configurations


## Flask

In [None]:
from flask import Flask, jsonify, request
import pandas as pd
# from recipeBot import get_recipe
# from search_better import search_non_allergic_products
# from mistralBot import search_product

data = pd.read_csv("dataset.csv")

app = Flask(__name__)


@app.route('/')
def home():
    return "This is backend"


@app.route('/search', methods=['POST'])
def search():
    data = request.get_json()
    query = data["query"]
    allergens = data["allergens"]
    res = search_non_allergic_products(query, allergens)
    return jsonify(res), 200


@app.route("/recipe", methods=["POST"])
def recipe():
    data = request.get_json()
    ingredients = data["ingredients"]
    res = get_recipe(ingredients)
    return jsonify({"recipes": res}), 200


@app.route("/chat", methods=["POST"])
def chat():
    data = request.get_json()
    query = data["query"]
    allergies = data["allergies"]
    res = search_product(query, allergies)
    return jsonify(res), 200


if __name__ == '__main__':
    app.run(host="0.0.0.0", debug=True)


## Flask Subprocess

In [9]:
import subprocess
subprocess.Popen(['python','app.py'])

<Popen: returncode: None args: ['python', 'app.py']>

## Data Inspection

In [10]:
import pandas as pd
df = pd.read_csv("dataset.csv")
print(df.head(10))

          Food Product                                        Ingredients  \
0       Almond Cookies                      Almonds, Sugar, Butter, Flour   
1       Ranch Dressing    Buttermilk, Sugar, Vegetable oil, Garlic, herbs   
2      Caramel Popcorn                       Popcorn, Sugar, Butter, Salt   
3        Berry Parfait  Mixed berries, Sugar, Yogurt (milk, cultures),...   
4          Mango Lassi    Mango, Sugar, Yogurt (milk, cultures), Cardamom   
5       Banana Pudding  Bananas, Sugar, Milk, Vanilla pudding mix, coo...   
6     Chocolate Mousse     Chocolate, Sugar, Heavy cream, Vanilla extract   
7  Strawberry Smoothie                  Strawberries, Sugar, Yogurt, Milk   
8        Berry Parfait                    Berries, Honey, Yogurt, Granola   
9       Butter Chicken        Chicken, Sugar, Butter, Tomato sauce, cream   

               Allergens  
0  Almonds, Wheat, Dairy  
1                  Dairy  
2                  Dairy  
3                  Dairy  
4                

# Gradio Tests

In [10]:
%pip install gradio-client

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
from gradio_client import Client

client = Client("http://127.0.0.1:7861/")
def print_res(res):
	print(res)
result = client.submit(
		"cookies",	# str in 'prompt' Textbox component
		["chocolate"],	# str in 'user_allergies' Textbox component
		api_name="/predict",
		result_callbacks=[print_res]
)

  from .autonotebook import tqdm as notebook_tqdm


Loaded as API: http://127.0.0.1:7861/ ✔


ConnectError: [WinError 10061] No connection could be made because the target machine actively refused it

ValueError: None