# Food Match Algorithm for Ratings
Goal: build an algorithm that takes a food description, finds the most relevant base rating in a vector database, and then uses a simple NER-style step to decide whether that base rating gets a + or a - modifier. Very rudimentary, but practical.

In [1]:
import pandas as pd

In [24]:
# First, create the mock database entries: one food per line, with the food name and its rating separated by ":".

In [29]:
# Create the vector database.
import getpass
import os

from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Location of the mock food database. The original absolute path stays the
# default so existing runs are unaffected; set the FOOD_DB_PATH environment
# variable to run the notebook on another machine.
FOOD_DB_PATH = os.environ.get(
    "FOOD_DB_PATH", "/Users/devam/PycharmProjects/foodRating/food.text"
)

# OpenAIEmbeddings needs OPENAI_API_KEY, but this cell sits above the cell
# that prompts for the key. Ask for it here when it is missing so the notebook
# survives Restart Kernel -> Run All.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

loader = TextLoader(FOOD_DB_PATH)
documents = loader.load()

# Split the file on newlines so each database entry becomes its own chunk.
# NOTE(review): the recorded run warns about a 21-character chunk exceeding
# chunk_size=20; presumably short neighbouring lines could also be merged into
# a single chunk -- confirm every chunk holds exactly one "food:rating" entry.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=20, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# Embed every chunk and index it in a Chroma vector store.
db = Chroma.from_documents(docs, embeddings)

Created a chunk of size 21, which is longer than the specified 20


Expose the vector database (vDB) as a retriever.

In [35]:
# Wrap the Chroma store as a similarity-search retriever, then look up the
# entry closest to a short query and show its raw text.
retriever = db.as_retriever(search_type="similarity")
closestDoc = retriever.invoke("Chocolate bar")[0]
match = closestDoc.page_content
match

"Hershey's Milk Chocolate Bar:D"

As seen, the retriever fetches the semantically closest entry. How do we get the rating and the actual food item out of it? Simple text splitting on the ":" separator.

In [44]:
# Repeat the lookup with a longer, free-form description instead of a short
# product-style query.
longDescription = (
    "A cookies and cream chocolate bar "
    "with a lot of sugar and a decent amount of fat"
)
bestHit = retriever.invoke(input=longDescription)[0]
match = bestHit.page_content
match

"Hershey's Milk Chocolate Bar:D"

As seen above, retrieval also works for a longer, free-form description.

In [74]:
from langchain_openai import ChatOpenAI
from langchain_experimental.tools import PythonREPLTool
import os
import getpass

# Prompt for the OpenAI API key only when the environment does not already
# provide one, so re-running this cell never replaces a working key. The
# labelled prompt makes it clear what the hidden input box is asking for.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [89]:
# Chat model that backs the rating agent; the model name is kept in one place
# so it is easy to swap.
chatModelName = "gpt-3.5-turbo-16k"
model = ChatOpenAI(model=chatModelName)

In [109]:
from langchain.agents import create_openai_functions_agent

# The agent gets exactly one tool: a Python REPL.
# NOTE(review): a REPL tool presumably lets model-written code execute in this
# kernel -- confirm the rating task really needs it before running on
# untrusted descriptions.
pythonTools = PythonREPLTool()
tools = [pythonTools]

# Chat model with the tool list bound to it.
# NOTE(review): no later cell visible here references `modelWithTools`; the
# agent below is built from `model` directly.
modelWithTools = model.bind_tools(tools)

In [136]:
# System instructions for the rating agent: the rating rules followed by one
# worked example of the expected output.
#
# Adjacent string literals are concatenated with nothing in between, so the
# separators must be spelled out. Without them the prompt read
# "...INTERPRET ANYTHING.Example output:['a', ..." as one run-on line.
#
# NOTE(review): the rules ask for "+, - OR NEUTRAL", but no rule says when to
# return NEUTRAL -- confirm whether a neutral outcome is intended.
instructions = (
    'You are a ratings generator for foods based on their healthyness. Identify all foods or ingredients in the given description, and identify them as healthy or unhealthy. Then, assess the intensifiers for these foods or ingredients. If there are more healthy foods/ingredients than unhealthy ones, and if the intensifiers for the healthy ones are stronger than the unhealthy ones, return a +. Else if  there are more unhealthy foods/ingredients than healthy ones, and if the intensifiers for the unhealthy ones are stronger than the healthy ones, return a -. If there are the same number of unhealthy foods/ingredients as healthy foods/ingreidients, assess the intensifiers and return + if the healthy ones have stronger intensifiers or - if the unhealthy ones have stronger intensifiers. ALWAYS GIVE A DEFINITE RATING (+, - OR NEUTRAL). DO NOT LEAVE THIS OUT. SIMPLY FOLLOW THE INSTRUCTIONS ABOVE, DO NOT TRY TO INTERPRET ANYTHING.'
    # Blank line before the example header, newline after it.
    '\n\nExample output:\n'
    '[\'a\', \'fiber-packed\', \'cookies\', \'and\', \'cream\', \'bar\', \'with\', \'no\', \'sugar\', \'and\', \'healthy\', \'fat\']\nBased on the given description, here is the assessment of each food item and its intensifier connotation:\n\n1. Fiber-packed: Healthy intensifier\n2. Cookies: Unhealthy food\n3. Cream: Unhealthy food\n4. Bar: Neutral food\n5. No sugar: Healthy intensifier\n6. Healthy fat: Healthy intensifier\n\nNow, let\'s count the number of healthy and unhealthy food items:\n\nHealthy foods/ingredients: 3 (Fiber-packed, No sugar, Healthy fat)\nUnhealthy foods/ingredients: 2 (Cookies, Cream)\n\nNext, let\'s assess the intensifiers:\n\nHealthy intensifiers: 2 (Fiber-packed, No sugar)\nUnhealthy intensifiers: 0\n\nSince there are more healthy foods/ingredients than unhealthy ones, and the intensifiers for the healthy ones are stronger than the unhealthy ones, the overall rating for this description is "+".'
)

In [137]:
from langchain import hub
# Pull the shared OpenAI-functions prompt template from the LangChain hub,
# fill its `instructions` slot with the rating rules, and build the agent
# from the chat model, the tool list and the filled-in prompt.
promptHandle = "langchain-ai/openai-functions-template"
base_prompt = hub.pull(promptHandle)
prompt = base_prompt.partial(instructions=instructions)
agent = create_openai_functions_agent(
    llm=model,
    tools=tools,
    prompt=prompt,
)

In [138]:
from langchain.agents import AgentExecutor
# Wrap the agent and its tools in an executor that can be invoked with
# {"input": ...}.
# The former `prompt=prompt` keyword was dropped: the prompt is already part
# of `agent` (it was passed to create_openai_functions_agent), and
# AgentExecutor has no `prompt` field, so the argument had no effect.
agentExecutor = AgentExecutor(agent=agent, tools=tools)

In [139]:
# Case 1: a bar described with plenty of sugar and fat (the recorded run
# rated it "-"). The request is the fixed task sentence plus the description.
taskPrefix = "Assess whether this description should return a + or - by assessing all food items and their respective intensifier connotations as seen in the prompt: "
foodDescription = "A cookies and cream chocolate bar with a lot of sugar and a decent amount of fat"
agentReply = agentExecutor.invoke({"input": taskPrefix + foodDescription})
agentReply['output']

'[\'A\', \'cookies\', \'and\', \'cream\', \'chocolate\', \'bar\', \'with\', \'a\', \'lot\', \'of\', \'sugar\', \'and\', \'a\', \'decent\', \'amount\', \'of\', \'fat\']\nBased on the given description, here is the assessment of each food item and its intensifier connotation:\n\n1. Cookies: Unhealthy food\n2. Cream: Unhealthy food\n3. Chocolate: Unhealthy food\n4. Bar: Neutral food\n5. Sugar: Unhealthy intensifier\n6. Fat: Unhealthy intensifier\n\nNow, let\'s count the number of healthy and unhealthy food items:\n\nHealthy foods/ingredients: 0\nUnhealthy foods/ingredients: 3 (Cookies, Cream, Chocolate)\n\nNext, let\'s assess the intensifiers:\n\nHealthy intensifiers: 0\nUnhealthy intensifiers: 2 (Sugar, Fat)\n\nSince there are more unhealthy foods/ingredients than healthy ones, and the intensifiers for the unhealthy ones are stronger than the healthy ones, the overall rating for this description is "-".'

In [140]:
# Case 2: the same bar, now described with no sugar and only some fat (the
# recorded run still rated it "-").
taskPrefix = "Assess whether this description should return a + or - by assessing all food items and their respective intensifier connotations as seen in the prompt: "
foodDescription = "A cookies and cream chocolate bar with no sugar and some fat"
agentReply = agentExecutor.invoke({"input": taskPrefix + foodDescription})
agentReply['output']


'[\'A\', \'cookies\', \'and\', \'cream\', \'chocolate\', \'bar\', \'with\', \'no\', \'sugar\', \'and\', \'some\', \'fat\']\nBased on the given description, here is the assessment of each food item and its intensifier connotation:\n\n1. Cookies: Unhealthy food\n2. Cream: Unhealthy food\n3. Chocolate: Unhealthy food\n4. Bar: Neutral food\n5. No sugar: Healthy intensifier\n6. Some fat: Healthy intensifier\n\nNow, let\'s count the number of healthy and unhealthy food items:\n\nHealthy foods/ingredients: 2 (No sugar, Some fat)\nUnhealthy foods/ingredients: 3 (Cookies, Cream, Chocolate)\n\nNext, let\'s assess the intensifiers:\n\nHealthy intensifiers: 2 (No sugar, Some fat)\nUnhealthy intensifiers: 0\n\nSince there are more unhealthy foods/ingredients than healthy ones, and there are no unhealthy intensifiers, the overall rating for this description is "-".'

In [141]:
# Case 3: a bar described with healthy intensifiers (the recorded run rated
# it "+").
# NOTE(review): this is the same description used in the worked example
# inside `instructions`, so a "+" here may only show the model repeating that
# example -- consider an unseen description for a fair check.
taskPrefix = "Assess whether this description should return a + or - by assessing all food items and their respective intensifier connotations as seen in the prompt: "
foodDescription = "A fiber-packed cookies and cream  bar with no sugar and healthy fat"
agentReply = agentExecutor.invoke({"input": taskPrefix + foodDescription})
agentReply['output']


'[\'A\', \'fiber-packed\', \'cookies\', \'and\', \'cream\', \'bar\', \'with\', \'no\', \'sugar\', \'and\', \'healthy\', \'fat\']\nBased on the given description, here is the assessment of each food item and its intensifier connotation:\n\n1. Fiber-packed: Healthy intensifier\n2. Cookies: Unhealthy food\n3. Cream: Unhealthy food\n4. Bar: Neutral food\n5. No sugar: Healthy intensifier\n6. Healthy fat: Healthy intensifier\n\nNow, let\'s count the number of healthy and unhealthy food items:\n\nHealthy foods/ingredients: 3 (Fiber-packed, No sugar, Healthy fat)\nUnhealthy foods/ingredients: 2 (Cookies, Cream)\n\nNext, let\'s assess the intensifiers:\n\nHealthy intensifiers: 2 (Fiber-packed, No sugar)\nUnhealthy intensifiers: 0\n\nSince there are more healthy foods/ingredients than unhealthy ones, and the intensifiers for the healthy ones are stronger than the unhealthy ones, the overall rating for this description is "+".'