In case if you want to discover thought process and prototyping of final solution

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing_extensions import List, TypedDict

file_path = "cocktails.csv"
loader = CSVLoader(file_path=file_path,
                   # content_columns=["name", "alcoholic", "category", "glassType",
                   #                  "instructions", "ingredients", "ingredientMeasures"],
                   # source_column="drink_name"
                   )
data = loader.load_and_split()

In [178]:
print(data[0])

page_content='drink_name: A1
is_alcoholic: Alcoholic
category: Cocktail
glassType: Cocktail glass
drink_ingredients: Gin, Grand Marnier, Lemon Juice, Grenadine
drink_instructions: Pour all ingredients into a cocktail shaker, mix and serve over ice into a chilled glass.
ingredientMeasures: ['1 3/4 shot ', '1 Shot ', '1/4 Shot', '1/8 Shot']' metadata={'source': 'drinks.csv', 'row': 0}


In [4]:
import pandas as pd
df = pd.read_csv("final_cocktails.csv")

In [5]:
df = df.drop(columns=['Unnamed: 0', 'id', 'drinkThumbnail', 'text'])
# df = df.rename(columns={"name": "drink_name", "alcoholic": "is_alcoholic", "instructions": "drink_instructions", "ingredients": "drink_ingredients"})
df["ingredients"] = df["ingredients"].apply(lambda x: ", ".join(eval(x)))

In [7]:
from langchain_community.document_loaders import DataFrameLoader
loader = DataFrameLoader(df, page_content_column="name")
data2 = loader.load_and_split()

In [8]:
print(data2[0])

page_content='A1' metadata={'alcoholic': 'Alcoholic', 'category': 'Cocktail', 'glassType': 'Cocktail glass', 'instructions': 'Pour all ingredients into a cocktail shaker, mix and serve over ice into a chilled glass.', 'ingredients': 'Gin, Grand Marnier, Lemon Juice, Grenadine', 'ingredientMeasures': "['1 3/4 shot ', '1 Shot ', '1/4 Shot', '1/8 Shot']"}


In [130]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

metadata_field_info = [
    AttributeInfo(
        name="alcoholic",
        description="Indicates whether the drink is alcoholic or non-alcoholic",
        type="string",
    ),
    AttributeInfo(
        name="category",
        description="Type of drink (e.g., Cocktail, Ordinary Drink)",
        type="string",
    ),
    AttributeInfo(
        name="glassType",
        description="Recommended glass type for serving",
        type="string",
    ),
    AttributeInfo(
        name="instructions",
        description="Preparation instructions for the drink",
        type="string",
    ),
    AttributeInfo(
        name="ingredients",
        description="List of ingredients used in the recipe",
        type="string",
    ),
    AttributeInfo(
        name="ingredientMeasures",
        description="Measurements corresponding to each ingredient",
        type="string",
    ),
]
document_content_description = "Name of the drink"

In [131]:
from typing import Any, Dict, List


class CustomSelfQueryRetriever(SelfQueryRetriever):
    def _get_docs_with_query(
        self, query: str, search_kwargs: Dict[str, Any]
    ) -> List[Document]:
        """Get docs, adding score information."""
        docs, scores = zip(
            *self.vectorstore.similarity_search_with_score(query, **search_kwargs)
        )
        for doc, score in zip(docs, scores):
            doc.metadata["score"] = score

        return docs

In [225]:
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, separators=[''])
# all_splits = text_splitter.split_documents(data)

In [186]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [83]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)
_ = vector_store.add_documents(documents=data)

In [209]:
from langchain.vectorstores import FAISS

vector_store = FAISS.from_documents(data, embeddings)

In [210]:
from pinecone import Pinecone, ServerlessSpec
import time

if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = ""
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

index_name = "cocktails"

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [211]:
from langchain_pinecone import PineconeVectorStore

cocktails_vector_store = PineconeVectorStore.from_documents(
    data, index_name="cocktails", embedding=embeddings
)

# vectorstore = PineconeVectorStore(index_name="drinks-info", embedding=embeddings)

In [212]:
retriever = cocktails_vector_store.as_retriever(
    # search_type="similarity_score_threshold", search_kwargs={"score_threshold": .4, "k": 20}
    search_kwargs={"k": 20}
)

In [215]:
from langchain.chains import RetrievalQA
local_qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    retriever=retriever
)

In [216]:
res = local_qa_chain.invoke("What are the 5 cocktails containing lemon juice?")
print(res["result"])

Here are 5 cocktails that contain lemon juice:

1. New York Lemonade
2. Havana Cocktail
3. Ice Pick
4. Mojito
5. Lemon Elderflower Spritzer


In [217]:
res = local_qa_chain.invoke("What are the 5 non-alcoholic cocktails containing sugar in ingredients?")
print(res['result'])

The following 5 non-alcoholic drinks contain sugar:

1. Cranberry Punch
2. Lassi - Sweet
3. Fruit Cooler
4. Egg Nog - Healthy
5. Egg Nog #4


In [218]:
res = local_qa_chain.invoke("Recommend a cocktail similar to “Hot Creamy Bush”")
print(res['result'])

The "Hot Toddy" is similar to the "Hot Creamy Bush" as they are both hot alcoholic drinks served in an Irish coffee cup.  Both also contain whiskey.  The main difference is that the Hot Toddy includes honey, cinnamon, lemon, and cloves in addition to the whiskey, while the Hot Creamy Bush uses Baileys and coffee.


In [219]:
user_input = "My favorite ingredients are milk and honey"
user_vector_store = PineconeVectorStore(index_name="user-memories", embedding=embeddings)
document_1 = Document(
    page_content=user_input,
    metadata={"source": "user"},
)
user_vector_store.add_documents(documents=[document_1])

['aa9f6de1-7ee3-49c6-adc4-54e4bee343fe']

In [270]:
from langchain_core.tools import tool

@tool
def user_mem_retrieve(query: str):
    """Retrieve information about user favorite cocktails and ingredients."""
    retrieved_docs = user_vector_store.similarity_search(query, k=10)
    serialized = "\n\n".join(
        f"Source: {doc.metadata}\n" f"Content: {doc.page_content}"
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [257]:
@tool
def cocktails_retrieve(query: str):
    """Retrieve information about cocktails from local document database."""
    retrieved_docs = cocktails_vector_store.similarity_search(query, k=20)
    serialized = "\n\n".join(
        f"Source: {doc.metadata}\n" f"Content: {doc.page_content}"
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [271]:
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import create_react_agent

system_message = "You are a cocktail advisor."

model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

langgraph_agent_executor = create_react_agent(
    model, [user_mem_retrieve, cocktails_retrieve], state_modifier=system_message
)

In [314]:
# messages = langgraph_agent_executor.invoke({"messages": [("user", "What are my favourite ingredients?")]})
messages = langgraph_agent_executor.invoke({"messages": [("user", "Recommend 5 cocktails that contain my favourite ingredients")]})

In [315]:
for mes in messages["messages"]:
    mes.pretty_print()


Recommend 5 cocktails that contain my favourite ingredients
Tool Calls:
  user_mem_retrieve (d002b6e4-3ca2-4540-9f2f-cdf7b4614b46)
 Call ID: d002b6e4-3ca2-4540-9f2f-cdf7b4614b46
  Args:
    query: favorite ingredients
Name: user_mem_retrieve

("Source: {'source': 'user'}\nContent: My favorite ingredients are milk and honey\n\nSource: {'source': 'user'}\nContent: I like lime", [Document(id='aa9f6de1-7ee3-49c6-adc4-54e4bee343fe', metadata={'source': 'user'}, page_content='My favorite ingredients are milk and honey'), Document(id='e3eb009d-f43f-46c8-aac9-cd8a98a00cb3', metadata={'source': 'user'}, page_content='I like lime')])
Tool Calls:
  cocktails_retrieve (2ed32c57-9742-4baa-9b1b-aca08bd5a65c)
 Call ID: 2ed32c57-9742-4baa-9b1b-aca08bd5a65c
  Args:
    query: milk AND honey AND lime
Name: cocktails_retrieve

("Source: {'row': 153.0, 'source': 'drinks.csv'}\nContent: drink_name: Honey Bee\nis_alcoholic: Alcoholic\ncategory: Cocktail\nglassType: Margarita glass\ndrink_ingredients: White

In [289]:
print(messages["messages"][-1].content)

Based on your liking for milk, honey and lime, I recommend the following cocktails:

1. Honey Bee: A refreshing mix of white rum, honey, and lemon juice.
2. Sweet Bananas: A creamy blend of milk, banana, and honey.
3. Rum Milk Punch: A comforting combination of light rum, milk, powdered sugar, and a sprinkle of nutmeg.
4. Jamaica Kiss: An indulgent mix of coffee liqueur, light rum, milk, and ice, blended until smooth.
5. Limeade: A simple and thirst-quenching non-alcoholic drink made with lime, sugar, and soda water. 


"content='Based on your liking for milk, honey and lime, I recommend the following cocktails:\\n\\n1. Honey Bee: A refreshing mix of white rum, honey, and lemon juice.\\n2. Sweet Bananas: A creamy blend of milk, banana, and honey.\\n3. Rum Milk Punch: A comforting combination of light rum, milk, powdered sugar, and a sprinkle of nutmeg.\\n4. Jamaica Kiss: An indulgent mix of coffee liqueur, light rum, milk, and ice, blended until smooth.\\n5. Limeade: A simple and thirst-quenching non-alcoholic drink made with lime, sugar, and soda water. ' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-d2838e68-6402-41a5-831b-c74bddc89205-0' usage_metadata={'input_tokens': 6684, 'output_tokens': 132, 'total_tokens': 6816, 'input_token_details': {'cache_read': 0}}"

In [253]:
user_input = "I like lime"

document_1 = Document(
    page_content=user_input,
    metadata={"source": "user"},
)

from langchain_core.prompts import ChatPromptTemplate
user_vector_store.add_documents(documents=[document_1])

['e3eb009d-f43f-46c8-aac9-cd8a98a00cb3']

In [297]:
from pydantic import BaseModel, Field
class Preferences(BaseModel):
    """Information about user favorite cocktails and ingredients."""
    name: str = Field(
        ..., description="Name of cocktail or ingredient"
    )
    type: str = Field(
        ..., description="Is it ingredient or cocktail?"
    )
    attitude: str = Field(
        ..., description="How does the user feel about it?"
    )

class ExtractionData(BaseModel):
    """Extracted information about  user favorite cocktails or ingredients."""
    favorite_things: List[Preferences]

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert at identifying user favorite ingredients and cocktails in text."
            "Only extract favorite ingredients and cocktails. Extract nothing if no important information can be found in the text.",
        ),
        ("human", "{text}"),
    ]
)

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
extractor = prompt | llm.with_structured_output(
    schema=ExtractionData,
    include_raw=False,
)

In [304]:
res = extractor.invoke(user_input)

In [312]:
for preference in res.favorite_things:
    document = Document(
        page_content=str(preference.content),
        metadata={"source": "user"},
    )
    user_vector_store.add_documents(documents=[document])

name='lime' type='ingredient' attitude='like'
name='vodka' type='ingredient' attitude='like'


In [251]:
messages = langgraph_agent_executor.invoke({"messages": [("user", "I like lime")]})

In [175]:
# from langgraph.graph import START, StateGraph
#
# # Define prompt for question-answering
# prompt = hub.pull("rlm/rag-prompt")
#
#
# # Define state for application
# class State(TypedDict):
#     question: str
#     context: List[Document]
#     answer: str
#
#
# # Define application steps
# def retrieve(state: State):
#     retrieved_docs = vector_store.similarity_search(state["question"], k=10)
#     print(retrieved_docs)
#     return {"context": retrieved_docs}
#
#
# def generate(state: State):
#     docs_content = "\n\n".join(doc.page_content for doc in state["context"])
#     messages = prompt.invoke({"question": state["question"], "context": docs_content})
#     response = llm.invoke(messages)
#     return {"answer": response.content}
#
#
# # Compile application and test
# graph_builder = StateGraph(State).add_sequence([retrieve, generate])
# graph_builder.add_edge(START, "retrieve")
# graph = graph_builder.compile()



In [182]:
# response = graph.invoke({"question": "What are the 5 cocktails containing lemon?"})
# print(response["answer"])

In [183]:
# response = graph.invoke({"question": "Which ingredients are in Frisco Sour?"})
# print(response["answer"])

In [184]:
# response = graph.invoke({"question": "What are the 5 non-alcoholic drinks containing sugar?"})
# print(response["answer"])

In [185]:
# response = graph.invoke({"question": "What is A1?"})
# print(response["answer"])