# Install Required Libraries

In [8]:
%pip install langchain==0.1.20
%pip install langchain-openai==0.1.7
%pip install pymongo==4.7.2

# Load Environment Variables 

In [None]:
# Create .env file from the template if it doesn't exist.
# Pure Python is used instead of the `%cp` alias so the cell also works on
# platforms where IPython defines no `cp` alias; an existing .env is never
# overwritten, and a missing template is skipped rather than raising.
import shutil
from pathlib import Path

env_file = Path(".env")
env_template = Path(".env.example")
if not env_file.exists() and env_template.exists():
    shutil.copy(env_template, env_file)

In [1]:
# Load the environment variables from the .env file so the os.getenv calls
# in later cells can read them
import os

from dotenv import load_dotenv

# Last expression of the cell: its return value is shown as the cell output
load_dotenv()

True

# Initialize OpenAI Client

Save the `api_type`, `api_base`, `api_version`, and `api_key` as global variables to avoid the need to supply them later in code.

In [14]:
import openai

# Each setting is read from the environment; the second argument is only a
# placeholder fallback and has to be replaced (or overridden via .env).
openai.api_type = os.getenv("OPENAI_API_TYPE", "azure")
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT", "https://<YOUR-OPENAI-DEPLOYMENT-NAME>.openai.azure.com/")
openai.api_version = os.getenv("OPENAI_API_VERSION", "2023-09-15-preview")
openai.api_key = os.getenv("OPENAI_API_KEY", "<YOUR-DEPLOYMENT-KEY>")
# NOTE(review): the LangChain clients created later are not passed these values
# explicitly; presumably they read the same settings from the environment. Confirm.

# Initialize the MongoDB Client

In [13]:
from pymongo import MongoClient

# Read the connection settings from the environment (fallbacks are placeholders)
mongo_connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING", "<YOUR-COSMOS-DB-CONNECTION-STRING>")
collection_name = os.getenv("AZURE_COSMOS_COLLECTION_NAME", "collectionName")
database_name = os.getenv("AZURE_COSMOS_DATABASE_NAME", "DatabaseName")

# Initialize the MongoClient
mongo_client = MongoClient(mongo_connection_string)

# Get a handle to the database
# NOTE(review): indexing the client presumably creates nothing by itself; the
# database/collection should only appear on the first write. Confirm.
db = mongo_client[database_name]

# Get a handle to the collection that will hold the documents
collection = db[collection_name]

  mongo_client = MongoClient(mongo_connection_string)


# Load JSON Data

In [15]:
import json

from langchain.docstore.document import Document

# Location of the JSON data file; a relative path, so it is resolved against
# the kernel's current working directory
SOURCE_FILE_NAME = "./src/data/food_items.json"

def read_data(file_path) -> list[Document]:
    """Load a JSON file and wrap each top-level item in a ``Document``.

    Args:
        file_path: Path to a JSON file whose top level is iterable
            (the loop enumerates it item by item).

    Returns:
        One ``Document`` per item. ``page_content`` is the item re-serialized
        with ``json.dumps``; ``metadata`` holds the absolute path of the file
        under ``'source'`` and the 1-based position of the item under
        ``'seq_num'``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # encoding, which differs between operating systems.
    with open(file_path, encoding="utf-8") as file:
        json_data = json.load(file)

    absolute_path = os.path.abspath(file_path)
    return [
        Document(
            page_content=json.dumps(item),
            metadata={'source': absolute_path, 'seq_num': seq_num},
        )
        for seq_num, item in enumerate(json_data, start=1)
    ]

In [None]:
# Despite the name, json_data holds the list of Documents built by read_data
json_data = read_data(SOURCE_FILE_NAME)

In [16]:
# Display a sample from the data (index 1, i.e. the second document)
print(json_data[1])

page_content='{"category": "Smoothies", "name": "Jimmy Jam Smoothie", "description": "Berries n kale, strawberries, bananas, blueberries kale, tropical fruit blend, and dragon fruit. Our fruity tasty smoothies are blended to perfection.", "price": "5.49 USD"}' metadata={'source': 'C:\\Users\\Khelan Modi\\OneDrive - Microsoft\\Desktop\\Build demo\\build-24-langchain-vcore\\src\\data\\food_items.json', 'seq_num': 2}


# Initialize the Embeddings Client

In [17]:
from langchain_openai import AzureOpenAIEmbeddings

# Model and deployment names come from the environment, with defaults
openai_embeddings_model = os.getenv("AZURE_OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
openai_embeddings_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME", "text-embedding")

# Embeddings client used both to populate the vector store and to query it
azure_openai_embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    model=openai_embeddings_model,
    azure_deployment=openai_embeddings_deployment,
)

# Generate and Save Embeddings to MongoDB

In [20]:
from langchain_community.vectorstores.azure_cosmos_db import AzureCosmosDBVectorSearch

# Name of the vector index, read from the environment with a placeholder default
index_name = os.getenv("AZURE_COSMOS_INDEX_NAME", "indexName")

# Only Run this the first time you open the notebook
# Create embeddings from the data, save to the database and return a connection to MongoDB vCore
# NOTE(review): re-running this cell presumably inserts the documents again; confirm before re-executing
vector_store: AzureCosmosDBVectorSearch = AzureCosmosDBVectorSearch.from_documents(
    json_data,
    azure_openai_embeddings,
    collection=collection,
    index_name=index_name,
)

In [11]:
# Run this to connect to the vector store
# The namespace is "<database>.<collection>", built from the names read earlier
vector_store: AzureCosmosDBVectorSearch =  AzureCosmosDBVectorSearch.from_connection_string(
    connection_string=mongo_connection_string,
    namespace=f"{database_name}.{collection_name}",
    embedding=azure_openai_embeddings,
)

  client: MongoClient = MongoClient(connection_string, appname=appname)


# Create Vector Index (HNSW)

In [21]:
from langchain_community.vectorstores.azure_cosmos_db import (
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType,
)

# Index settings. Each one is explained in detail at
# https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search
num_lists = 100
dimensions = 1536
similarity_algorithm = CosmosDBSimilarityType.COS
kind = CosmosDBVectorSearchType.VECTOR_HNSW
m = 16
ef_construction = 64

# Create the collection and the index; the returned command result is
# displayed as the cell output
vector_store.create_index(
    num_lists=num_lists,
    dimensions=dimensions,
    similarity=similarity_algorithm,
    kind=kind,
    m=m,
    ef_construction=ef_construction,
)

{'raw': {'defaultShard': {'numIndexesBefore': 1,
   'numIndexesAfter': 2,
   'createdCollectionAutomatically': False,
   'ok': 1}},
 'ok': 1}

## Test Vector Search Flow

In [22]:
# Run a similarity search for the query and print the first returned document
query = "Beef Bacon"
docs = vector_store.similarity_search(query)
# Indexing [0] assumes the search returned at least one document
print(docs[0].page_content)

{"category": "Sandwiches", "name": "Bacon Turkey Bravo Sandwich", "description": "Whole (1010 Cal.), Half (500 Cal.) Oven-roasted turkey breast raised without antibiotics, Applewood-smoked bacon, smoked Gouda, emerald greens, vine-ripened tomatoes, signature sauce , salt and pepper on Tomato Basil Bread. Allergens: Contains Wheat, Milk, Egg", "price": "8.79 USD"}


# Initialize the Chat Client

In [14]:
from langchain_openai import AzureChatOpenAI

# Model and deployment names come from the environment, with defaults
openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL_NAME", "gpt-35-turbo")
openai_chat_deployment= os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "chat-gpt")

# Chat client used for query rewriting and answer generation in the RAG chain
azure_openai_chat: AzureChatOpenAI = AzureChatOpenAI(
    model=openai_chat_model,
    azure_deployment=openai_chat_deployment,
)

In [15]:
# Test the chat flow
chat_response = azure_openai_chat.invoke("Tell me a joke")
print(chat_response.content)

Why did the tomato turn red?

Because it saw the salad dressing!


# Create RAG Function

In [16]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

# Prompt that turns the conversation so far plus the new user input into a
# search query. Expects the variables "chat_history" and "input".
history_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        (
            "user",
            """Given the above conversation,
            generate a search query to look up to get information relevant to the conversation""",
        ),
    ]
)

# Prompt that answers the user from retrieved context. Expects the variables
# "context", "chat_history" and "input".
context_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the user's questions based on the below context:\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ]
)

In [17]:
# Retriever settings, consumed by vector_store.as_retriever in the next cell
limit = 3  # forwarded as the "k" search kwarg
score_threshold = 0.5  # forwarded as the "score_threshold" search kwarg
search_type = "similarity"

In [18]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.runnables import Runnable

# Retriever over the vector store, configured from the settings cell above
vector_store_retriever = vector_store.as_retriever(
    search_type=search_type,
    search_kwargs=dict(k=limit, score_threshold=score_threshold),
)

# Step 1: rewrite (history + input) into a search query, then retrieve
retriever_chain = create_history_aware_retriever(
    llm=azure_openai_chat,
    retriever=vector_store_retriever,
    prompt=history_prompt,
)

# Step 2: put the retrieved documents into the context prompt and answer
context_chain = create_stuff_documents_chain(azure_openai_chat, context_prompt)

# Full pipeline: retrieval followed by answer generation
rag_chain: Runnable = create_retrieval_chain(retriever_chain, context_chain)

## Test RAG Flow

In [34]:
# First turn: there is no prior conversation, so chat_history starts empty
first_question = "recommend me a strawberry smoothi"
chat_history = []
response = rag_chain.invoke({"input": first_question, "chat_history": chat_history})
# The chain result is indexed by key; 'answer' holds the generated reply
print(response['answer'])

Sure! I would recommend trying the "Aw Shuckie Shuckie Now Smoothie". It contains a triple berry blend, including strawberries, as well as bananas, non-fat yogurt, carrots, and mango. It's priced at 6.49 USD.


In [35]:
from langchain_core.messages import AIMessage, HumanMessage

# Record the first exchange as typed messages. The answer is wrapped in
# AIMessage so the model sees it as its own earlier reply; a bare string in
# the history would be treated as another human turn.
chat_history.extend(
    [
        HumanMessage(content=first_question),
        AIMessage(content=response["answer"]),
    ]
)

# Second turn: a follow-up that can only be answered from the history
second_question = "What did I just ask you about?"
response = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

In [36]:
# Show the reply to the follow-up question
print(response['answer'])

You asked for a recommendation for a strawberry smoothie.


# Test with Gradio

In [None]:
# Extra packages needed only for the Gradio demo below
# TODO: pin versions, as the install cell at the top of the notebook does
%pip install ipywidgets gradio

In [20]:
import gradio as gr


def setup_gradio_interface(chain):
    """Build a Gradio chat UI that answers questions through ``chain``.

    Args:
        chain: Object whose ``invoke`` accepts
            ``{"input": <str>, "chat_history": <list of messages>}`` and
            returns a mapping with an ``"answer"`` entry.

    Returns:
        The ``gr.Blocks`` interface; the caller is responsible for launching it.
    """
    # Imported locally so this cell does not depend on an earlier cell's import
    from langchain_core.messages import AIMessage, HumanMessage

    with gr.Blocks() as demo_interface:
        chatbot = gr.Chatbot(label="Food Ordering System")
        # Per-session conversation history in the message format the chain expects
        lc_chat_history = gr.State([])
        msg = gr.Textbox(label="Your question")
        clear_button = gr.ClearButton([msg, chatbot])
        # Clearing the visible chat must also forget the chain-side history,
        # otherwise the model keeps answering from a conversation the user no
        # longer sees.
        clear_button.click(lambda: [], inputs=None, outputs=[lc_chat_history])

        def fetch_response(message, chat_history, lc_chat_history):
            """Answer one user message and update both histories."""
            # The chatbot value may be empty/None after a clear
            chat_history = chat_history or []
            # The chain's prompts read the user text from the "input" key
            response = chain.invoke({"input": message, "chat_history": lc_chat_history})
            answer = response["answer"]
            # Typed messages keep the user/assistant roles unambiguous
            lc_chat_history.extend(
                [HumanMessage(content=message), AIMessage(content=answer)]
            )
            chat_history.append([message, answer])
            # Empty string clears the textbox after submission
            return "", chat_history, lc_chat_history

        msg.submit(fetch_response, inputs=[msg, chatbot, lc_chat_history], outputs=[msg, chatbot, lc_chat_history])

    return demo_interface

In [None]:
# Build the chat UI around the RAG chain and start the Gradio server
food_ordering_demo = setup_gradio_interface(rag_chain)
food_ordering_demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




# Inline Embeddings Generation

In [None]:
# This section talks to a different account (separate connection string).
# NOTE: mongo_connection_string, mongo_client, db, collection_name, index_name
# and collection are rebound here, so cells above that use them will target
# this account if they are re-run afterwards.
mongo_connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING_AUTO_EMBEDDING", "conn_string")
mongo_client = MongoClient(mongo_connection_string)

db_name = os.getenv("AZURE_COSMOS_DATABASE_NAME", "DatabaseName")
db = mongo_client[db_name]

collection_name = os.getenv("AZURE_COSMOS_COLLECTION_NAME", "collectionName")
index_name = os.getenv("AZURE_COSMOS_INDEX_NAME", "indexName")

collection = db[collection_name]

# Insert data: the first 20 items, decoded back from the Documents' JSON text.
# One batched insert instead of one round trip per document; insert_many
# rejects an empty list, hence the guard.
docs = [json.loads(item.page_content) for item in json_data[0:20]]
if docs:
    collection.insert_many(docs)

# Inline generate embeddings: ask the server to embed each document's
# "description" field into an "embeddings" field
collection.update_many({}, {"$generateEmbeddings": {"description": "embeddings"}})

# Create HNSW index over the "embeddings" field
createIndexCommand = {
    "createIndexes": collection_name,
    "indexes": [
        {
            "key": {"embeddings": "cosmosSearch"},
            "name": "hnsw_index",
            "cosmosSearchOptions": {
                "kind": "vector-hnsw",
                "m": 4,
                "efConstruction": 16,
                "similarity": "COS",
                "dimensions": 1536
            }
        }
    ]
}
db.command(createIndexCommand)


In [10]:
# Two-stage aggregation: vector search using the first inserted document's
# description as the query text, then keep only the score, name and description.
search_pipeline = [
    {
        "$search": {
            "cosmosSearch": {
                "query": docs[0]["description"],
                "k": 5,
                "path": "embeddings",
                "efSearch": 100,
            }
        }
    },
    {
        "$project": {
            "similarityScore": {"$meta": "searchScore"},
            "_id": 0,
            "name": 1,
            "description": 1,
        }
    },
]

results = collection.aggregate(search_pipeline)

# One line per hit, score first
for hit in results:
    print(f"[Score: {hit['similarityScore']:.3f}] {hit['name']}: {hit['description']}")

[Score: 1.000] Ashunti`Way Smoothie: Fruit n greens, mango bananas, tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples, and spinach. Special green with strawberry bananas juice blend . Our fruity tasty smoothies are blended to perfection.
[Score: 0.986] Dayton 500 Smoothie: Tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples. Special green juice blend. Our fruity tasty smoothies are blended to perfection.
[Score: 0.973] Tongue Teaser Smoothie: Tropical fruit blend, dragon fruit, pineapples, bananas, mango, apples, spinach, ginger powder. Special green blend, pineapple and ginger smoothies. Our fruity tasty smoothies are blended to perfection.
[Score: 0.967] Tejay Impact Smoothie: Tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples, and spinach. Special blue juice blend smoothies.
[Score: 0.961] Jimmy Jam Smoothie: Berries n kale, strawberries, bananas, blueberries kale, tropical fruit blend, and dragon fruit. Our fru