In [23]:
# Read the Azure OpenAI connection settings (key, endpoint, deployment name)
import os
from dotenv import find_dotenv, load_dotenv

# read local .env file; the return value is bound to `_` because it is not needed
_ = load_dotenv(find_dotenv())

# os.environ.get yields None for any variable that is not defined
azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY_4")
azure_openai_api_endpoint = os.environ.get("AZURE_OPENAI_API_ENDPOINT_4")
deployment_name = os.environ.get("AZURE_DEPLOYMENT_NAME_4")


In [24]:
# Instantiate the chat LLM on the Azure OpenAI deployment read from the environment
from langchain.chat_models import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_endpoint=azure_openai_api_endpoint,
    azure_deployment=deployment_name,
    api_key=azure_openai_api_key,
    api_version="2023-12-01-preview",
    temperature=0.9,
)


In [25]:
# Embedding model: passed to FAISS.load_local for the persisted index and to
# FAISS.from_documents inside the QA tool.
from langchain.embeddings import AzureOpenAIEmbeddings

embedding_model = AzureOpenAIEmbeddings(
    azure_endpoint=azure_openai_api_endpoint,
    azure_deployment='text-embedding-3-large',
    openai_api_key=azure_openai_api_key,
    openai_api_version="2023-05-15",
    chunk_size=500,
)

In [26]:
### model with only database

from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Load the persisted 'travel_geography' index from the 'data' folder.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading a
# serialized (presumably pickle-based) docstore -- only do this for index
# files you produced yourself or otherwise trust.
faiss_vector_store = FAISS.load_local(
    'data',
    embeddings=embedding_model,
    index_name='travel_geography',
    allow_dangerous_deserialization=True,
)

# "stuff" RetrievalQA chain answering from the 2 documents returned by a
# similarity search on the index.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=faiss_vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 2},
    ),
    verbose=True,  # set False when production
)

In [27]:
# Check of the vector store: query the same k=2 similarity retriever directly,
# without the LLM, to see which documents come back.
faiss_vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
).invoke("Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")

[Document(metadata={'Unnamed: 0': 1682, 'Author': 'Reid, Thomas H.', 'Title': 'Across the Equator: A Holiday Trip in Java', 'Credits': 'Produced by a Project Gutenberg volunteer from digitalmaterial generously made available by the Internet Archive', 'Language': 'English', 'LoC Class': 'DS: History: General and Eastern Hemisphere: Asia', 'Subject': 'Java (Indonesia) -- History', 'Category': 'Text', 'EBook-No.': 27556, 'Release Date': 'Dec 18, 2008', 'Most Recently Updated': 'Jan 4, 2021', 'Copyright Status': 'Public domain in the USA.', 'Downloads': '235 downloads in the last 30 days.', 'Uniform Title': nan, 'Alternate Title': nan, 'Note': nan, 'Editor': nan, 'Contents': nan, 'Illustrator': nan, 'Author of introduction, etc.': nan, 'Original Publication': nan, 'Language Note': nan, 'Translator': nan, 'Contributor': nan, 'Unknown role': nan, 'Series Title': nan, 'Commentator': nan, 'Creator': nan, 'LoC No.': nan, 'Compiler': nan, 'Annotator': nan, 'Other': nan, 'Adapter': nan, 'Engraver

With k=2, the vector store does not retrieve documents with the keyword Java or Indonesia.

In [28]:
# Check of RetrievalQA: send the same question through the qa_stuff chain
# (k=2 retriever + LLM) and display the generated answer.
qa_stuff.run("Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


"I don't have specific information on activities to do in Indonesia with kids from those books. However, I can suggest you look for recent travel guides or books specifically focusing on family travel in Indonesia for the most relevant information. If you need more guidance on where to find these resources, feel free to ask!"

In [29]:
# Build the chat prompt shared by the chains below.
# NOTE(review): ChatOpenAI and output_parser are not referenced by any cell
# visible in this notebook export.
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser

output_parser = StrOutputParser()

# The trailing backslashes continue the string literal across source lines, so
# the template is one logical line and the leading spaces of every continuation
# line are part of the prompt text. Placeholders: {question} and {context}.
template = "You are an assistant for question-answering tasks. \
    Use the following pieces of retrieved context to extract information. \
        If you don't know the answer, just say that you don't know. \
        Provide 3 choices of book where the information can be retreived\
            Use three sentences maximum and keep the answer concise.\
Use available functions to retreive information from a QA vectorstore.\
      The vectorstore are summarized chunks of the book identified by the doc id in metadata of Documents in context \
Question: {question} \
Context: {context} \
 \
Expected Output Format: \
Answer : " 
            # Disabled extension of the prompt, kept for reference:
            # In addition to your answer, extract from the metadata the information from the keys : '#Text', 'Release Date', 'Title'.\
            # Provide a python dictionary with metadata as keys \
            # and values as lists of all the retreived documents.\
            #     \nDictionary:"

# Single-message chat prompt built from the template string above.
prompt = ChatPromptTemplate.from_template(template)

In [30]:
# Q&A as a chain: build the prompt inputs, fill the prompt, call the LLM

from langchain.schema.runnable import RunnableMap

chain = (
    RunnableMap({
        # documents returned by a k=5 retriever for the incoming question
        "context": lambda inputs: faiss_vector_store.as_retriever(
            search_kwargs={"k": 5}
        ).invoke(inputs["question"]),
        # the question itself is forwarded unchanged
        "question": lambda inputs: inputs["question"],
    })
    | prompt
    | llm
)

In [31]:
# Run the plain chain (LLM without any bound function) and display the AIMessage
chain.invoke({"question": "Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book information about life there"})

AIMessage(content='Sure, here are some books where you can find information about life in Indonesia, particularly Java:\n\n1. **"Travels in the East Indian Archipelago" by Albert S. Bickmore** - This book offers insights into Bickmore\'s interactions with local cultures and his observations of life in Java.\n   \n2. **"Across the Equator: A Holiday Trip in Java" by Thomas H. Reid** - Reid provides a detailed account of his travel experiences, focusing on Java’s scenic beauty and cultural life.\n\n3. **"Java: The Garden of the East" by Eliza Ruhamah Scidmore** - This travelogue describes the natural beauty and cultural intricacies of Java, aiming to inspire other travelers.\n\nThese books provide various perspectives on exploring and understanding life in Indonesia, especially Java.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 163, 'prompt_tokens': 2939, 'total_tokens': 3102, 'completion_tokens_details': None}, 'model_name': 'gpt-4o-2024-08-06', 'syste

Without bound functions, the model returns its answer in `content`.

### Load the tool function (defined in the cell at the end of the notebook)

In [32]:
# convert pydantic NOT function @tool
# from langchain.utils.openai_functions import convert_pydantic_to_openai_function
# convert_pydantic_to_openai_function(GetFullText)

from langchain.tools.render import format_tool_to_openai_function

In [34]:

# Same inputs and prompt as the plain chain, but the LLM is bound to the
# OpenAI function schema generated from the QA_retrive_from_text tool.
chain2 = (
    RunnableMap({
        "context": lambda inputs: faiss_vector_store.as_retriever(
            search_kwargs={"k": 5}
        ).invoke(inputs["question"]),
        "question": lambda inputs: inputs["question"],
    })
    | prompt
    | llm.bind(functions=[format_tool_to_openai_function(QA_retrive_from_text)])
)

In [35]:
# Same question, now through the chain whose LLM has the function bound
a=chain2.invoke({"question": "Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book information about life there"})

In [36]:
# Display the message returned by chain2
a

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"id":4958,"query":"things to do in Indonesia"}', 'name': 'QA_retrive_from_text'}}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 3008, 'total_tokens': 3036, 'completion_tokens_details': None}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_d54531d9eb', 'finish_reason': 'function_call', 'logprobs': None}, id='run-f9a9370d-7f29-408f-8884-6231915cfba7-0')

Function binding works without forcing its usage: the model chose to call the function (content = "").

In [37]:
# The function call requested by the model is carried in additional_kwargs
a.additional_kwargs

{'function_call': {'arguments': '{"id":4958,"query":"things to do in Indonesia"}',
  'name': 'QA_retrive_from_text'}}

In [38]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
import json
# The function-call arguments are a JSON string; decode them into a dict
args = json.loads(a.additional_kwargs['function_call']['arguments'])
args


{'id': 4958, 'query': 'things to do in Indonesia'}

### 2_Functions .. L5_tool-routing
* using OpenAIFunctionsAgentOutputParser()

In [46]:
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

# chain2 plus a final parsing step: OpenAIFunctionsAgentOutputParser converts
# the LLM message into an agent object exposing .tool and .tool_input.
chain3 = (
    RunnableMap({
        "context": lambda inputs: faiss_vector_store.as_retriever(
            search_kwargs={"k": 5}
        ).invoke(inputs["question"]),
        "question": lambda inputs: inputs["question"],
    })
    | prompt
    | llm.bind(functions=[format_tool_to_openai_function(QA_retrive_from_text)])
    | OpenAIFunctionsAgentOutputParser()
)

In [47]:
# Same question through chain3; b is the parsed output of the function-bound LLM
b=chain3.invoke({"question": "Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book information about life there"})

In [52]:
# Tool name and arguments selected by the model
b.tool, b.tool_input

('QA_retrive_from_text',
 {'id': 4993, 'query': 'things to do in Indonesia and life there'})

In [53]:
# Execute the tool by hand with the arguments the model selected
QA_retrive_from_text(b.tool_input)

  return qa_chain({"query": query})


{'query': 'things to do in Indonesia and life there',
 'result': "Indonesia offers a diverse range of activities and experiences, reflecting its rich culture, natural beauty, and vibrant cities. Here are some highlights:\n\n1. **Explore Bali**: Known for its beaches, temples, and vibrant nightlife, Bali is a top destination. Don't miss the Ubud Monkey Forest and Tanah Lot Temple.\n\n2. **Visit Borobudur**: This iconic Buddhist temple in Central Java is one of the largest in the world and a UNESCO World Heritage site.\n\n3. **Dive in Raja Ampat**: Renowned for its marine biodiversity, Raja Ampat in West Papua is a paradise for scuba divers and snorkelers.\n\n4. **Experience Jakarta**: The capital city offers a mix of modernity and tradition with its shopping districts, historical sites, and diverse culinary scene.\n\n5. **Hike Mount Bromo**: Witness the stunning sunrise views from this active volcano in East Java.\n\n6. **Relax in Lombok**: Known for beautiful beaches and the mighty Mou

In [57]:
# Control input: a plain greeting instead of a question about a book
result_none = chain3.invoke({"question":"hi"})

In [58]:
# Type of the parsed result obtained for the greeting
type(result_none)

langchain_core.agents.AgentActionMessageLog

In [54]:
# Book question again through chain3, kept in `result` for the type check below
result = chain3.invoke({"question": "Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book information about life there"})

In [56]:
# Type of the parsed result obtained for the book question
type(result)

langchain_core.agents.AgentActionMessageLog

If LangChain requests a function call, the function still has to be called explicitly.

In [60]:
from langchain.schema.agent import AgentFinish
def route(result):
    """Turn the parsed LLM output into a final value.

    An AgentFinish already carries the answer, so its 'output' return value is
    handed back. Any other result is treated as a tool request: the tool is
    looked up by name and run with the arguments attached to the result.
    """
    if not isinstance(result, AgentFinish):
        # name -> tool lookup for the function bound to the LLM
        available_tools = {"QA_retrive_from_text": QA_retrive_from_text}
        selected_tool = available_tools[result.tool]
        return selected_tool.run(result.tool_input)
    return result.return_values['output']

In [61]:
# Prompt inputs: documents from a k=5 retriever plus the unchanged question
runnable = RunnableMap({
    "context": lambda inputs: faiss_vector_store.as_retriever(
        search_kwargs={"k": 5}
    ).invoke(inputs["question"]),
    "question": lambda inputs: inputs["question"],
})

# LLM with the QA_retrive_from_text tool exposed as an OpenAI function
llm_tool = llm.bind(
    functions=[format_tool_to_openai_function(QA_retrive_from_text)]
)

# inputs -> prompt -> LLM -> parse the function call -> route (answer or tool run)
chain4 = (
    runnable
    | prompt
    | llm_tool
    | OpenAIFunctionsAgentOutputParser()
    | route
)

In [62]:
# End-to-end run: the final route step returns the answer or runs the selected tool
result = chain4.invoke({"question": "Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book information about life there"})

In [63]:
# Display the routed result of chain4
result

{'query': 'life in Indonesia',
 'result': "Life in Indonesia is diverse and multifaceted, reflecting the country's rich cultural heritage and geographical diversity. As an archipelago with over 17,000 islands, Indonesia boasts a variety of lifestyles influenced by its many ethnic groups, languages, and religions. Major cities like Jakarta and Surabaya are bustling urban centers with modern amenities, while rural areas often rely on agriculture and maintain traditional customs.\n\nIndonesian society is community-oriented, with strong family ties and a focus on communal values. Religion plays a significant role, with the majority being Muslim, though there are also Christian, Hindu, and Buddhist communities.\n\nThe cuisine is flavorful and varied, featuring dishes such as nasi goreng, satay, and rendang. Natural beauty is abundant, from the beaches of Bali to the rainforests of Sumatra and the rice terraces of Java. Despite economic growth, challenges like infrastructure development and 

In [76]:

# qa_stuff.run("what are 5 differents topics of the ensemble of documents?")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'Here are five different topics from the ensemble of documents:\n\n1. **Spanish Colonial History in the Philippines:**\n   - Explored in "The Philippine Islands, 1493-1898 — Volume 25 of 55" and "The Philippine Islands, 1493-1803 — Volume 05 of 55," focusing on governance, commerce, and ecclesiastical affairs during Spanish colonial rule.\n\n2. **Cultural Experiences and Customs:**\n   - Discussed in "Strange Teas, Dinners, Weddings and Fetes," which explores diverse cultural experiences related to social events worldwide.\n\n3. **European Ethnicities and Historical Developments:**\n   - Examined in "The Peoples of Europe," which provides insights into the diverse ethnicities, languages, and cultures of Europe.\n\n4. **Scientific Inquiries and Natural Phenomena:**\n   - Addressed in "Miscellanea Curiosa, Vol. 1," covering various scientific discoveries and theories, including atmospheric vapors and magnetic compass variations.\n\n5. **Life and Society in Early 20th Century Persia:**\n 

In [70]:
# Debug faiss_vector_store: widen the retrieval to 12 documents and inspect
# the hit at index 8.
docs = faiss_vector_store.as_retriever(
    search_kwargs={"k": 12},
).invoke("DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")
docs[8]

Document(metadata={'Unnamed: 0': 4831, 'Author': 'Anonymous', 'Title': 'Holidays at Brighton :  or, sea-side amusements', 'Credits': 'Bob Taylor, Charlene Taylor and the Online Distributed Proofreading Team at https://www.pgdp.net(This file was produced from images generously made available by The Internet Archive)', 'Language': 'English', 'LoC Class': 'PZ: Language and Literatures: Juvenile belles lettres', 'Subject': 'Brighton (England) -- Description and travel -- Juvenile literature', 'Category': 'Text', 'EBook-No.': 71058, 'Release Date': 'Jun 27, 2023', 'Most Recently Updated': nan, 'Copyright Status': 'Public domain in the USA.', 'Downloads': '30 downloads in the last 30 days.', 'Uniform Title': nan, 'Alternate Title': nan, 'Note': nan, 'Editor': nan, 'Contents': nan, 'Illustrator': nan, 'Author of introduction, etc.': nan, 'Original Publication': 'United Kingdom: Darton and Harvey, 1834.', 'Language Note': nan, 'Translator': nan, 'Contributor': nan, 'Unknown role': nan, 'Series

# function agent book parser

In [71]:
from langchain.tools.render import format_tool_to_openai_function
# Display the OpenAI function schema generated from the tool
format_tool_to_openai_function(QA_retrive_from_text)

{'name': 'QA_retrive_from_text',
 'description': 'use this function to answer a query over the text referenced by this id',
 'parameters': {'properties': {'id': {'description': 'ID of the text to parse in the function',
    'type': 'integer'},
   'query': {'description': 'thing to search for', 'type': 'string'}},
  'required': ['id', 'query'],
  'type': 'object'}}

In [33]:
from pydantic import BaseModel, Field
import requests

# loader
# from langchain_community.document_loaders import TextLoader # not needed we load a string
from langchain.docstore.document import Document # process string in Documents

# splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

# embedding
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import FAISS

# compression
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# define prompt
from langchain.prompts import PromptTemplate

# send query
from langchain.chains import RetrievalQA


# Define the input schema
# Used as args_schema of the QA_retrive_from_text tool. The Field descriptions
# below appear in the function schema given to the model, so they are
# prompt text as much as documentation.
class GetFullText(BaseModel):
    # used to build the Project Gutenberg download URL of the text
    id: int = Field(description="ID of the text to parse in the function")
    # question forwarded to the retrieval QA chain
    query: str = Field(description="thing to search for")

from langchain.agents import tool


@tool(args_schema=GetFullText)
def QA_retrive_from_text(id: int,query: str) -> dict:
    """use this function to answer a query over the text referenced by this id"""
    # NOTE: the docstring above is kept verbatim on purpose: it is exposed to
    # the model as the tool 'description'. Implementation notes therefore live
    # in comments.
    #
    # Args:
    #   id:    number inserted into the Project Gutenberg cache URL.
    #   query: question to answer over the downloaded text.
    # Returns:
    #   The RetrievalQA output dict ('query', 'result', 'source_documents').
    # Raises:
    #   Exception: when the download does not answer with HTTP 200.
    #   requests exceptions (e.g. a timeout) propagate unchanged.

    url_text = f"https://www.gutenberg.org/cache/epub/{id}/pg{id}.txt"

    # Bound the wait so a stalled download cannot hang the tool forever.
    response = requests.get(url_text, timeout=30)
    if response.status_code != 200:
        raise Exception(f"text not available {response.status_code}")

    # Keep the same 42000-byte budget as before, but decode explicitly instead
    # of handing raw bytes to Document. errors="ignore" drops a multi-byte
    # character that the cut may have split at the end of the slice.
    full_text = response.content[:42000].decode("utf-8", errors="ignore")
    doc = Document(page_content=full_text)

    # split for generic texts (sizes are measured in tokens of the gpt-4 encoder)
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-4",
        chunk_size=5000,
        chunk_overlap=50,
    )
    splits = text_splitter.split_documents([doc])

    # throw-away vector store built from this single text
    vectordb = FAISS.from_documents(splits, embedding_model)

    # compression: the LLM extracts the relevant passages from the retrieved
    # chunks; k=20 is deliberately large to get maximum diverse context
    compressor = LLMChainExtractor.from_llm(llm)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=vectordb.as_retriever(search_kwargs={"k": 20}),
    )

    # QA retriever
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=compression_retriever,
        return_source_documents=True,
    )

    # .invoke replaces the deprecated direct call qa_chain({...}); the returned
    # dict is the same.
    return qa_chain.invoke({"query": query})

# result["result"]

In [64]:
from langchain.tools.render import format_tool_to_openai_function
# Not the last expression of the cell, so this schema is computed but not displayed
format_tool_to_openai_function(QA_retrive_from_text)
# Manual smoke test of the tool.
# NOTE(review): "id" is passed as a string although GetFullText declares it
# as int -- presumably coerced by the schema validation; confirm.
QA_retrive_from_text({"id":"23","query":"weather in afganistan"})

{'query': 'weather in afganistan',
 'result': "I don't have the current weather information for Afghanistan. You might want to check a reliable weather website or app for the latest updates.",
 'source_documents': []}