In [71]:
# load embedding from azure
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

azure_openai_api_key = os.getenv("AZURE_OPENAI_API_KEY_4")
azure_openai_api_endpoint = os.getenv("AZURE_OPENAI_API_ENDPOINT_4")
deployment_name = os.getenv("AZURE_DEPLOYMENT_NAME_4")


In [72]:
# instanciate LLM
from langchain.chat_models import AzureChatOpenAI

llm = AzureChatOpenAI(api_key=azure_openai_api_key,
                      azure_deployment=deployment_name,
                      api_version="2023-12-01-preview",
                      azure_endpoint=azure_openai_api_endpoint,
                      temperature=0.9
                      )


In [73]:
from langchain.embeddings import AzureOpenAIEmbeddings 
embedding_model = AzureOpenAIEmbeddings(openai_api_key=azure_openai_api_key,
                                    azure_deployment='text-embedding-3-large',
                                    azure_endpoint=azure_openai_api_endpoint,
                                    openai_api_version="2023-05-15",
                                    chunk_size=500
)

In [74]:
### model with only database

from langchain_community.vectorstores import FAISS
faiss_vector_store = FAISS.load_local('data', 
                                embeddings=embedding_model,
                                index_name = 'travel_geography',
                                allow_dangerous_deserialization = True)

# make request
from langchain.chains import RetrievalQA
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=faiss_vector_store.as_retriever(search_type="similarity", 
                                              search_kwargs={"k": 10}), 
    verbose=True) # set False when production

In [75]:
# qa_stuff.run("Do you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")

In [76]:

qa_stuff.run("what are 5 differents topics of the ensemble of documents?")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'Here are five different topics from the ensemble of documents:\n\n1. **Spanish Colonial History in the Philippines:**\n   - Explored in "The Philippine Islands, 1493-1898 — Volume 25 of 55" and "The Philippine Islands, 1493-1803 — Volume 05 of 55," focusing on governance, commerce, and ecclesiastical affairs during Spanish colonial rule.\n\n2. **Cultural Experiences and Customs:**\n   - Discussed in "Strange Teas, Dinners, Weddings and Fetes," which explores diverse cultural experiences related to social events worldwide.\n\n3. **European Ethnicities and Historical Developments:**\n   - Examined in "The Peoples of Europe," which provides insights into the diverse ethnicities, languages, and cultures of Europe.\n\n4. **Scientific Inquiries and Natural Phenomena:**\n   - Addressed in "Miscellanea Curiosa, Vol. 1," covering various scientific discoveries and theories, including atmospheric vapors and magnetic compass variations.\n\n5. **Life and Society in Early 20th Century Persia:**\n 

In [86]:
# debug faiss_vector_store
docs =faiss_vector_store.as_retriever(search_kwargs={"k": 12}).invoke("DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")
docs[8]

Document(metadata={'Unnamed: 0': 4831, 'Author': 'Anonymous', 'Title': 'Holidays at Brighton :  or, sea-side amusements', 'Credits': 'Bob Taylor, Charlene Taylor and the Online Distributed Proofreading Team at https://www.pgdp.net(This file was produced from images generously made available by The Internet Archive)', 'Language': 'English', 'LoC Class': 'PZ: Language and Literatures: Juvenile belles lettres', 'Subject': 'Brighton (England) -- Description and travel -- Juvenile literature', 'Category': 'Text', 'EBook-No.': 71058, 'Release Date': 'Jun 27, 2023', 'Most Recently Updated': nan, 'Copyright Status': 'Public domain in the USA.', 'Downloads': '30 downloads in the last 30 days.', 'Uniform Title': nan, 'Alternate Title': nan, 'Note': nan, 'Editor': nan, 'Contents': nan, 'Illustrator': nan, 'Author of introduction, etc.': nan, 'Original Publication': 'United Kingdom: Darton and Harvey, 1834.', 'Language Note': nan, 'Translator': nan, 'Contributor': nan, 'Unknown role': nan, 'Series

In [108]:
# create prompt
template = "You are an assistant for question-answering tasks. \
    Use the following pieces of retrieved context to extract information. \
        If you don't know the answer, just say that you don't know. \
            Use three sentences maximum and keep the answer concise.\
                \nQuestion: {question} \
                \nContext: {context} \
            If the answer can be extracted from a book referenced in the information,\
            extract the 'Unnamed: 0' metadata and use it as variable name 'id'\
            for the dedicated function, pass also the question as variable 'query'.\
                \nAnswer:"#\
            # In addition to your answer, extract from the metadata the information from the keys : '#Text', 'Release Date', 'Title'.\
            # Provide a python dictionary with metadata as keys \
            # and values as lists of all the retreived documents.\
            #     \nDictionary:"

prompt = PromptTemplate(input_variables=['context', 'question'], 
                        input_types={}, 
                        partial_variables={}, 
                        template=template)
prompt = ChatPromptTemplate(input_variables=['context', 'question'], 
                            input_types={}, 
                            partial_variables={}, 
                            messages=[HumanMessagePromptTemplate(prompt=prompt)])

In [111]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

retriever = faiss_vector_store.as_retriever(search_kwargs={"k": 5})
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm.bind(functions=QA_retrive_from_text)
    | StrOutputParser()
)

In [117]:
rep = rag_chain.("DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")
rep

AttributeError: 'RunnableSequence' object has no attribute 'run'

In [115]:
rep()

TypeError: 'generator' object is not callable

In [112]:
for chunk in rag_chain.stream("DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?"):
    print(chunk, end="", flush=True)

TypeError: Object of type ModelMetaclass is not JSON serializable

In [82]:
# meta data for llm to search
'Release Date', 'LoC Class',  'Title',  'Author'

('Release Date', 'LoC Class', 'Title', 'Author')

In [100]:
from pydantic import BaseModel, Field
import requests

# loader
# from langchain_community.document_loaders import TextLoader # not needed we load a string
from langchain.docstore.document import Document # process string in Documents


# splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

# embedding
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import FAISS


# compression
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# define prompt
from langchain.prompts import PromptTemplate

# send query
from langchain.chains import RetrievalQA

# Define the input schema
class GetFullText(BaseModel):
    id: int = Field(description="ID of the text to parse in the function")
    query: str = Field(description="thing to search for")
from langchain.agents import tool


@tool(args_schema=GetFullText)
def QA_retrive_from_text(id: int,query: str):
    """use this function to answer a query over the text referenced by this id"""


    id_text = id
    url_text = f"https://www.gutenberg.org/cache/epub/{id_text}/pg{id_text}.txt"


    # Make the request
    response = requests.get(url_text)

    if response.status_code == 200:
        full_text = response.content

    else:
        raise Exception(f"text not available {response.status_code}")
    # load text
    # loader = TextLoader([full_text])
    # doc = loader.load()
    doc = Document(full_text[:42000])

    # split for generic texts
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(model_name="gpt-4",
                                                                        chunk_size=5000, 
                                                                        chunk_overlap=50)
    splits = text_splitter.split_documents([doc])
    # embedded vector store
    vectordb = FAISS.from_documents(splits, embedding_model)

    # compression
    compressor = LLMChainExtractor.from_llm(llm)
    compression_retriever = ContextualCompressionRetriever(base_compressor=compressor,
                                                            base_retriever=vectordb.as_retriever(search_kwargs={"k": 20})) # to get maximu diverse context
    # QA retreiver
    qa_chain = RetrievalQA.from_chain_type(llm,
                                            retriever=compression_retriever,
                                            return_source_documents=True)

    return qa_chain({"query": query})

# result["result"]

In [None]:
from langchain.tools.render import format_tool_to_openai_function
format_tool_to_openai_function(QA_retrive_from_text)
QA_retrive_from_text({"id":"","query":})

In [84]:
from langchain import LLMChain, PromptTemplate
from langchain.chains import RetrievalQA
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate

# Define your custom prompt template
custom_prompt = ChatPromptTemplate.from_template(
    input_variables=["context", "question"],
    template="""
    You are an expert assistant in travel and geography. 

    Given the following summaries of books:
    {context}

    And the question or query of user:
    {question}

    extract the following information:
\
    question: same question written differently
    answer: answer to the user
    id: value of metadata with key #Text


    Format the output as JSON with the following keys:
    
    id
    question
    answer
    
    """
)

# prompt = ChatPromptTemplate.from_template(custom_prompt)
# message = custom_prompt.format_messages(question="DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")
qa_chain = custom_prompt | llm

TypeError: langchain_core.prompts.prompt.PromptTemplate() got multiple values for keyword argument 'input_variables'

[Document(metadata={'Unnamed: 0': 1682, 'Author': 'Reid, Thomas H.', 'Title': 'Across the Equator: A Holiday Trip in Java', 'Credits': 'Produced by a Project Gutenberg volunteer from digitalmaterial generously made available by the Internet Archive', 'Language': 'English', 'LoC Class': 'DS: History: General and Eastern Hemisphere: Asia', 'Subject': 'Java (Indonesia) -- History', 'Category': 'Text', 'EBook-No.': 27556, 'Release Date': 'Dec 18, 2008', 'Most Recently Updated': 'Jan 4, 2021', 'Copyright Status': 'Public domain in the USA.', 'Downloads': '235 downloads in the last 30 days.', 'Uniform Title': nan, 'Alternate Title': nan, 'Note': nan, 'Editor': nan, 'Contents': nan, 'Illustrator': nan, 'Author of introduction, etc.': nan, 'Original Publication': nan, 'Language Note': nan, 'Translator': nan, 'Contributor': nan, 'Unknown role': nan, 'Series Title': nan, 'Commentator': nan, 'Creator': nan, 'LoC No.': nan, 'Compiler': nan, 'Annotator': nan, 'Other': nan, 'Adapter': nan, 'Engraver

In [43]:
qa_chain.invoke("DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")

TypeError: Expected mapping type as input to PromptTemplate. Received <class 'str'>.

In [7]:
from langchain_community.vectorstores import FAISS
faiss_vector_store = FAISS.load_local('data', 
                                embeddings=embedding_model
                                ,index_name = 'travel_geography',
                                allow_dangerous_deserialization = True)

In [19]:
# make request
from langchain.chains import RetrievalQA
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=faiss_vector_store.as_retriever(search_kwargs={"k": 20}), 
    verbose=True) # set False when production

qa_stuff.run("in this book with ")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'"Through the Malay Archipelago" by Emily Richings is a travel narrative that explores Indonesian culture, particularly focusing on Java. The book delves into the exotic landscapes, rich heritage, and intricate customs of the region. Richings provides vivid descriptions of the lush environment, unique flora and fauna, and the vibrant local life she encounters. She reflects on the historical significance of the areas she visits, offering insights into the beauty and complexity of these tropical islands. Themes of aspiration, the search for beauty, and understanding diverse cultures are woven throughout.\n\nThe narrative immerses readers in the allure of the Far East, painting poetic images of the ocean\'s sights and sounds. Richings introduces Java with its vibrant scenery, setting a foundation for a deep exploration of the region\'s intricate customs and lush landscapes. She highlights the significant cultural markers of Java, influenced by Hinduism, Islam, and colonial history, showca

In [58]:
from langchain.tools.render import format_tool_to_openai_function

tools=[QA_retrive_from_text]
functions = [format_tool_to_openai_function(f) for f in tools]

model_get_text = llm.bind(functions=functions)



In [62]:
model_get_text.invoke("according to the book dabedidbou, what ist the most relevant destination in africa?")

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"id":8594534,"query":"most relevant destination in Africa"}', 'name': 'QA_retrive_from_text'}}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 110, 'total_tokens': 139, 'completion_tokens_details': None}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_67802d9a6d', 'finish_reason': 'function_call', 'logprobs': None}, id='run-800c639e-9d88-49ea-bb6e-d33020b272df-0')

In [47]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=model, 
    chain_type="stuff", 
    retriever=faiss_vector_store.as_retriever(search_kwargs={"k": 20}), 
    verbose=True) # set False when production


In [59]:

from langchain.prompts import ChatPromptTemplate
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser


prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an assistant with information on a large database of books\
      and you have accessible the summaries. upon receiving a question you can look \
     for information in that database and if you don't find it you can use a tool \
     to look at an entire book in order to find answers for user"),
    ("user", "{input}"),
])
chain = prompt | model_get_text 

In [61]:
chain.invoke("information to travel with 3 adventurous friends in africa")

AIMessage(content="Traveling to Africa with friends can be an exciting adventure filled with diverse experiences. Here are some tips and information to help you plan an adventurous trip:\n\n### Destination Choices\n1. **Safari Adventure**: Explore wildlife in countries like Kenya, Tanzania, or South Africa.\n2. **Desert Exploration**: Visit the Sahara Desert in Morocco or Namibia's Namib Desert.\n3. **Mountain Climbing**: Trek Mount Kilimanjaro in Tanzania or the Atlas Mountains in Morocco.\n4. **Beaches and Islands**: Relax on the beaches of Zanzibar or the islands of Seychelles.\n5. **Cultural Experiences**: Visit historic sites and vibrant cities in Ethiopia, Egypt, or Ghana.\n\n### Planning Tips\n- **Research Destinations**: Understand the best time to visit, local customs, and visa requirements.\n- **Travel Insurance**: Consider comprehensive travel insurance, especially if engaging in adventure activities.\n- **Vaccinations and Health**: Check vaccination requirements and health 

In [40]:
model.invoke({"input":"info on indonesia?"})



[1m> Entering new RetrievalQA chain...[0m


ValueError: Missing some input keys: {'query'}

In [27]:
output = prompt.invoke("DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?")
output

ChatPromptValue(messages=[SystemMessage(content="You are an assistant with information on a large database of books and you have accessible the summaries. upon receiving a question you can look for information in that database and if you don't find it you can use a tool to look at an entire book in order to find answers for user", additional_kwargs={}, response_metadata={}), HumanMessage(content='DO you have information about stuff to do in indonesia? if so can you retreive a book reference and extract from this book the top 5 activities to do with kids?', additional_kwargs={}, response_metadata={})])