#### Incorporating Final RAG components

In [None]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [None]:
# Drive folder the FAISS index is written to (save_local) and read back from (load_local).
save_directory_faiss = "/content/drive/MyDrive/FAISS"

In [None]:
# Build a FAISS index from the split documents, then persist it to
# `save_directory_faiss` so it can be reloaded without re-embedding.
vector_db_faiss = FAISS.from_documents(
    documents=split_documents,
    embedding=embeddings,
)
vector_db_faiss.save_local(save_directory_faiss)

In [None]:
# Reload the index persisted by `save_local`. The deserialization opt-in is
# explicit; keep it only for index files this notebook wrote itself.
vector_db_faiss2 = FAISS.load_local(
    save_directory_faiss,
    embeddings,
    allow_dangerous_deserialization=True,
)

In [None]:
# Three retrieval strategies over the same FAISS index.

# Plain similarity search: the 10 nearest chunks.
retriever_faiss = vector_db_faiss.as_retriever(search_kwargs={"k": 10})

# Maximal marginal relevance: fetch 50 candidates, keep 5 diverse ones.
# `fetch_k` has to live inside `search_kwargs`; as a top-level keyword of
# `as_retriever` it is not forwarded to the search call.
retriever_faiss_mmr = vector_db_faiss.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 50, "lambda_mult": 0.5},
)

# Similarity search that drops hits scoring below the threshold.
retriever_faiss_sst = vector_db_faiss.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.8},
)

In [None]:
# persist_directory = "/content/drive/MyDrive/data/ChromaDB"
# # Using the ChromaDB vector database - use it if the database is very large
# vector_db_chroma = Chroma.from_documents(documents=split_documents, embedding=embeddings, persist_directory=persist_directory,)
# vector_db_chromas = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
# retriever_chroma = vector_db_chroma.as_retriever(search_kwargs={"k": 5})
# retriever_chroma_mmr = vector_db_chroma.as_retriever(search_type = "mmr", search_kwargs={"k": 5, "lambda_mult": 0.5})
# retriever_chroma_sst = vector_db_chroma.as_retriever(search_type = "similarity_score_threshold", search_kwargs={"k": 5, "score_threshold": 0.8})

In [None]:
# Retrieval using MultiQuery: an LLM rewrites the user query into several
# variants and the MMR retriever is run for each of them.
llm_retriever = ChatOpenAI(temperature=0)

retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm_retriever,
    retriever=retriever_faiss_mmr,
)

  warn_deprecated(


In [None]:
# Query the reloaded index directly: the 50 best matches for a sample term, each paired with its score.
results = vector_db_faiss2.similarity_search_with_score("Shloka", k = 50)

In [None]:
# Split the (document, score) pairs into two parallel lists.
documents, scores = [], []
for doc, score in results:
    documents += [doc]
    scores += [score]

In [None]:
# Show which source file the top-ranked hit came from.
documents[0].metadata["source"]

'/content/drive/MyDrive/ncert_ocr/IKS/Lilavati of Bhaskaracharya 1908 - Khemraj_112_text.txt'

In [None]:
# Raise the multi-query retriever's logger to INFO so it logs the queries it generates.
logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

In [None]:
# Generation model for the final answers (high temperature).
llm = ChatOpenAI(
    model='gpt-4o',
    temperature=0.9,
    openai_api_key=openai_api_key,
)

In [None]:
# Prompt templates for the four generation tasks.


# System roles

def _system_prompt(instruction):
    """Return a system-side template: `instruction` followed by the `{data}` slot."""
    return PromptTemplate(
        input_variables=["data"],
        template=instruction + "\n     {data}  ",
    )


prompt_compilation = _system_prompt(
    " You will be given some data below, and a topic by the user. You have to properly and comprehensively compile the data to be relevant to the topic and explain it's concepts clearly from only the given data. Do not make up information."
)

prompt_story_generation = _system_prompt(
    " You will be given some data below, and a topic by the user. You have to properly and comprehensively make a fun kids' story to explain the concepts relevant to the user's query in the datain simple terms so that kids can understand . Do not make up information."
)

prompt_poem_generation = _system_prompt(
    " You will be given some data below, and a topic by the user. You have to properly and comprehensively make a fun kids' poem to explain the concepts relevant to the user's query in the data, in simple terms so that kids can understand. Do not make up information."
)

prompt_image_generation = _system_prompt(
    " You will be given some story or poem below, and a topic by the user. Generate a detailed prompt, under 500 characters of length, to generate an image based on story/poem and the topic provided by the user."
)


# Human roles: every task passes the user's topic through unchanged.

human_template_compilation = "{topic}"
human_template_story_generation = "{topic}"
human_template_poem_generation = "{topic}"
human_template_image_generation = "{topic}"

In [None]:
# Compilation task: system instructions (with {data}) + the user's {topic}.
human_message_prompt_1 = HumanMessagePromptTemplate.from_template(human_template_compilation)
system_message_prompt_1 = SystemMessagePromptTemplate(prompt=prompt_compilation)
chat_prompt_compilation = ChatPromptTemplate.from_messages(
    [system_message_prompt_1, human_message_prompt_1]
)

In [None]:
# Story task: system instructions (with {data}) + the user's {topic}.
human_message_prompt_2 = HumanMessagePromptTemplate.from_template(human_template_story_generation)
system_message_prompt_2 = SystemMessagePromptTemplate(prompt=prompt_story_generation)
chat_prompt_story_generation = ChatPromptTemplate.from_messages(
    [system_message_prompt_2, human_message_prompt_2]
)

In [None]:
# Poem task: system instructions (with {data}) + the user's {topic}.
human_message_prompt_3 = HumanMessagePromptTemplate.from_template(human_template_poem_generation)
system_message_prompt_3 = SystemMessagePromptTemplate(prompt=prompt_poem_generation)
chat_prompt_poem_generation = ChatPromptTemplate.from_messages(
    [system_message_prompt_3, human_message_prompt_3]
)

In [None]:
# Image-prompt task: system instructions (with {data}) + the user's {topic}.
human_message_prompt_4 = HumanMessagePromptTemplate.from_template(human_template_image_generation)
system_message_prompt_4 = SystemMessagePromptTemplate(prompt=prompt_image_generation)
chat_prompt_image_generation = ChatPromptTemplate.from_messages(
    [system_message_prompt_4, human_message_prompt_4]
)

In [None]:
# Separate temperature-0 model used only to extract the query-relevant
# parts of each retrieved chunk.
llm_compressor = ChatOpenAI(
    model='gpt-4',
    temperature=0,
)
compressor = LLMChainExtractor.from_llm(llm_compressor)
# compression_retriever = ContextualCompressionRetriever(base_compressor=compressor,
#                                                        base_retriever=vectordb2.as_retriever(search_type="mmr",search_kwargs={"k":8, "lambda_mult": 0.8}))

In [None]:
# Pipeline stages, in the order the main loop walks through them.
processes = [
    "compilation",
    "story_generation",
    "poem_generation",
    "image_generation",
]
i = 1

In [None]:

# Main pipeline: ask for a query, then run each stage listed in `processes`.
# Only the "compilation" stage is implemented; the others are placeholders.
input_query = input("Your query: ")
for process in processes:
  if process == "compilation":
    print("Fetching relevant documents.... \n\n")
    # `invoke` is the supported entry point; `get_relevant_documents` is deprecated.
    unique_docs_multi_query = retriever_from_llm.invoke(input_query)
    print("Displaying relevant document sources... \n\n")
    # Iterate directly so the notebook-level `i` is not overwritten.
    for retrieved_doc in unique_docs_multi_query:
      print(retrieved_doc.metadata)
    print("Compressing chunks for information density... \n\n")
    input_data = compressor.compress_documents(unique_docs_multi_query, query=input_query)
    # Fall back to the uncompressed chunks when compression leaves fewer than two.
    if (len(input_data)<2):
      input_data = unique_docs_multi_query
    print("\n")
    print(input_data)
    print("\n")
    # NOTE(review): `data` receives the Document list itself, so the prompt
    # contains its repr (metadata and escaped newlines included).
    chat_prompt_with_values = chat_prompt_compilation.format_prompt(topic=input_query, data=input_data)
    messages_compilation = chat_prompt_with_values.to_messages()
    print("\n")
    print("Compiling your data... \n\n")
    # Calling the chat model object directly is deprecated; use `invoke`.
    response = llm.invoke(messages_compilation)
    print(response.content)
    # Keep the answer in the history: system, human, then AI message.
    messages_compilation.append(AIMessage(content=response.content))
    print("\n\n\n\n")

  elif process == "story_generation":
    print("generating story... \n")

  elif process == "poem_generation":
    print("hello")

  elif process == "image_generation":
    print("hello")


# else:
#   # appending logic
#   for process in processes:
#     if process == "compilation":
#         print("Fetching relevant documents.... \n\n")
#         unique_docs_multi_query = retriever_from_llm.get_relevant_documents(query=input_query)
#         print("Displaying relevant document sources... \n\n")
#         for i in range(len(unique_docs_multi_query)):
#           print(unique_docs_multi_query[i].metadata)
#         print("Compressing chunks for information density... \n\n")
#         input_data = compressor.compress_documents(unique_docs_multi_query, query=input_query)
#         if (len(input_data)<2):
#           input_data = unique_docs_multi_query
#         print(input_data)
#         system_prompt_with_values = system_message_prompt.format(data = input_data)
#         human_prompt_with_values = human_message_prompt.format(topic = input_query)
#         messages_compilation.append(SystemMessage(content = system_prompt_with_values.content))
#         messages_compilation.append(HumanMessage(content = human_prompt_with_values.content))
#         print("Compiling your response... \n\n")
#         response = llm(messages_compilation)
#         print(response.content)
#         messages_compilation.append(AIMessage(content=response.content))
#         print("\n\n\n\n")

#     elif process == "story_generation":
#         print("hello")
#     elif process == "poem_generation":
#         print("hello")
#     elif process == "image_generation":
#         print("hello")

In [None]:
# Print the Python version of the runtime's `python` executable.
!python --version

Python 3.10.12


In [None]:
# Number of messages in the conversation (system + human + appended AI reply).
len(messages_compilation)

3

In [None]:
# Inspect the full message list that was sent to / received from the model.
messages_compilation

[SystemMessage(content=" You will be given some data below, and a topic by the user. You have to properly and comprehensively compile the data to be relevant to the topic and explain it's concepts clearly from only the given data. Do not make up information.\n     [Document(page_content='लिखते हैं   एक श्लोकमें.\\n\\nसाध्येनोनोऽनल्पवर्णों विधेयः साध्यो वर्णः खल्पवर्णोंनितश्व॥\\n\\nइप्टक्सण्णे  शेषके स्वर्णमाने  स्यातां स्वल्पानल्पयोर्वर्णयोस्ते२९॥\\n\\n: अन्वयः-अनल्पवर्णः साध्येन ऊनः विधेयः - । साध्यः वर्णः - च. ख़ल्पवर्णाँवितः\\n\\nविधियः । ततः स्वल्पानल्पयोः वर्णयोः शेषके इष्ट्र्सुण्णे स्वर्णमाने स्यातामू ॥ २९. स\\n\\nCC-0. Late Pt. Manmohan Shastri Collection Jammu. Digitized by eGangotri', metadata={'source': '/content/drive/MyDrive/ncert_ocr/IKS/Lilavati of Bhaskaracharya 1908 - Khemraj_112_text.txt'}), Document(page_content='जानते हैं तहां गच्छ जाननेकी रीति एक श्लोकमें लिखते हैं–\\n\\nथ्रेटी फ़्लाइटरहोचन ना च्चयाई व झन्तर वर्ग युक्तात् ॥\\n\\nमूलं मुखोनं चयलण्ड्युक्तं चयोद्धतं गच

In [None]:
# The first message is the system prompt with the retrieved data filled in.
messages_compilation[0].content

" You will be given some data below, and a topic by the user. You have to properly and comprehensively compile the data to be relevant to the topic and explain it's concepts clearly from only the given data. Do not make up information.\n     [Document(page_content='लिखते हैं   एक श्लोकमें.\\n\\nसाध्येनोनोऽनल्पवर्णों विधेयः साध्यो वर्णः खल्पवर्णोंनितश्व॥\\n\\nइप्टक्सण्णे  शेषके स्वर्णमाने  स्यातां स्वल्पानल्पयोर्वर्णयोस्ते२९॥\\n\\n: अन्वयः-अनल्पवर्णः साध्येन ऊनः विधेयः - । साध्यः वर्णः - च. ख़ल्पवर्णाँवितः\\n\\nविधियः । ततः स्वल्पानल्पयोः वर्णयोः शेषके इष्ट्र्सुण्णे स्वर्णमाने स्यातामू ॥ २९. स\\n\\nCC-0. Late Pt. Manmohan Shastri Collection Jammu. Digitized by eGangotri', metadata={'source': '/content/drive/MyDrive/ncert_ocr/IKS/Lilavati of Bhaskaracharya 1908 - Khemraj_112_text.txt'}), Document(page_content='जानते हैं तहां गच्छ जाननेकी रीति एक श्लोकमें लिखते हैं–\\n\\nथ्रेटी फ़्लाइटरहोचन ना च्चयाई व झन्तर वर्ग युक्तात् ॥\\n\\nमूलं मुखोनं चयलण्ड्युक्तं चयोद्धतं गच्छमुदाहरन्ति ॥ ३७॥\\n\\

In [None]:
list = []

In [None]:
for i in range(len(messages_compilation)):
  list.append(messages_compilation[i].content)

In [None]:
list

[" You will be given some data below, and a topic by the user. You have to properly and comprehensively compile the data to be relevant to the topic and explain it's concepts clearly from only the given data. Do not make up information.\n     [Document(page_content='लिखते हैं   एक श्लोकमें.\\n\\nसाध्येनोनोऽनल्पवर्णों विधेयः साध्यो वर्णः खल्पवर्णोंनितश्व॥\\n\\nइप्टक्सण्णे  शेषके स्वर्णमाने  स्यातां स्वल्पानल्पयोर्वर्णयोस्ते२९॥\\n\\n: अन्वयः-अनल्पवर्णः साध्येन ऊनः विधेयः - । साध्यः वर्णः - च. ख़ल्पवर्णाँवितः\\n\\nविधियः । ततः स्वल्पानल्पयोः वर्णयोः शेषके इष्ट्र्सुण्णे स्वर्णमाने स्यातामू ॥ २९. स\\n\\nCC-0. Late Pt. Manmohan Shastri Collection Jammu. Digitized by eGangotri', metadata={'source': '/content/drive/MyDrive/ncert_ocr/IKS/Lilavati of Bhaskaracharya 1908 - Khemraj_112_text.txt'}), Document(page_content='जानते हैं तहां गच्छ जाननेकी रीति एक श्लोकमें लिखते हैं–\\n\\nथ्रेटी फ़्लाइटरहोचन ना च्चयाई व झन्तर वर्ग युक्तात् ॥\\n\\nमूलं मुखोनं चयलण्ड्युक्तं चयोद्धतं गच्छमुदाहरन्ति ॥ ३७॥\\n\

In [None]:
import os

# Define file paths
system_messages_file = './responses/system/system_messages.txt'
human_messages_file = './responses/human/human_messages.txt'
ai_messages_file = './responses/ai/ai_messages.txt'

# Create the target folders first; open(..., 'w') does not create them.
for messages_path in (system_messages_file, human_messages_file, ai_messages_file):
    os.makedirs(os.path.dirname(messages_path), exist_ok=True)

# Initialize counters for each type of message
system_index = 1
human_index = 1
ai_index = 1

# Open files for writing. UTF-8 is explicit because the message contents
# include Devanagari text.
with open(system_messages_file, 'w', encoding='utf-8') as sys_file, \
     open(human_messages_file, 'w', encoding='utf-8') as hum_file, \
     open(ai_messages_file, 'w', encoding='utf-8') as ai_file:
    # Iterate over the conversation built by the compilation stage.
    # (`messages` is not defined at this point in a top-to-bottom run.)
    for message in messages_compilation:
        # Check the type of the message and write to the corresponding file
        if isinstance(message, SystemMessage):
            sys_file.write(f"SystemMessage index {system_index}:\n{message.content}\n\n")
            system_index += 1
        elif isinstance(message, HumanMessage):
            hum_file.write(f"HumanMessage index {human_index}:\n{message.content}\n\n")
            human_index += 1
        elif isinstance(message, AIMessage):
            ai_file.write(f"AIMessage index {ai_index}:\n{message.content}\n\n")
            ai_index += 1

print("Messages have been written to their respective files.")

### **Code for various methods used for integration into RAG**

#### Base RAG

In [None]:
# Completion-style model for the base RAG chain; rebinds the notebook-wide `llm`.
llm = OpenAI(
    max_tokens=2048,
    temperature=0.9,
)

  warn_deprecated(


In [None]:
# FAISS index over the split documents for the base RAG demo.
vector_db = FAISS.from_documents(
    documents=split_documents,
    embedding=embeddings,
)

In [None]:
# Similarity retriever returning the 10 closest chunks.
retriever = vector_db.as_retriever(search_kwargs={"k": 10})
q = input("Please ask your query : ")
# Fetch the relevant documents for the query. `invoke` replaces the
# deprecated `get_relevant_documents`.
rdocs = retriever.invoke(q)

In [None]:
# Inspect the retrieval result: container type, number of hits, then the hits themselves.
print(type(rdocs))
print(len(rdocs))
rdocs

In [None]:
# Design the prompt template for the base RAG chain.
# Import from `langchain.prompts` (as the chat templates are) rather than
# the deprecated root-level `langchain` re-export.
from langchain.prompts import PromptTemplate
prompt_template = """Use the following pieces of context to answer the question at the end, only using the data you are presented. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as detailed as possible. Always say "Namaskaram" at the end of the answer.
context = {context}
question = {question}
Answer: """
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [None]:
# PromptTemplate variant that prints the question and context it is formatted with.
class CustomPromptTemplate(PromptTemplate):
    """Prompt template that prints its `question` and `context` inputs when formatted."""

    def format_prompt(self, **kwargs):
        """Print the question and context (None if absent), then format as usual."""
        for label, key in (("Question", "question"), ("Context", "context")):
            print(f"{label}: {kwargs.get(key)}")

        return super().format_prompt(**kwargs)

# Rebind PROMPT to the printing variant of the same template.
PROMPT = CustomPromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Retrieval QA chain using the custom prompt; the retrieved source
# documents are returned alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    input_key="query",
    return_source_documents=True,
)


In [None]:
# Run the QA chain on the query entered earlier.
result = chain.invoke(q)

In [None]:
# The generated answer is stored under the 'result' key.
print(result['result'])

In [None]:
# Number of source documents returned with the answer (return_source_documents=True).
len(result['source_documents'])

#### Multi Query Advanced Retrieval

In [None]:
!pip install chromadb



In [None]:
# Chroma vector store over the same split documents (no persist directory given).
vectordb2 = Chroma.from_documents(
    documents=split_documents,
    embedding=embeddings,
)

In [None]:
# Raise the multi-query retriever's logger to INFO so it logs the queries it generates.
logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

In [None]:
# Sample question plus a multi-query retriever on top of Chroma MMR search (k=8).
question = "What are arteries and what is their difference with the pulmonary artery"
llm = ChatOpenAI(temperature=0)

retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectordb2.as_retriever(
        search_type="mmr",
        search_kwargs={"k":8},
    ),
)

  warn_deprecated(


In [None]:
# Run every generated query variant and collect the resulting documents.
# `invoke` replaces the deprecated `get_relevant_documents`.
unique_docs = retriever_from_llm.invoke(question)

  warn_deprecated(
INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you explain the function and distinction between arteries and the pulmonary artery?', '2. How do arteries differ from the pulmonary artery in terms of their roles in the circulatory system?', '3. What separates arteries from the pulmonary artery in terms of their anatomical and physiological characteristics?']


In [None]:
# A cell only echoes its final expression, so the count must be printed
# explicitly to appear in the output.
print(len(unique_docs))
unique_docs

[Document(page_content='SCIENCE\n72\nTable 7.1 Pulse rate\nS. No.\nName\nPulse per minute\n1.\n2.\n3.\n4.\n5.\nPaheli explained that the\npulmonary artery carries blood\nfrom the heart, so it is called an\nartery and not a vein. It carries\ncarbon dioxide-rich blood to the\nlungs. Pulmonary vein carries\noxygen-rich blood from the\nlungs to the heart. \nVeins are the vessels which carry\ncarbon dioxide-rich blood from all parts', metadata={'author': 'admin', 'creationDate': "D:20220830115851+05'30'", 'creator': 'PageMaker 7.0', 'file_path': '/content/drive/MyDrive/ncert_ocr/Class 7 Science/Class 7 - 107.pdf', 'format': 'PDF 1.6', 'keywords': '', 'modDate': "D:20240521135656+05'30'", 'page': 2, 'producer': 'Acrobat Distiller 10.1.16 (Windows)', 'source': '/content/drive/MyDrive/ncert_ocr/Class 7 Science/Class 7 - 107.pdf', 'subject': '', 'title': 'Ch_07.pmd', 'total_pages': 12, 'trapped': ''}),
 Document(page_content='Pulmonary\nartery\nPulmonary\nvein\nFig. 7.3  Schematic diagram of ci

In [None]:
# Plain question-answering prompt; rebinds `prompt_template` and `PROMPT`.
prompt_template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer:"""
PROMPT = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

In [None]:
# Fill the prompt with the retrieved documents and the question.
text = PROMPT.format_prompt(question=question, context=unique_docs)

In [None]:
# Display the retrieved documents again for reference.
unique_docs

[Document(page_content='SCIENCE\n72\nTable 7.1 Pulse rate\nS. No.\nName\nPulse per minute\n1.\n2.\n3.\n4.\n5.\nPaheli explained that the\npulmonary artery carries blood\nfrom the heart, so it is called an\nartery and not a vein. It carries\ncarbon dioxide-rich blood to the\nlungs. Pulmonary vein carries\noxygen-rich blood from the\nlungs to the heart. \nVeins are the vessels which carry\ncarbon dioxide-rich blood from all parts', metadata={'author': 'admin', 'creationDate': "D:20220830115851+05'30'", 'creator': 'PageMaker 7.0', 'file_path': '/content/drive/MyDrive/ncert_ocr/Class 7 Science/Class 7 - 107.pdf', 'format': 'PDF 1.6', 'keywords': '', 'modDate': "D:20240521135656+05'30'", 'page': 2, 'producer': 'Acrobat Distiller 10.1.16 (Windows)', 'source': '/content/drive/MyDrive/ncert_ocr/Class 7 Science/Class 7 - 107.pdf', 'subject': '', 'title': 'Ch_07.pmd', 'total_pages': 12, 'trapped': ''}),
 Document(page_content='Pulmonary\nartery\nPulmonary\nvein\nFig. 7.3  Schematic diagram of ci

In [None]:
# Answer from the uncompressed documents. `predict` is deprecated; `invoke`
# on the chat model returns a message whose `.content` is the answer string.
llm.invoke(PROMPT.format_prompt(
    context=unique_docs,
    question=question
).text).content

  warn_deprecated(


'Arteries are blood vessels that carry blood away from the heart to various organs of the body. The main difference between arteries and the pulmonary artery is that the pulmonary artery carries carbon dioxide-rich blood from the heart to the lungs, while other arteries carry oxygen-rich blood to different parts of the body.'

In [None]:
# Contextual compression: an LLM extractor trims each chunk returned by
# the Chroma MMR retriever down to the parts relevant to the query.
llm = ChatOpenAI(model='gpt-4', temperature=0)

compressor = LLMChainExtractor.from_llm(llm)
mmr_base_retriever = vectordb2.as_retriever(
    search_type="mmr",
    search_kwargs={"k":8, "lambda_mult": 0.8},
)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=mmr_base_retriever,
)

In [None]:
# Retrieve and compress in one step. `invoke` replaces the deprecated
# `get_relevant_documents`.
compressed_docs = compression_retriever.invoke(question)

In [None]:
# How many documents survived compression, and the container type.
print(len(compressed_docs))
type(compressed_docs)

1


list

In [None]:
# Print the compressed text of each surviving document.
for compressed_doc in compressed_docs:
  print(compressed_doc.page_content)

Paheli explained that the pulmonary artery carries blood from the heart, so it is called an artery and not a vein. It carries carbon dioxide-rich blood to the lungs. Pulmonary vein carries oxygen-rich blood from the lungs to the heart.


In [None]:
# Answer again from the compressed context. `predict` is deprecated;
# `invoke(...).content` yields the same answer string.
output = llm.invoke(PROMPT.format_prompt(
    context=compressed_docs,
    question=question
).text).content

type(output)

str

In [None]:
# Rebuild the temperature-0 multi-query retriever (the compression cells
# above rebound `llm` to gpt-4).
question = "What are arteries and what is their difference with the pulmonary artery"
llm = ChatOpenAI(temperature=0)

retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectordb2.as_retriever(
        search_type="mmr",
        search_kwargs={"k":8},
    ),
)


#### Advanced RAG

In [None]:
# Temperature-0 gpt-4o chat model for the Advanced RAG section.
llm = ChatOpenAI(
    model='gpt-4o',
    temperature=0,
    openai_api_key=openai_api_key,
)

In [None]:
# Hand-written conversation to show how role-tagged messages are passed to a chat model.
messages = [
    SystemMessage(content="You are a helpful assistant that translates English to Sankrit."),
    HumanMessage(content="What is the capital of India?"),
    AIMessage(content="New Delhi"),
    HumanMessage(content="How many states are there in India?")
]
# Calling the model object directly is deprecated; use `invoke`.
llm.invoke(messages)

  warn_deprecated(


AIMessage(content='भारतस्य कति राज्यानि सन्ति?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 48, 'total_tokens': 58}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_319be4768e', 'finish_reason': 'stop', 'logprobs': None}, id='run-2259710d-4f9f-4915-adcf-7291333b0386-0')

In [None]:
# System-side instruction for the poem demo; `{data}` receives the retrieved context.
prompt = PromptTemplate(
    template=''' Give a funny poem, given the user's query and data given below
     {data}  ''',
    input_variables=["data"],
)

system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)

In [None]:
# Preview the system message with the compressed documents substituted for {data}.
system_message_prompt.format(data = compressed_docs)

SystemMessage(content=' Give a funny poem, given the user\'s query and data given below\n     [Document(page_content=\'Paheli explained that the pulmonary artery carries blood from the heart, so it is called an artery and not a vein. It carries carbon dioxide-rich blood to the lungs. Pulmonary vein carries oxygen-rich blood from the lungs to the heart.\', metadata={\'author\': \'admin\', \'creationDate\': "D:20220830115851+05\'30\'", \'creator\': \'PageMaker 7.0\', \'file_path\': \'/content/drive/MyDrive/ncert_ocr/Class 7 Science/Class 7 - 107.pdf\', \'format\': \'PDF 1.6\', \'keywords\': \'\', \'modDate\': "D:20240521135656+05\'30\'", \'page\': 2, \'producer\': \'Acrobat Distiller 10.1.16 (Windows)\', \'source\': \'/content/drive/MyDrive/ncert_ocr/Class 7 Science/Class 7 - 107.pdf\', \'subject\': \'\', \'title\': \'Ch_07.pmd\', \'total_pages\': 12, \'trapped\': \'\'})]  ')

In [None]:
# Human-side template: the user's query is forwarded unchanged.
human_template = '{query}'
human_message_prompt = HumanMessagePromptTemplate.from_template(template=human_template)

In [None]:
# Combine both templates; the result expects `data` and `query`.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)
chat_prompt

ChatPromptTemplate(input_variables=['data', 'query'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['data'], template=" Give a funny poem, given the user's query and data given below\n     {data}  ")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], template='{query}'))])

In [None]:
# Ask the user for the topic to retrieve on and write about.
input_query = input("Please input your query: ")

Please input your query: meristematic tissue


In [None]:
# Multi-query retrieval for the user's topic. `invoke` replaces the
# deprecated `get_relevant_documents`.
input_data = retriever_from_llm.invoke(input_query)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the function of meristematic tissue in plants?', '2. How does meristematic tissue contribute to plant growth and development?', '3. Can you explain the role of meristematic tissue in plant biology?']


In [None]:
# Dump the retrieved documents (page content and metadata) for inspection.
print(input_data)

[Document(page_content='Lateral meristem\nuestions\n1.\nWhat is a tissue?\n2.\nWhat is the utility of tissues in\nmulti-cellular organisms?\n6.2 Plant Tissues\n6.2.1 MERISTEMATIC TISSUE\n•\nFrom the above observations, answer\nthe following questions:\n1. Which of the two onions has longer\nroots? Why?\n2. Do the roots continue growing\neven after we have removed their\ntips?\n3. Why would the tips stop growing', metadata={'author': 'admin', 'creationDate': "D:20220822205120+05'30'", 'creator': 'Bullzip PDF Printer (12.2.0.2905)', 'file_path': '/content/drive/MyDrive/ncert_ocr/Class 9 Science/Class 9 - 106.pdf', 'format': 'PDF 1.5', 'keywords': '', 'modDate': "D:20240520133837+05'30'", 'page': 1, 'producer': 'PDF Printer / www.bullzip.com / FG / Freeware Edition (max 10 users)', 'source': '/content/drive/MyDrive/ncert_ocr/Class 9 Science/Class 9 - 106.pdf', 'subject': '', 'title': 'D:\\Textbooks\\Rationalised Books\\0964-Science\\1 Source Files\\Chapter-6\\CHAP 6.pmd', 'total_pages': 1

In [None]:
# Fill the chat prompt with the retrieved documents and turn it into role-tagged messages.
chat_prompt_with_values = chat_prompt.format_prompt(
    data=input_data,
    query=input_query,
)
messages = chat_prompt_with_values.to_messages()
messages

[SystemMessage(content=' Give a funny poem, given the user\'s query and data given below\n     [Document(page_content=\'Lateral meristem\\nuestions\\n1.\\nWhat is a tissue?\\n2.\\nWhat is the utility of tissues in\\nmulti-cellular organisms?\\n6.2 Plant Tissues\\n6.2.1 MERISTEMATIC TISSUE\\n•\\nFrom the above observations, answer\\nthe following questions:\\n1. Which of the two onions has longer\\nroots? Why?\\n2. Do the roots continue growing\\neven after we have removed their\\ntips?\\n3. Why would the tips stop growing\', metadata={\'author\': \'admin\', \'creationDate\': "D:20220822205120+05\'30\'", \'creator\': \'Bullzip PDF Printer (12.2.0.2905)\', \'file_path\': \'/content/drive/MyDrive/ncert_ocr/Class 9 Science/Class 9 - 106.pdf\', \'format\': \'PDF 1.5\', \'keywords\': \'\', \'modDate\': "D:20240520133837+05\'30\'", \'page\': 1, \'producer\': \'PDF Printer / www.bullzip.com / FG / Freeware Edition (max 10 users)\', \'source\': \'/content/drive/MyDrive/ncert_ocr/Class 9 Science

In [None]:
# Rebuild the compression retriever with gpt-4o as the extractor model.
llm = ChatOpenAI(model='gpt-4o', temperature=0)

compressor = LLMChainExtractor.from_llm(llm)
mmr_base_retriever = vectordb2.as_retriever(
    search_type="mmr",
    search_kwargs={"k":8},
)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=mmr_base_retriever,
)

In [None]:
# Replace the raw hits with compressed ones for the same topic. `invoke`
# replaces the deprecated `get_relevant_documents`.
input_data = compression_retriever.invoke(input_query)

In [None]:
# Re-render the chat prompt, this time with the compressed documents as `data`.
chat_prompt_with_values = chat_prompt.format_prompt(
    data=input_data,
    query=input_query,
)
messages = chat_prompt_with_values.to_messages()
messages

[SystemMessage(content=' Give a funny poem, given the user\'s query and data given below\n     [Document(page_content=\'meristematic tissues are classified as apical,\\nlateral and intercalary (Fig. 6.2). New cells\\nproduced by meristem are initially like those\\nof meristem itself, but as they grow and\\nmature, their characteristics slowly change and\\nthey become differentiated as components of\\nother tissues.\', metadata={\'author\': \'admin\', \'creationDate\': "D:20220822205120+05\'30\'", \'creator\': \'Bullzip PDF Printer (12.2.0.2905)\', \'file_path\': \'/content/drive/MyDrive/ncert_ocr/Class 9 Science/Class 9 - 106.pdf\', \'format\': \'PDF 1.5\', \'keywords\': \'\', \'modDate\': "D:20240520133837+05\'30\'", \'page\': 1, \'producer\': \'PDF Printer / www.bullzip.com / FG / Freeware Edition (max 10 users)\', \'source\': \'/content/drive/MyDrive/ncert_ocr/Class 9 Science/Class 9 - 106.pdf\', \'subject\': \'\', \'title\': \'D:\\\\Textbooks\\\\Rationalised Books\\\\0964-Science\\

In [None]:
# Generate the poem. Calling the model object directly is deprecated; use `invoke`.
response = llm.invoke(messages)
response

AIMessage(content="In the world of plants, there's a tissue,\nMeristematic, it's got a special issue.\nIt divides and grows, in regions aplenty,\nApical, lateral, intercalary, count them, that's twenty!\n\nNew cells it produces, like a magic trick,\nBut as they mature, their roles they pick.\nFrom meristem they come, but not they stay,\nDifferentiated they become, in a fascinating way.\n\nThey take up roles, and then retire,\nLose the ability to divide, no more to aspire.\nThey form a permanent tissue, oh what a fate,\nIn the grand scheme of life, they participate.\n\nIn the cortex, a strip of secondary kind,\nForms layers of cells, a cork you'll find.\nSo here's to meristematic, the tissue so fine,\nIn the story of growth, it's the main headline!", response_metadata={'token_usage': {'completion_tokens': 177, 'prompt_tokens': 1280, 'total_tokens': 1457}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-20a54f16-5087-41eb-822e-bc3854

In [None]:
# Print only the generated text so newlines render properly.
print(response.content)

In the world of plants, there's a tissue,
Meristematic, it's got a special issue.
It divides and grows, in regions aplenty,
Apical, lateral, intercalary, count them, that's twenty!

New cells it produces, like a magic trick,
But as they mature, their roles they pick.
From meristem they come, but not they stay,
Differentiated they become, in a fascinating way.

They take up roles, and then retire,
Lose the ability to divide, no more to aspire.
They form a permanent tissue, oh what a fate,
In the grand scheme of life, they participate.

In the cortex, a strip of secondary kind,
Forms layers of cells, a cork you'll find.
So here's to meristematic, the tissue so fine,
In the story of growth, it's the main headline!


In [None]:
# Streaming variant: the stdout handler prints tokens as they arrive.
# `callbacks=[...]` replaces the deprecated `callback_manager=` argument,
# and `invoke` replaces calling the model object directly.
chat = ChatOpenAI(
    openai_api_key=openai_api_key,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    verbose=True,
    temperature=0,
)
resp = chat.invoke(chat_prompt_with_values.to_messages())

In the plant, there's a tissue so fine,
Meristematic, it's the dividing line.
Apical, lateral, and intercalary too,
These tissues know just what to do.

New cells they create, fresh and green,
Like a meristem, they start unseen.
But as they grow, their fate is clear,
Differentiation whispers in their ear.

They lose the power to divide and clone,
Becoming part of other tissues, they're not alone.
So next time you see a plant so grand,
Remember the meristematic tissue, oh so grand!

##### Google Search Feature

In [None]:
!pip install google-search-results

Collecting google-search-results
  Downloading google_search_results-2.4.2.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: google-search-results
  Building wheel for google-search-results (setup.py) ... [?25l[?25hdone
  Created wheel for google-search-results: filename=google_search_results-2.4.2-py3-none-any.whl size=32004 sha256=27f338300084525bd7e7823adc9d3368577f82281aa5379d01b3366a3b2b2b5a
  Stored in directory: /root/.cache/pip/wheels/d3/b2/c3/03302d12bb44a2cdff3c9371f31b72c0c4e84b8d2285eeac53
Successfully built google-search-results
Installing collected packages: google-search-results
Successfully installed google-search-results-2.4.2


In [None]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.llms import OpenAI
from google.colab import userdata
# Expose the SerpAPI key from Colab's secret store through the environment.
os.environ["SERPAPI_API_KEY"] = userdata.get("SERP_API_KEY")

In [None]:
# Temperature-0 completion model that drives the agent; rebinds the notebook-wide `llm`.
llm = OpenAI(temperature=0)

In [None]:
# Load the SerpAPI search tool for the agent.
tool_names = [
    "serpapi",
]
tools = load_tools(tool_names)

In [None]:
# Zero-shot ReAct agent over the loaded tools; verbose mode prints each reasoning step.
agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    verbose=True,
)

  warn_deprecated(


In [None]:
# Let the agent answer the same topic the user entered for the RAG demo.
agent.run(input_query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Meristematic tissue is a type of plant tissue that is responsible for growth and development.
Action: Search
Action Input: "meristematic tissue"[0m
Observation: [36;1m[1;3mMeristematic tissues are cells or group of cells that have the ability to divide. These tissues in a plant consist of small, densely packed cells that can keep dividing to form new cells.[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Meristematic tissue is a type of plant tissue that is responsible for growth and development by continuously dividing and producing new cells.[0m

[1m> Finished chain.[0m


str

##### Wolfram Alpha Integration

In [None]:
# Swap the tool set for Wolfram Alpha; rebinds `tool_names` and `tools`.
tool_names = [
    "wolfram-alpha",
]
tools = load_tools(tool_names)

#### Ensemble Retrieval

In [None]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [None]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

In [None]:
# Keyword (BM25) retriever over the split documents. BM25 is lexical, so no
# embedding model is passed; despite its name this is a retriever, not a
# vector store.
vector_db_bm25 = BM25Retriever.from_documents(documents=split_documents)
# Return the top 4 matches per query.
vector_db_bm25.k = 4

In [None]:
# Dense retriever (top 4) backed by a Chroma store over the same documents.
# The name ends up bound to the retriever, not the store.
vectordb_chroma = Chroma.from_documents(
    documents=split_documents,
    embedding=embeddings,
).as_retriever(search_kwargs={"k": 4})

In [None]:
# Combine the BM25 and Chroma retrievers with equal weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_db_bm25, vectordb_chroma],
    weights=[0.5, 0.5],
    verbose=True,
)

In [None]:
# Query the ensemble. `invoke` replaces the deprecated `get_relevant_documents`.
ensemble_docs = ensemble_retriever.invoke("what is the ovary?")
ensemble_docs

[Document(page_content='using a lens (Fig. 4.26). Do you see some\nsmall bead like structures inside the\novary? They are called ovules. Draw and\nlabel the inner parts of the ovary in your\nnotebook.\nTry to find out the names of as many\nflowers as you can by asking the\ngardener or any other person.\nRemember, not to pluck more flowers\nthan you need. Based on what you have\nfilled in Table 4.3, answer the following\nquestions.', metadata={'source': '/content/drive/MyDrive/ncert_ocr/Class 6 Science/Class 6 - 104.pdf', 'file_path': '/content/drive/MyDrive/ncert_ocr/Class 6 Science/Class 6 - 104.pdf', 'page': 10, 'total_pages': 14, 'format': 'PDF 1.6', 'title': 'Chapter 4.pmd', 'author': 'admin', 'subject': '', 'keywords': '', 'creator': 'PageMaker 7.0', 'producer': 'Acrobat Distiller 10.1.16 (Windows)', 'creationDate': "D:20220912091940+05'30'", 'modDate': "D:20230301123816+05'30'", 'trapped': ''}),
 Document(page_content='Science\n124\nWhen a girl is born, the ovaries already contai