## Install the required packages

In [7]:
%pip install -qU transformers accelerate sentence_transformers langchain_community langchain-huggingface  pymilvus langchain langchain-milvus langchain-openai

Note: you may need to restart the kernel to use updated packages.


In [8]:
%pip install --upgrade --quiet  rank_bm25

Note: you may need to restart the kernel to use updated packages.


In [9]:
%pip install --upgrade --quiet  flashrank

Note: you may need to restart the kernel to use updated packages.


## Import the necessary modules

In [10]:
import pandas as pd
import numpy as np
from langchain.docstore.document import Document
#from nltk.corpus import wordnet
from nltk.stem import PorterStemmer
import nltk
#from langchain_milvus.utils.sparse import BM25SparseEmbedding
from langchain.retrievers import EnsembleRetriever
#from transformers import DPRContextEncoder, DPRContextEncoderTokenizerFast
#from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.retrievers import BM25Retriever

In [11]:
#pickle helper functions for loading the raptor index
import pickle
def save_chunk_to_pickle(results,file_path):
    """Pickle `results` and write it to `file_path`.

    The file is opened in binary write mode, so any existing file at that
    path is overwritten.
    """
    with open(file_path, mode='wb') as pickle_file:
        pickle.dump(results, pickle_file)
        
        
def load_chunks_to_pickle(file_path):
    """Read the pickle file at `file_path` and return the unpickled object.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at files from a trusted source.
    """
    with open(file_path, mode='rb') as pickle_file:
        return pickle.load(pickle_file)


## Load the RAPTOR index, format the metadata, and perform the collapsed-tree retrieval described in the paper

In [12]:
# load the raptor index output from the raptor-index_final_kaggle.ipynb notebook
# The later cells index this object as raptor_results[depth][0] / raptor_results[depth][1]
# and read "text_chunk" and "metadata" columns from those entries.
# NOTE(review): the path is an absolute Kaggle input path and the file is unpickled as-is;
# confirm the dataset is attached and trusted before running.
raptor_results = load_chunks_to_pickle("/kaggle/input/raptor-index/rec_results_full.pkl")

In [13]:
# we can see the metadata needs a bit of formatting 
raptor_results[5][1]

Unnamed: 0,text_chunk,depth,cluster,metadata
0,The text provides a comprehensive overview of ...,5,0,[[[[[{'book_name': 'Introduction to Autonomous...


In [14]:
# helper function to flatten the nested lists of dicts in the metadata into a flat list
def flatten_metadata(metadata):
    """Return `metadata` as a flat list, unwrapping any level of list nesting.

    - A non-list value is wrapped in a single-item list.
    - A list that contains only dicts is returned unchanged (same object).
    - Any other list is flattened recursively: nested lists are expanded in
      order and non-list items are kept as they are.
    """
    # Anything that is not a list is wrapped so the result is always a list.
    if not isinstance(metadata, list):
        return [metadata]

    # A list made up solely of dicts is already flat; hand it back untouched.
    if all(isinstance(entry, dict) for entry in metadata):
        return metadata

    flattened = []
    for entry in metadata:
        # Nested lists are expanded recursively; every other value is kept as-is.
        flattened += flatten_metadata(entry) if isinstance(entry, list) else [entry]
    return flattened

In [15]:
# transform the metadata
# Flatten the "metadata" column of the summaries frame stored at index 1 of every depth.
for depth in sorted(raptor_results.keys()):
    summaries_frame = raptor_results[depth][1]
    summaries_frame["metadata"] = summaries_frame["metadata"].apply(flatten_metadata)


In [16]:
#checking 
raptor_results[5][1]

Unnamed: 0,text_chunk,depth,cluster,metadata
0,The text provides a comprehensive overview of ...,5,0,[{'book_name': 'Introduction to Autonomous Mob...


In [17]:
#Collapsed tree retrieval

#get the original text chunks and metadata
# Seed the flat collections with the entries stored at depth 1, index 0.
leaf_frame = raptor_results[1][0]
all_textbook_text = leaf_frame["text_chunk"].tolist()
all_textbook_metadata = leaf_frame["metadata"].tolist()

# Append the index-1 entries of every depth, in ascending depth order,
# so texts and metadata stay aligned position by position.
for depth in sorted(raptor_results.keys()):
    summaries_frame = raptor_results[depth][1]
    text_summaries = summaries_frame["text_chunk"].tolist()
    text_summaries_metadata = summaries_frame["metadata"].tolist()

    all_textbook_text += text_summaries
    all_textbook_metadata += text_summaries_metadata
 
    
    


In [18]:
# Sanity check: the metadata list and the text list should have the same length,
# because the two are paired element by element when the documents are built.

print(len(all_textbook_metadata))
print(len(all_textbook_text))

5087
5087


## Add the RAPTOR index to a Milvus database using Milvus Lite

In [19]:
# format the data to document format in langchain
# Create Document objects with texts and their corresponding metadata
from langchain.docstore.document import Document


documents = []

for text, metadata in zip(all_textbook_text, all_textbook_metadata):
    # Accepted shapes: a single dict, or a list made up solely of dicts.
    is_single_dict = isinstance(metadata, dict)
    is_dict_list = isinstance(metadata, list) and all(isinstance(item, dict) for item in metadata)

    if not (is_single_dict or is_dict_list):
        # Anything else is reported and left out of `documents`.
        print(f"Unexpected metadata format : {metadata}")
        continue

    # Both accepted shapes are stored the same way, under the "metadata" key.
    documents.append(Document(page_content=text, metadata={"metadata": metadata}))
          

In [20]:
#check if the documents are all there
len(documents)

5087

In [21]:
from sentence_transformers import SentenceTransformer
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_milvus import Milvus


# define the embedding model for the db
embd_model = HuggingFaceEmbeddings(model_name='multi-qa-MiniLM-L6-cos-v1')
# Read the embedding dimension from the underlying client object; it is only printed here.
# NOTE(review): this reaches the client through .dict()['client'] — confirm it still works
# with the installed langchain-huggingface version.
EMBD_DIM = embd_model.dict()['client'].get_sentence_embedding_dimension()
print(EMBD_DIM)



# add the collection to the db
# The uri is a local file name (presumably a Milvus Lite database, per the section heading).
# NOTE(review): drop_old=True presumably discards any existing "robotics_textbooks"
# collection on every run — confirm that is intended before re-running.
vectorstore = Milvus.from_documents(
                         documents=documents, embedding=embd_model,
                         connection_args={"uri":"milvus_robo_qa.db"},
                        collection_name="robotics_textbooks",
                        drop_old =True,
                        index_params={"metric_type":"COSINE"})
                    



  from tqdm.autonotebook import tqdm, trange
2024-07-24 18:18:32.611020: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-24 18:18:32.611121: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-24 18:18:32.758183: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/383 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

384


### Testing the Milvus db with similarity search

In [22]:
# testing the db: run a similarity search on the vector store
# and display the text of the first returned document
query = "What is reinforcement learning?"
docs_ans = vectorstore.similarity_search(query)
docs_ans[0].page_content

'Reinforcement learning involves trying different things and seeing what happens; if goodthings happen, we tend to do the behavior again, and if bad things happen, we tend to avoid it. This basic process turns out to be a remarkably versatile tool for learn- ing. It allows robots to learn what to do and not to do in various situations. Consider a typical reactive controller that tells the robot how to react underdifferent sensory inputs.'

In [23]:
# testing the db with a second query; `query` and `docs_ans` are overwritten
query = "What is inverse kinematics?"
docs_ans = vectorstore.similarity_search(query)
docs_ans[0].page_content

'This con-version from a Cartesian (x,y ,z) position of the endpoint (e.g., a ﬁngertip) andthe angles of the whole manipulator (e.g., an arm) is called inverse kinematics . INVERSE KINEMATICS The name refers to the fact that this is the opposite of the simpler process of ﬁguring out where the endpoint of the manipulator is given the joint angles for all of the joints. That was kinematics , presented earlier in this chapter. The'

In [24]:
# define the milvus retriever on top of the vector store, passing k=3 as the search setting
milvus_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Retrieval techniques

## Query expansion helper functions - Using LLM (Step-Back prompting)

In [40]:
from kaggle_secrets import UserSecretsClient
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import OpenAIEmbeddings,ChatOpenAI
#from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from langchain_core.runnables import RunnableLambda


# Read the OpenAI API key from Kaggle's user secrets (stored under the name "OPEN_AI_KEY").
# `openai_key` is a module-level value used by the LLM clients created later in the notebook.
user_secrets = UserSecretsClient()
openai_key = user_secrets.get_secret("OPEN_AI_KEY")

def stepback_prompting_expansion(query):
    """Rewrite `query` as a more generic "step-back" question using an LLM.

    A few-shot chat prompt (system instruction, example question/step-back
    pairs, then the user's question) is piped into a "gpt-4o-mini" chat model
    at temperature 0, and the reply is parsed to a plain string.

    Relies on the module-level `openai_key` for the API key.

    Args:
        query: The original user question.

    Returns:
        The model's step-back reformulation of `query`, as a string.
    """

    # Step-back prompting: few-shot examples.
    # Each example must be its own dict. Repeating the "input"/"output" keys
    # inside one dict literal silently keeps only the last pair.
    examples = [
        {
            "input": "Could the members of The Police perform lawful arrests?",
            "output": "what can the members of The Police do?",
        },
        {
            "input": "Jan Sindel’s was born in what country?",
            "output": "what is Jan Sindel’s personal history?",
        },
        {
            "input": "Is it possible to get forward kinematics from inverse in robotics?",
            "output": "what is the relationship between forward and inverse kinematics in robotics?",
        },
    ]

    # Transform the examples into human/ai message pairs.
    example_prompt = ChatPromptTemplate.from_messages(
        [
            ("human", "{input}"),
            ("ai", "{output}"),
        ]
    )
    few_shot_prompt = FewShotChatMessagePromptTemplate(
        example_prompt=example_prompt,
        examples=examples,
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
            ),
            # Few shot examples
            few_shot_prompt,
            # New question
            ("user", "{question}"),
        ]
    )

    llm_model = "gpt-4o-mini"
    prompt_model = ChatOpenAI(model_name=llm_model, temperature=0, openai_api_key=openai_key)

    # prompt -> chat model -> plain string
    question_gen = prompt | prompt_model | StrOutputParser()
    few_shot_ques = question_gen.invoke({"question": query})

    return few_shot_ques



## Hybrid retrieval  - BM25 + vector_store_retriever

In [29]:
#initialize the retrievers
#BM25: built over the same `documents` list that was loaded into the vector store
bm25_retriever = BM25Retriever.from_documents(documents)
# set k to 3, the same value passed to the milvus retriever
bm25_retriever.k = 3

In [30]:
#combine the retrievers (vector-store retriever + BM25) into one ensemble
# NOTE(review): no `weights` are passed, so the library default weighting applies — confirm it is the intended mix
ensemble_retriever = EnsembleRetriever(retrievers=[milvus_retriever,bm25_retriever] )

    

## Reranking 

In [32]:
#reranking with FlashRank - "ms-marco-MultiBERT-L-2-v2"
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank

# No model name is passed here, so FlashrankRerank uses its own default model.
# NOTE(review): confirm that default matches the model named in this cell's header comment.
compressor = FlashrankRerank()
# Wrap the ensemble retriever so that its results pass through the reranker.
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor,base_retriever=ensemble_retriever)


# Question Answering section

In [84]:
#helper function for deduplicating the metadata
def deduplicate_dicts(dict_list):
    """Return the distinct dictionaries in `dict_list`, in first-seen order.

    Two dictionaries count as duplicates when they hold the same key/value
    pairs, regardless of key order. Values must be hashable and keys mutually
    sortable, because each dict is compared via a tuple of its sorted items.

    Args:
        dict_list: Iterable of dictionaries.

    Returns:
        A list of new dictionaries, one per distinct input, ordered by first
        appearance (each rebuilt from its sorted key/value pairs).
    """
    # dict.fromkeys drops repeated keys while keeping insertion order, so the
    # result is deterministic (a plain set would give arbitrary ordering).
    unique_items = dict.fromkeys(tuple(sorted(d.items())) for d in dict_list)
    return [dict(items) for items in unique_items]

#helper function to extract metadata
def consolidate_pages(dict_list):
    """Summarise which pages of which books appear in `dict_list`.

    Each entry must provide 'book_name' and 'page_number'. Page numbers are
    grouped per book (duplicates collapsed), sorted, and rendered as a tuple.
    Books appear in first-seen order and are joined with " ; ".
    """
    pages_by_book = {}
    for record in dict_list:
        # Read both fields first; a missing key raises KeyError before anything is stored.
        title, page = record['book_name'], record['page_number']
        pages_by_book.setdefault(title, set()).add(page)

    # One "Book_name: ..., Page_numbers: (...)" fragment per book.
    return " ; ".join(
        f"Book_name: {title}, Page_numbers: {tuple(sorted(page_set))}"
        for title, page_set in pages_by_book.items()
    )


In [41]:

from langchain.chains import RetrievalQA


# QA llm: chat model that answers the questions.
# temperature=0 is passed; the API key comes from the module-level `openai_key`.
llm_model = "gpt-4o-mini"
QA_llm = ChatOpenAI(model_name= llm_model,temperature=0,openai_api_key = openai_key)



In [43]:


# question answering template
# The template exposes two placeholders: {context} and {question}.
# The context section is labelled explicitly with "Context:" (it was previously a lone ":").
QA_template = """Answer the question about the field of robotics based only on the given context.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context:
{context}
Question: {question}
Answer:"""

#qa chain prompt
QA_chain_prompt = PromptTemplate.from_template(QA_template)


#define the QA chain: the reranking retriever supplies the documents, and the
#source documents are returned alongside the answer under "source_documents"
QA_chain = RetrievalQA.from_chain_type(QA_llm,retriever= compression_retriever,
                                       return_source_documents=True,
                                     chain_type_kwargs={"prompt":QA_chain_prompt})

# Testing the robotics textbook QA-bot

## Question 1

## Perform query expansion with Step-back prompting

In [37]:
#query expansion: rewrite the raw question as a more generic step-back question
query = "can you tell me what is inverse kinematics and how we can get it from forward kinematics?."
expanded_query = stepback_prompting_expansion(query)


In [38]:
expanded_query

'what is the concept of inverse kinematics and how does it relate to forward kinematics?'

## Pass the Step-back prompt to the Question answering chain

In [47]:
# run the QA chain on the expanded (step-back) query and display the answer text
result = QA_chain.invoke({"query": expanded_query})
result["result"]

"Inverse kinematics is the process of calculating the joint parameters (angles or positions) required to achieve a desired position and orientation of the end effector of a robotic manipulator. It is often more complex than forward kinematics, which is the process of determining the position and orientation of the end effector based on given joint parameters. While forward kinematics provides a straightforward calculation from joint angles to end effector position, inverse kinematics involves solving nonlinear equations that may have multiple solutions or may not have a solution at all, depending on the manipulator's configuration and joint limits."

In [56]:
result["source_documents"]

[Document(metadata={'metadata': {'book_name': 'Introduction-to-Robotics-3rd-edition', 'page_number': 15}, 'relevance_score': 0.9995482}, page_content='Section 1.2The mechanics and control of mechanical manipulators 7joint space to Cartesian space is needed. These days, however, it is rare to find anindustrial robot that lacks this basic inverse kinematic algorithm.The inverse kinematics problem is not as simple as the forward kinematicsone. Because the kinematic equations are nonlinear, their solution is not alwayseasy (or even possible) in a closed form.'),
 Document(metadata={'metadata': [{'book_name': 'Introduction-to-Robotics-3rd-edition', 'page_number': 138}, {'book_name': 'Introduction-to-Robotics-3rd-edition', 'page_number': 139}, {'book_name': 'Introduction-to-Robotics-3rd-edition', 'page_number': 139}, {'book_name': 'Introduction-to-Robotics-3rd-edition', 'page_number': 139}, {'book_name': 'Introduction-to-Robotics-3rd-edition', 'page_number': 140}, {'book_name': 'Introduction

## Question 2

In [61]:
#query expansion: rewrite the raw question as a step-back question and print it
query = "How can I calculate the pos matrix in robotics?."
expanded_query = stepback_prompting_expansion(query)
print(expanded_query)


what methods are used to calculate position matrices in robotics?


In [62]:
# run the QA chain on the expanded query and display the answer text
result = QA_chain.invoke({"query": expanded_query})
result["result"]

"The text does not specify the exact methods used to calculate position matrices in robotics. It mentions practical examples and exercises, such as calculating transformation matrices and verifying results using MATLAB, but does not provide detailed methods. Therefore, I don't know the specific methods used to calculate position matrices in robotics."

## Question 3

In [67]:
# query expansion: rewrite the raw question as a step-back question and print it
query = "What is singularity in robotics?."
expanded_query = stepback_prompting_expansion(query)
print(expanded_query)

what does the term singularity mean in the context of robotics?


In [68]:
# run the QA chain on the expanded query and display the answer text
result = QA_chain.invoke({"query": expanded_query})
result["result"]

'In the context of robotics, the term "singularity" refers to a configuration of a manipulator where the Jacobian matrix becomes non-invertible, leading to a loss of degrees of freedom. At a singularity, the manipulator may experience infinite joint rates as it approaches the singular point, which can result in difficulties in controlling the robot\'s movement and can limit its ability to exert forces or perform tasks effectively.'

## Question 4

In [74]:
# query expansion: rewrite the raw question as a step-back question and print it
query = "What happens when det of Jacobian becomes zero in robotics?."
expanded_query = stepback_prompting_expansion(query)
print(expanded_query)

what are the implications of a zero determinant in a Jacobian matrix in robotics?


In [75]:
# run the QA chain on the expanded query and display the answer text
result = QA_chain.invoke({"query": expanded_query})
result["result"]

'A zero determinant in a Jacobian matrix indicates a singular configuration, meaning the Jacobian becomes non-invertible. This leads to infinite joint rates or forces, which can cause problems with the motion of the robotic arm in the vicinity of that configuration. It signifies a loss of degrees of freedom in the manipulator, making it difficult or impossible to achieve certain desired velocities or forces at the end-effector. This is critical for understanding the limitations of robotic motion and control.'

## Question 5

In [79]:
# query expansion: rewrite the raw question as a step-back question and print it
query = "What is the main field of robotics?."
expanded_query = stepback_prompting_expansion(query)
print(expanded_query)

What are the primary areas of study within robotics?


In [80]:
# run the QA chain on the expanded query and display the answer text
result = QA_chain.invoke({"query": expanded_query})
result["result"]

'The primary areas of study within robotics are mechanical manipulation, locomotion, computer vision, and artificial intelligence.'

book_name: Introduction-to-Robotics-3rd-edition, page_numbers: (15, 20) ; book_name: Introduction-to-Robotics-2nd-edition, page_numbers: (10, 12)


book_name: Introduction-to-Robotics-3rd-edition, page_numbers: (15, 138, 139, 140) ; book_name: mataric-primer, page_numbers: (163,)
