In [22]:
!pip install --upgrade --quiet  pypdf langchain langchain-community langchain-core langchain-experimental langchain-text-splitters langchain-ollama pinecone-client pinecone-text pinecone-notebooks

In [23]:
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="mistral-nemo:latest",
    temperature=0,
)

In [24]:
import os
HOME = os.getcwd()
print(HOME)

/Users/shubhamrathod/PycharmProjects/RAG_Pipeline


# HYBRID RAG With Local Model and PineCone

## Load the Document

In [25]:
from langchain_community.document_loaders import PyPDFLoader

file_path = f'{HOME}/yolov10.pdf'
loader = PyPDFLoader(file_path=file_path)
pages = loader.load()

In [26]:
len(pages)

18

In [27]:
pages[0]

Document(metadata={'source': '/Users/shubhamrathod/PycharmProjects/RAG_Pipeline/yolov10.pdf', 'page': 0}, page_content='YOLOv10: Real-Time End-to-End Object Detection\nAo Wang Hui Chen∗Lihao Liu Kai Chen Zijia Lin\nJungong Han Guiguang Ding∗\nTsinghua University\n/uni00000015/uni00000011/uni00000018 /uni00000018/uni00000011/uni00000013 /uni0000001a/uni00000011/uni00000018 /uni00000014/uni00000013/uni00000011/uni00000013 /uni00000014/uni00000015/uni00000011/uni00000018 /uni00000014/uni00000018/uni00000011/uni00000013 /uni00000014/uni0000001a/uni00000011/uni00000018 /uni00000015/uni00000013/uni00000011/uni00000013\n/uni0000002f/uni00000044/uni00000057/uni00000048/uni00000051/uni00000046/uni0000005c/uni00000003/uni0000000b/uni00000050/uni00000056/uni0000000c/uni00000016/uni0000001a/uni00000011/uni00000018/uni00000017/uni00000013/uni00000011/uni00000013/uni00000017/uni00000015/uni00000011/uni00000018/uni00000017/uni00000018/uni00000011/uni00000013/uni00000017/uni0000001a/uni00000011/uni000

# Split the Document

In [28]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=8000,
    chunk_overlap=3000,
    length_function=len,
    is_separator_regex=False,
)

In [29]:
docs = text_splitter.split_documents(pages)

In [30]:
len(docs)

18

In [31]:
docs[0]

Document(metadata={'source': '/Users/shubhamrathod/PycharmProjects/RAG_Pipeline/yolov10.pdf', 'page': 0}, page_content='YOLOv10: Real-Time End-to-End Object Detection\nAo Wang Hui Chen∗Lihao Liu Kai Chen Zijia Lin\nJungong Han Guiguang Ding∗\nTsinghua University\n/uni00000015/uni00000011/uni00000018 /uni00000018/uni00000011/uni00000013 /uni0000001a/uni00000011/uni00000018 /uni00000014/uni00000013/uni00000011/uni00000013 /uni00000014/uni00000015/uni00000011/uni00000018 /uni00000014/uni00000018/uni00000011/uni00000013 /uni00000014/uni0000001a/uni00000011/uni00000018 /uni00000015/uni00000013/uni00000011/uni00000013\n/uni0000002f/uni00000044/uni00000057/uni00000048/uni00000051/uni00000046/uni0000005c/uni00000003/uni0000000b/uni00000050/uni00000056/uni0000000c/uni00000016/uni0000001a/uni00000011/uni00000018/uni00000017/uni00000013/uni00000011/uni00000013/uni00000017/uni00000015/uni00000011/uni00000018/uni00000017/uni00000018/uni00000011/uni00000013/uni00000017/uni0000001a/uni00000011/uni000

# Setting Up Pinecone

In [35]:
pinecone_api_key = ''

In [58]:
import os

from pinecone import Pinecone, ServerlessSpec

index_name = "langchain-pinecone-hybrid-search"

# initialize Pinecone client
pc = Pinecone(api_key=pinecone_api_key)

# create the index
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1024,  # dimensionality of dense model
        metric="dotproduct",  # sparse values supported only for dotproduct
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [59]:
index = pc.Index(index_name)

In [60]:
index

<pinecone.data.index.Index at 0x13088aec0>

# Creating Embeddings

### Dense Vector

In [39]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="mxbai-embed-large:latest")

### Sparse Vector

In [48]:
# To encode the text to sparse values you can either choose SPLADE or BM25
from pinecone_text.sparse import BM25Encoder

# or from pinecone_text.sparse import SpladeEncoder if you wish to work with SPLADE

# use default tf-idf values
bm25_encoder = BM25Encoder().default()

### The above code is using default tfids values. It's highly recommended to fit the tf-idf values to your own corpus.

In [49]:
corpus = []

for doc in docs:
    content = doc.page_content
    corpus.append(content)

In [50]:
len(corpus)

18

In [51]:
# print(corpus[0])

In [56]:
# Run this only if error pop up while running below code
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shubhamrathod/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/shubhamrathod/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [57]:
# fit tf-idf values on your corpus
bm25_encoder.fit(corpus)

# store the values to a json file
bm25_encoder.dump("bm25_values.json")

# load to your BM25Encoder object
bm25_encoder = BM25Encoder().load("bm25_values.json")

100%|██████████| 18/18 [00:00<00:00, 72.95it/s]


# Create Retriever

In [61]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=bm25_encoder,
    index=index
)

In [62]:
retriever

PineconeHybridSearchRetriever(embeddings=OllamaEmbeddings(model='mxbai-embed-large:latest'), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x1315abf70>, index=<pinecone.data.index.Index object at 0x13088aec0>)

## Use Retriever

In [63]:
retriever.add_texts(corpus)

100%|██████████| 1/1 [00:04<00:00,  4.14s/it]


In [64]:
result = retriever.invoke("Implementation Details of Yolov10")

In [65]:
result

[Document(page_content='mixing, as shown in Fig. 3.(b). It can serve as the efficient basic building block, e.g., embedded in the\nELAN structure [ 58,20] (Fig. 3.(b)). Then, we advocate a rank-guided block allocation strategy to\nachieve the best efficiency while maintaining competitive capacity. Specifically, given a model, we\nsort its all stages based on their intrinsic ranks in ascending order. We further inspect the performance\nvariation of replacing the basic block in the leading stage with CIB. If there is no performance\ndegradation compared with the given model, we proceed with the replacement of the next stage and\nhalt the process otherwise. Consequently, we can implement adaptive compact block designs across\nstages and model scales, achieving higher efficiency without compromising performance. Due to the\npage limit, we provide the details of the algorithm in the appendix.\nAccuracy driven model design. We further explore the large-kernel convolution and self-attention\n

# Chain
Memory - Prompt

In [67]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [68]:
llm = ChatOllama(
    model="mistral-nemo:latest",
    temperature=0,
)

## Defining System Prompt

In [77]:
system_prompt = (
    """
      You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer
      the question. Base your respose only based on this context, If you don't know the answer, say that you don't know. Use three sentences maximum and keep the
      answer concise.
      \n\n
      {context}
    """
)

## Contextualizing the Question

In [78]:
contextualize_q_system_prompt = (
    """
    Given a chat history and the latest user question which might reference context in the chat history,
    formulate a standalone question which can be understood without the chat history. Do NOT answer the question,
    just reformulate it if needed and otherwise return it as is.
    """
)

In [79]:
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    contextualize_q_prompt
)

## Create Retrieval Chain

In [80]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

# Run Chain

In [81]:
chat_history = []

question = "What is Yolo?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

print(ai_msg_1['answer'])

"YOLO" can stand for several things, depending on the context. (1) In popular culture, it's often used as an acronym for "You Only Live Once," which encourages living life to its fullest without regrets. Here are a few other meanings:

- **In computing and artificial intelligence:**
  - **YOLO** is also the name of a real-time object detection system introduced in 2016 by Joseph Redmon et al. It's known for its speed and accuracy, making it popular in applications like self-driving cars and security cameras.
  - **YOLOv3**, **YOLOv4**, **YOLOv5**, etc., are successive versions of this system.

- **In social media:**
  - #Yolo is sometimes used as a hashtag to accompany posts about living life adventurously or making spontaneous decisions.


In [82]:
question = "What are the implemenattion detail of Yolov10?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

print(ai_msg_1['answer'])

Yolov10 is a real-time object detector introduced in the paper "YOLOv10: A New Real-Time End-to-End Object Detector" by Li et al. Here are some key implementation details of Yolov10:

1. **Consistent Dual Assignments Strategy**:
   - **One-to-Many Branch (Training)**: During training, each ground truth object is assigned to multiple anchors with the highest IoU overlap. This provides rich supervision.
   - **One-to-One Branch (Inference)**: During inference, each predicted box is assigned to a single ground truth object based on the maximum IoU overlap. This ensures high efficiency.
   - **Consistent Matching Metric**: To reduce the supervision gap between training and inference, Yolov10 introduces a consistent matching metric that considers both branches during training.

2. **Efficiency-Accuracy Driven Model Design**:
   - **Lightweight Classification Head**: Yolov10 uses a lightweight classification head with fewer parameters to reduce computational redundancy.
   - **Spatial-Channe

In [83]:
question = "What is the conclusion we obtain from  Yolov10 paper?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

print(ai_msg_1['answer'])

Based on the provided text from the YOLOv10 paper, here are the key conclusions:

1. **Superior Performance and Efficiency**: YOLOv10 outperforms other state-of-the-art object detectors like YOLOv8, RT-DETR, Gold-YOLO, and YOLOv9 in terms of Average Precision (AP) while having fewer parameters and lower computational costs.

2. **Real-Time Detection**: YOLOv10 demonstrates significant improvements in inference speed compared to other methods. For instance, YOLOv10-S/X achieves 1.8x/1.3x faster inference speed than RT-DETR-R18/R101 under similar performance.

3. **Effectiveness of Architectural Designs**: The paper shows that the architectural designs used in YOLOv10, such as NMS-free training with consistent dual assignments and efficiency-driven model design, lead to substantial reductions in latency and improvements in AP.

4. **State-of-the-Art Performance Across Different Model Scales**: When compared using the original one-to-many training approach, YOLOv10 exhibits state-of-the-a

In [84]:
question = "Based on the paper, Yolov10 outperforms how many other model and which are those?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

print(ai_msg_1['answer'])

According to the provided text, YOLOv10 outperforms YOLOv6-3.0-N/S by 1.5 AP and 2.0 AP at small/medium object sizes respectively. Here's a summary of the performance comparison:

| Model | AP (small) | AP (medium) |
|---|---|---|
| YOLOv6-3.0-N/S | ? | ? |
| **YOLOv10** | **+1.5** | **+2.0** |

However, without the specific AP values for YOLOv6-3.0-N/S, we cannot determine by how much YOLOv10 outperforms them numerically. The text only mentions the improvement in terms of Average Precision (AP) points.

Additionally, the paper mentions that YOLOv10 achieves state-of-the-art performance and efficiency trade-offs compared with other advanced detectors, but it doesn't explicitly mention which specific models it outperforms besides YOLOv6-3.0-N/S. To get a complete list of outperformed models, you would need to refer to the full paper or the detailed performance tables provided in the text (Table 15).
