# RAG Search and Retrieval

## Environment Setup

In [None]:
%%capture

# installations
!pip install --quiet sentence_transformers transformers torch peft huggingface_hub kaggle pinecone lark rank_bm25 langchain_huggingface langchain_experimental langchain_pinecone 

# THE REGS
import pandas as pd
import numpy as np
import kagglehub
import torch
import os
import time

# Transformers
from transformers import AutoTokenizer, AutoModel
import torch

# PINECONE
from pinecone import Pinecone
from pinecone import ServerlessSpec

# LANGCHAIN
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document

# Kaggle environment
# NOTE(review): kaggle_secrets is presumably only importable inside a Kaggle
# notebook session — confirm before running this notebook elsewhere.
from kaggle_secrets import UserSecretsClient
# Client used further down to read secrets (the Pinecone API key) via get_secret().
user_secrets = UserSecretsClient()

In [None]:
# Checkpoint shared by the raw transformers model, its tokenizer and the
# LangChain embedder created later in this notebook.
# NOTE(review): the "-zh" suffix suggests a Chinese-language checkpoint, while
# the example queries below are in English — confirm this is the model the
# index was populated with.
model_name = "BAAI/bge-large-zh-v1.5"

# Load the pretrained weights and display the model configuration.
model = AutoModel.from_pretrained(model_name)
print(
    "About the model: \n\n",
    model.config,
    "\n",
)

# Load the tokenizer that belongs to the same checkpoint and display it.
tokenizer = AutoTokenizer.from_pretrained(model_name)
print(
    "About the tokenizer: \n\n",
    tokenizer,
)

In [None]:
# Name of the existing (already populated) Pinecone index to query.
index_name = "book-vector-store"

# Authenticate with the API key stored in the Kaggle secrets, then open a
# handle to the index.
pc = Pinecone(
    api_key=user_secrets.get_secret("PINECONE_API_KEY"),
)
index = pc.Index(index_name)

In [None]:
# Create the LangChain embedding object for the checkpoint in `model_name`.
# Run on the GPU when one is available and fall back to the CPU otherwise,
# instead of always forcing the (much slower) CPU path.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
# Embeddings are left un-normalized.
# NOTE(review): this must match how the vectors already stored in the index
# were produced — confirm before changing it.
encode_kwargs = {'normalize_embeddings': False}
embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [None]:
# Combine the Pinecone index handle and the embedder into a LangChain vector
# store, so raw-text queries are embedded with `embedder` and searched in `index`.
vector_store = PineconeVectorStore(
    embedding=embedder,
    index=index,
)

## Test out the search

> A `similarity_search` on a `PineconeVectorStore` object returns a list of LangChain `Document` objects most similar to the query provided. While `similarity_search` uses a Pinecone query to find the most similar results, this method includes additional steps and returns results of a different type. The `similarity_search` method accepts raw text and automatically embeds it using the `Embedding` object provided when you initialized the `PineconeVectorStore`. You can also provide a `k` value to determine the number of LangChain `Document` objects to return. The default value is `k=4`.


In [None]:
## Example
# query = "Who is Ketanji Brown Jackson?"
# vector_store.similarity_search(query)

# Response:
# [
#     Document(page_content='Ketanji Onyika Brown Jackson is an American lawyer and jurist who is an associate justice of the Supreme Court of the United...', metadata={'chunk': 0.0, 'source': 'https://en.wikipedia.org/wiki/Ketanji_Brown_Jackson', 'title': 'Ketanji Brown Jackson', 'wiki-id': '6573'}),
#     Document(page_content='Jackson was nominated to the Supreme Court by President Joe Biden on February 25, 2022, and confirmed by the U.S. Senate...', metadata={'chunk': 1.0, 'source': 'https://en.wikipedia.org/wiki/Ketanji_Brown_Jackson', 'title': 'Ketanji Brown Jackson', 'wiki-id': '6573'}),
#     Document(page_content='Jackson grew up in Miami and attended Miami Palmetto Senior High School. She distinguished herself as a champion debater...', metadata={'chunk': 3.0, 'source': 'https://en.wikipedia.org/wiki/Ketanji_Brown_Jackson', 'title': 'Ketanji Brown Jackson', 'wiki-id': '6573'}),
#     Document(page_content='After high school, Jackson matriculated at Harvard University to study government, having applied despite her guidance...', metadata={'chunk': 5.0, 'source': 'https://en.wikipedia.org/wiki/Ketanji_Brown_Jackson', 'title': 'Ketanji Brown Jackson', 'wiki-id': '6573'})
# ]

You can also optionally apply a metadata filter to your similarity search. The filtering query language is the same as for Pinecone queries, as detailed in [Filtering with metadata](https://docs.pinecone.io/guides/index-data/indexing-overview#metadata).

In [None]:
## Example
# query = "Tell me more about Ketanji Brown Jackson."
# vector_store.similarity_search(query, filter={'source': 'https://en.wikipedia.org/wiki/Ketanji_Brown_Jackson'})

### Test out RAG with LangChain
In RAG, the query is posed as a question to be answered by an LLM, but the LLM must base its answer on the information retrieved from the vector store.

In [None]:
## Example
# from langchain_openai import ChatOpenAI  
# from langchain.chains import RetrievalQA  
# # completion llm  
# llm = ChatOpenAI(  
#     openai_api_key=OPENAI_API_KEY,  
#     model_name='gpt-3.5-turbo',  
#     temperature=0.0  
# )  
# qa = RetrievalQA.from_chain_type(  
#     llm=llm,  
#     chain_type="stuff",  
#     retriever=vector_store.as_retriever()
# )  
# qa.invoke(query)  

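# Response (illustrative — this sample output answers a question about Benito Mussolini, not the Ketanji Brown Jackson query used in the examples above):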
# Benito Mussolini was an Italian politician and journalist who served as the Prime Minister of Italy from 1922 until 1943. He was the leader of the National Fascist Party and played a significant role in the rise of fascism in Italy...