#Retrival using RAG#

In [11]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = ''

In [12]:
from langchain_community.document_loaders import UnstructuredHTMLLoader

loader = UnstructuredHTMLLoader("docs.html")
docs = loader.load()

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

In [14]:
splits = text_splitter.split_documents(docs)


In [15]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from tqdm import tqdm

In [16]:
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [17]:
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model
)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [18]:
retriever = vectorstore.as_retriever()

In [19]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "phi4-mini-instruct-transformers-default-v1",
    device_map="auto",
    quantization_config=bnb_config
)

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 2/2 [01:11<00:00, 35.75s/it]


In [7]:
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline

model_path = "phi4-mini-instruct-transformers-default-v1"
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [8]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0,
    do_sample=False
)

Device set to use cpu
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [9]:
llm_local_phi = HuggingFacePipeline(pipeline=pipe)

  llm_local_phi = HuggingFacePipeline(pipeline=pipe)


In [20]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_rag_fusion 
    | llm_local_phi 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [21]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
        and an optional parameter k used in the RRF formula """
    
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

In [22]:
question = "What is task decomposition for LLM agents?"
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given
  (loads(doc), score)


12

In [23]:
docs

[(Document(metadata={'source': 'docs.html'}, page_content='}\n]'), 0.05),
 (Document(metadata={'source': 'docs.html'}, page_content='[9] Laskin et al. “In-context Reinforcement Learning with Algorithm Distillation” ICLR 2023.\n\n[10] Karpas et al. “MRKL Systems A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning.” arXiv preprint arXiv:2205.00445 (2022).\n\n[11] Nakano et al. “Webgpt: Browser-assisted question-answering with human feedback.” arXiv preprint arXiv:2112.09332 (2021).\n\n[12] Parisi et al. “TALM: Tool Augmented Language Models”\n\n[13] Schick et al. “Toolformer: Language Models Can Teach Themselves to Use Tools.” arXiv preprint arXiv:2302.04761 (2023).\n\n[14] Weaviate Blog. Why is Vector Search so fast? Sep 13, 2022.\n\n[15] Li et al. “API-Bank: A Benchmark for Tool-Augmented LLMs” arXiv preprint arXiv:2304.08244 (2023).'),
  0.04838709677419355),
 (Document(metadata={'source': 'docs.html'}, page_con

In [4]:
import sys
!{sys.executable} -m pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-win_amd64.whl.metadata (10 kB)
Downloading bitsandbytes-0.46.0-py3-none-win_amd64.whl (66.5 MB)
   ---------------------------------------- 0.0/66.5 MB ? eta -:--:--
   ------------------- -------------------- 32.8/66.5 MB 160.1 MB/s eta 0:00:01
   -------------------------- ------------- 44.8/66.5 MB 109.8 MB/s eta 0:00:01
   ---------------------------------------  66.3/66.5 MB 114.3 MB/s eta 0:00:01
   ---------------------------------------  66.3/66.5 MB 114.3 MB/s eta 0:00:01
   ---------------------------------------  66.3/66.5 MB 114.3 MB/s eta 0:00:01
   ---------------------------------------  66.3/66.5 MB 114.3 MB/s eta 0:00:01
   ---------------------------------------- 66.5/66.5 MB 46.5 MB/s eta 0:00:00
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.46.0
