In [None]:
! pip install langchain_community tiktoken langchain-google-genai langchainhub chromadb langchain youtube-transcript-api pytube langchain-huggingface

In [None]:
import os
# LangSmith tracing configuration.
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGSMITH_PROJECT'] = "langsmith_project_MY_FIRST_RAG_EVER"

# Keep a key that was exported before the kernel started: an unconditional
# assignment of "" here would overwrite it. Prefer exporting the key outside
# the notebook so it never ends up in a saved or shared copy.
os.environ.setdefault('LANGCHAIN_API_KEY', "")

# Tracing cannot authenticate without a key, so only switch it on when one
# is actually available.
os.environ['LANGCHAIN_TRACING_V2'] = 'true' if os.environ['LANGCHAIN_API_KEY'] else 'false'

In [None]:
# Key for the Gemini model used below. setdefault keeps a key that is already
# exported in the environment instead of overwriting it with an empty string;
# prefer exporting the key outside the notebook over pasting it here.
os.environ.setdefault("GOOGLE_API_KEY", "")

## Part 12: Multi-representation Indexing

In [6]:
import uuid
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.storage import InMemoryByteStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.retrievers.multi_vector import MultiVectorRetriever


# Blog posts to index, loaded in this order (the agents post comes first).
post_urls = (
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
)

# Accumulate the documents returned for every URL into a single list.
docs = []
for post_url in post_urls:
    loader = WebBaseLoader(post_url)
    docs.extend(loader.load())



In [7]:
# Stages of the summarisation pipeline, named so each can be inspected alone.
summary_prompt = ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
summary_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",max_retries=0)

# Document -> {"doc": text} -> prompt -> Gemini -> plain string
chain = (
    {"doc": lambda document: document.page_content}
    | summary_prompt
    | summary_llm
    | StrOutputParser()
)

# Summarise every loaded document, capping concurrency at 5.
summaries = chain.batch(docs, config={"max_concurrency": 5})



In [11]:
# Metadata key that links a summary to the id of its parent document
id_key = "doc_id"

# Vector index holding the embedded summaries
summary_embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-mpnet-base-v2')
vectorstore = Chroma(collection_name="summaries", embedding_function=summary_embeddings)

# In-memory storage for the full parent documents
store = InMemoryByteStore()

# Retriever that searches the summaries and resolves hits through `id_key`
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    byte_store=store,
    id_key=id_key,
)

# One random id per parent document
doc_ids = [str(uuid.uuid4()) for _ in range(len(docs))]

# Wrap each summary in a Document tagged with the id at the same position
summary_docs = []
for position in range(len(summaries)):
    summary_docs.append(
        Document(page_content=summaries[position], metadata={id_key: doc_ids[position]})
    )

# Index the summaries and store the parents under the matching ids
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset([(doc_id, parent_doc) for doc_id, parent_doc in zip(doc_ids, docs)])

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  vectorstore = Chroma(collection_name="summaries",


In [12]:
# Search the summary index directly: the hit is a summary Document whose
# metadata carries the id of the parent it was generated from.
query = "Memory in agents"
sub_docs = vectorstore.similarity_search(query=query, k=1)
sub_docs[0]

Document(metadata={'doc_id': '9e2254da-23c7-4a83-b714-edc9ef82745e'}, page_content="This blog post discusses LLM-powered autonomous agents, exploring their key components and providing examples and challenges.  LLMs serve as the agent's brain, augmented by planning, memory, and tool use.  Planning involves task decomposition (using techniques like Chain of Thought and Tree of Thoughts) and self-reflection (using methods like ReAct, Reflexion, Chain of Hindsight, and Algorithm Distillation). Memory includes short-term (in-context learning) and long-term memory (external vector stores with fast retrieval via algorithms like LSH, ANNOY, HNSW, FAISS, and ScaNN). Tool use expands LLM capabilities through external APIs and tools (as seen in MRKL, TALM, Toolformer, ChatGPT Plugins, HuggingGPT, and API-Bank).  Case studies highlight applications in scientific discovery (ChemCrow) and generative agent simulations.  The post concludes with challenges including limited context length, difficultie

In [13]:
# `get_relevant_documents` is deprecated in favour of `invoke`. The former
# `n_results=1` argument is dropped because it is not an option the
# MultiVectorRetriever understands; to cap the number of hits, pass
# search_kwargs={"k": 1} when constructing the retriever instead.
retrieved_docs = retriever.invoke(query)

# Preview the start of the best-matching parent document.
retrieved_docs[0].page_content[0:500]

  retrieved_docs = retriever.get_relevant_documents(query,n_results=1)


"\n\n\n\n\n\nLLM Powered Autonomous Agents | Lil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n|\n\n\n\n\n\n\nPosts\n\n\n\n\nArchive\n\n\n\n\nSearch\n\n\n\n\nTags\n\n\n\n\nFAQ\n\n\n\n\nemojisearch.app\n\n\n\n\n\n\n\n\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\n \n\n\nTable of Contents\n\n\n\nAgent System Overview\n\nComponent One: Planning\n\nTask Decomposition\n\nSelf-Reflection\n\n\nComponent Two: Memory\n\nTypes of Memory\n\nMaximum Inner Product Search (MIPS"

## RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval

## ColBERT


In [None]:
! pip install -U ragatouille

In [15]:
from ragatouille import RAGPretrainedModel
# Pretrained ColBERT checkpoint to load.
colbert_checkpoint = "colbert-ir/colbertv2.0"
RAG = RAGPretrainedModel.from_pretrained(colbert_checkpoint)

artifact.metadata:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/405 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

  self.scaler = torch.cuda.amp.GradScaler()


In [16]:
import requests

def get_wikipedia_page(title: str, timeout: float = 10.0):
    """
    Retrieve the full text content of a Wikipedia page.

    :param title: str - Title of the Wikipedia page.
    :param timeout: float - Seconds to wait for the API before giving up.
    :return: str | None - Plain-text extract of the page, or None when the
        response carries no extract (e.g. the page is missing or the API
        returned an error payload).
    :raises requests.HTTPError: If the API answers with an HTTP error status.
    """
    # Wikipedia API endpoint
    URL = "https://en.wikipedia.org/w/api.php"

    # Parameters for the API request
    params = {
        "action": "query",
        "format": "json",
        "titles": title,
        "prop": "extracts",
        "explaintext": True,
    }

    # Custom User-Agent header to comply with Wikipedia's best practices
    headers = {"User-Agent": "RAGatouille_tutorial/0.0.1 (ben@clavie.eu)"}

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(URL, params=params, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()

    # Payloads without "query"/"pages" (API errors) are treated as "no page"
    # rather than raising KeyError/StopIteration.
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), None)
    if page is None:
        return None
    return page.get("extract")

# Article used as the corpus for the ColBERT example.
wikipedia_title = "Hayao_Miyazaki"
full_document = get_wikipedia_page(title=wikipedia_title)

In [17]:
# Build the ColBERT index over the article. The call's return value is left
# as the last expression so the cell displays it.
RAG.index(
    index_name="Miyazaki-123",
    collection=[full_document],
    # Let RAGatouille split the article into passages before encoding.
    split_documents=True,
    max_document_length=180,
)

This is a behaviour change from RAGatouille 0.8.0 onwards.
This works fine for most users and smallish datasets, but can be considerably slower than FAISS and could cause worse results in some situations.
If you're confident with FAISS working on your machine, pass use_faiss=True to revert to the FAISS-using behaviour.
--------------------


[Feb 16, 05:27:52] #> Creating directory .ragatouille/colbert/indexes/Miyazaki-123 


[Feb 16, 05:27:53] [0] 		 #> Encoding 122 passages..


  return torch.cuda.amp.autocast() if self.activated else NullContextManager()


[Feb 16, 05:27:54] [0] 		 avg_doclen_est = 131.9262237548828 	 len(local_sample) = 122
[Feb 16, 05:27:54] [0] 		 Creating 1,024 partitions.
[Feb 16, 05:27:54] [0] 		 *Estimated* 16,094 embeddings.
[Feb 16, 05:27:54] [0] 		 #> Saving the indexing plan to .ragatouille/colbert/indexes/Miyazaki-123/plan.json ..


  sub_sample = torch.load(sub_sample_path)


used 20 iterations (0.4038s) to cluster 15291 items into 1024 clusters
[Feb 16, 05:27:54] Loading decompress_residuals_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...


If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


[Feb 16, 05:29:23] Loading packbits_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...


If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
  centroids = torch.load(centroids_path, map_location='cpu')
  avg_residual = torch.load(avgresidual_path, map_location='cpu')
  bucket_cutoffs, bucket_weights = torch.load(buckets_path, map_location='cpu')


[0.036, 0.041, 0.039, 0.035, 0.033, 0.036, 0.035, 0.036, 0.033, 0.032, 0.034, 0.037, 0.035, 0.038, 0.035, 0.035, 0.031, 0.033, 0.034, 0.038, 0.037, 0.033, 0.034, 0.039, 0.038, 0.032, 0.039, 0.034, 0.036, 0.037, 0.036, 0.035, 0.037, 0.035, 0.035, 0.033, 0.035, 0.034, 0.035, 0.039, 0.036, 0.037, 0.034, 0.034, 0.036, 0.036, 0.034, 0.038, 0.036, 0.034, 0.034, 0.035, 0.035, 0.037, 0.034, 0.037, 0.036, 0.039, 0.038, 0.033, 0.036, 0.038, 0.035, 0.035, 0.038, 0.035, 0.037, 0.037, 0.033, 0.033, 0.037, 0.034, 0.033, 0.035, 0.037, 0.035, 0.034, 0.04, 0.034, 0.036, 0.038, 0.038, 0.035, 0.037, 0.033, 0.035, 0.038, 0.037, 0.034, 0.043, 0.033, 0.035, 0.035, 0.04, 0.034, 0.036, 0.039, 0.034, 0.037, 0.036, 0.04, 0.042, 0.036, 0.037, 0.038, 0.035, 0.036, 0.033, 0.037, 0.034, 0.036, 0.037, 0.034, 0.031, 0.036, 0.037, 0.037, 0.036, 0.037, 0.039, 0.032, 0.035, 0.033, 0.04, 0.033, 0.038, 0.037, 0.035]


0it [00:00, ?it/s]

[Feb 16, 05:30:41] [0] 		 #> Encoding 122 passages..


  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
1it [00:00,  2.15it/s]
  return torch.load(codes_path, map_location='cpu')
100%|██████████| 1/1 [00:00<00:00, 764.55it/s]

[Feb 16, 05:30:41] #> Optimizing IVF to store map from centroids to list of pids..
[Feb 16, 05:30:41] #> Building the emb2pid mapping..
[Feb 16, 05:30:41] len(emb2pid) = 16095



100%|██████████| 1024/1024 [00:00<00:00, 61384.74it/s]

[Feb 16, 05:30:41] #> Saved optimized IVF to .ragatouille/colbert/indexes/Miyazaki-123/ivf.pid.pt
Done indexing!





'.ragatouille/colbert/indexes/Miyazaki-123'

In [18]:
# Query the index built above and show the three best-scoring passages.
question = "What animation studio did Miyazaki found?"
results = RAG.search(query=question, k=3)
results

Loading searcher for index Miyazaki-123 for the first time... This may take a few seconds
[Feb 16, 05:30:42] #> Loading codec...
[Feb 16, 05:30:42] #> Loading IVF...
[Feb 16, 05:30:42] #> Loading doclens...


  self.scaler = torch.cuda.amp.GradScaler()
  ivf, ivf_lengths = torch.load(os.path.join(self.index_path, "ivf.pid.pt"), map_location='cpu')
100%|██████████| 1/1 [00:00<00:00, 548.20it/s]

[Feb 16, 05:30:42] #> Loading codes and residuals...



  return torch.load(residuals_path, map_location='cpu')
100%|██████████| 1/1 [00:00<00:00, 292.94it/s]


Searcher loaded!

#> QueryTokenizer.tensorize(batch_text[0], batch_background[0], bsize) ==
#> Input: What animation studio did Miyazaki found?, 		 True, 		 None
#> Output IDs: torch.Size([32]), tensor([  101,     1,  2054,  7284,  2996,  2106,  2771,  3148, 18637,  2179,
         1029,   102,   103,   103,   103,   103,   103,   103,   103,   103,
          103,   103,   103,   103,   103,   103,   103,   103,   103,   103,
          103,   103], device='cuda:0')
#> Output Mask: torch.Size([32]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')



[{'content': '=== Studio Ghibli ===\n\n\n==== Early films (1985–1995) ====\nFollowing the success of Nausicaä of the Valley of the Wind, Miyazaki and Takahata founded the animation production company Studio Ghibli on June 15, 1985, as a subsidiary of Tokuma Shoten, with offices in Kichijōji designed by Miyazaki. The studio\'s name had been registered a year earlier; Miyazaki named it after the nickname of the Caproni Ca.309 aircraft, meaning "a hot wind that blows in the desert" in Italian.',
  'score': 25.71875,
  'rank': 1,
  'document_id': '1fc2c5b3-5062-4c21-b137-cd1584d156a1',
  'passage_id': 42},
 {'content': 'Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao, [mijaꜜzaki hajao]; born January 5, 1941) is a Japanese animator, filmmaker, and manga artist. He co-founded Studio Ghibli and serves as its honorary chairman. Over the course of his career, Miyazaki has attained international acclaim as a masterful storyteller and creator of Japanese animated feature films, and is widely regarde

In [19]:
# Expose the ColBERT index through the LangChain retriever interface.
# NOTE: this rebinds `retriever`, which earlier in the notebook referred to
# the MultiVectorRetriever.
retriever = RAG.as_langchain_retriever(k=3)
studio_question = "What animation studio did Miyazaki found?"
retriever.invoke(studio_question)

  return torch.cuda.amp.autocast() if self.activated else NullContextManager()


[Document(metadata={}, page_content='=== Studio Ghibli ===\n\n\n==== Early films (1985–1995) ====\nFollowing the success of Nausicaä of the Valley of the Wind, Miyazaki and Takahata founded the animation production company Studio Ghibli on June 15, 1985, as a subsidiary of Tokuma Shoten, with offices in Kichijōji designed by Miyazaki. The studio\'s name had been registered a year earlier; Miyazaki named it after the nickname of the Caproni Ca.309 aircraft, meaning "a hot wind that blows in the desert" in Italian.'),
 Document(metadata={}, page_content='Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao, [mijaꜜzaki hajao]; born January 5, 1941) is a Japanese animator, filmmaker, and manga artist. He co-founded Studio Ghibli and serves as its honorary chairman. Over the course of his career, Miyazaki has attained international acclaim as a masterful storyteller and creator of Japanese animated feature films, and is widely regarded as one of the most accomplished filmmakers in the history of an