In [None]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain
! pip install langchain-google-genai

In [None]:
# LangSmith tracing configuration
import os

os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# The API key has to be supplied through the environment (shell export, Colab
# secret, ...). It is deliberately NOT assigned here: writing the literal string
# "LANGCHAIN_API_KEY" would replace a real key with a placeholder that is not a
# valid credential.
# Tracing is only switched on when a key is actually available.
os.environ['LANGCHAIN_TRACING_V2'] = 'true' if os.environ.get('LANGCHAIN_API_KEY') else 'false'

In [None]:
# Google Generative AI key used by the Gemini chat model below.
# Prefer the conventional GOOGLE_API_KEY variable; the original "YOUR_API_KEY"
# name is still honoured as a fallback. The result is None when neither is set.
api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("YOUR_API_KEY")

# Multi-Representation Indexing

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Fetch every article and collect the resulting documents in one list
article_urls = [
    "https://medium.com/@aakuskar.980/understanding-lstm-networks-a-simplified-explanation-3659be6b4923",
    "https://medium.com/@aakuskar.980/ai-agents-the-invisible-workforce-revolutionizing-our-world-a2905367f725",
]

docs = []
for article_url in article_urls:
    loader = WebBaseLoader(article_url)
    docs.extend(loader.load())

In [None]:
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Building blocks of the summarisation chain
summary_prompt = ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
summary_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0,api_key=api_key)

# Document -> {"doc": page text} -> prompt -> Gemini -> plain string
chain = (
    {"doc": lambda x: x.page_content}
    | summary_prompt
    | summary_llm
    | StrOutputParser()
)

# Summarise all loaded documents, running at most 5 requests concurrently
summaries = chain.batch(docs, {"max_concurrency": 5})

In [None]:
# Display the summaries produced by the chain
summaries

['This article explains LSTM networks, a type of recurrent neural network (RNN) designed to address the short-term memory limitations of basic RNNs.  Basic RNNs struggle to retain information from earlier stages in a sequence, hindering their ability to understand context. LSTMs solve this by incorporating a "memory" mechanism.\n\nThe article uses the analogy of watching a movie.  A basic RNN is like a viewer who forgets each scene immediately after watching it, while an LSTM is like a viewer taking notes to remember key plot points.  RNNs have a loop that allows information to pass from one step to the next, but they suffer from the vanishing gradient problem, where information fades over time, especially in long sequences.\n\nLSTMs address this with a more complex internal structure.  They use "gates" – forget, input, and output – to control the flow of information. The forget gate decides what information to discard, the input gate decides what new information to store, and the outp

In [None]:
from langchain.storage import InMemoryByteStore
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.vectorstores import Chroma
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Store the summary vectors in ChromaDB.
# This is the vectorstore used to index the child chunks (here: the summaries).
# When a key was loaded into `api_key`, hand it to the embeddings client as well so
# that it uses the same credentials as the chat model; when `api_key` is empty the
# constructor is called exactly as before.
embedding_kwargs = {"google_api_key": api_key} if api_key else {}
vectorstore = Chroma(
    collection_name="summaries",
    embedding_function=GoogleGenerativeAIEmbeddings(model="models/embedding-001", **embedding_kwargs),
)

  vectorstore = Chroma(collection_name="summaries",


In [None]:
# Metadata key under which each summary records the id of its source document
id_key = 'doc_id'

# In-memory byte store handed to the retriever as its storage layer
store = InMemoryByteStore()

In [None]:
# One random UUID string per loaded document
doc_ids = [str(uuid.uuid4()) for _document in docs]

# Retriever wired to the summary vectorstore and the byte store,
# with `id_key` as the metadata field linking the two
retriver = MultiVectorRetriever(vectorstore=vectorstore, byte_store=store, id_key=id_key)

In [None]:
# Wrap each summary in a Document whose metadata links it to its source document id
summary_docs = []
for position, summary_text in enumerate(summaries):
    linked_summary = Document(
        page_content=summary_text,
        metadata={id_key: doc_ids[position]},
    )
    summary_docs.append(linked_summary)

In [None]:
# Index the summaries in the vectorstore
retriver.vectorstore.add_documents(summary_docs)

# Store the original documents in the docstore, keyed by their generated ids
id_document_pairs = list(zip(doc_ids, docs))
retriver.docstore.mset(id_document_pairs)

In [None]:
# Search the summary vectorstore directly and show the single closest summary
query = "Rnns in LSTMS"
sub_docs = vectorstore.similarity_search(query=query, k=1)
sub_docs[0]

Document(metadata={'doc_id': 'd8f03f55-789a-4cef-a927-19b13e6a7ba8'}, page_content='This article explains LSTM networks, a type of recurrent neural network (RNN) designed to address the short-term memory limitations of basic RNNs.  Basic RNNs struggle to retain information from earlier stages in a sequence, hindering their ability to understand context. LSTMs solve this by incorporating a "memory" mechanism.\n\nThe article uses the analogy of watching a movie.  A basic RNN is like a viewer who forgets each scene immediately after watching it, while an LSTM is like a viewer taking notes to remember key plot points.  RNNs have a loop that allows information to pass from one step to the next, but they suffer from the vanishing gradient problem, where information fades over time, especially in long sequences.\n\nLSTMs address this with a more complex internal structure.  They use "gates" – forget, input, and output – to control the flow of information. The forget gate decides what informat

In [None]:
# Ask the retriever for the documents behind the best-matching summaries.
# `invoke` replaces the deprecated `get_relevant_documents`.
# NOTE(review): the former `n_results=1` argument is dropped. MultiVectorRetriever does not
# appear to define such an option; the number of hits is presumably set through the
# retriever's `search_kwargs` (e.g. {"k": 1}). Confirm against the LangChain docs.
retrieved_docs = retriver.invoke(query)

# Preview the first 500 characters of the top document
retrieved_docs[0].page_content[0:500]

  retrieved_docs = retriver.get_relevant_documents(query,n_results=1)


'Understanding LSTM Networks: A Simplified Explanation | by Aditya Ak | Mar, 2025 | MediumOpen in appSign upSign inWriteSign upSign inHomeLibraryStoriesStatsUnderstanding LSTM Networks: A Simplified ExplanationAditya Ak·Follow12 min read·Mar 3, 2025--ListenShareImagine you’re watching a movie. As the plot unfolds, you don’t forget what happened at the beginning, right? You remember the characters, their relationships, especially the actress, and how all of this helps you predict what will happen '

##  ColBERT
RAGatouille makes it simple to use ColBERT.
ColBERT generates a contextually influenced vector for each token in the passages. ColBERT similarly generates vectors for each token in the query. Then, the score of each document is the sum of the maximum similarity of each query embedding to any of the document embeddings: $S(q, d) = \sum_{i \in q} \max_{j \in d} E_{q_i} \cdot E_{d_j}$

In [None]:
! pip install -U ragatouille

In [None]:
from ragatouille import RAGPretrainedModel
# Name of the pretrained ColBERT checkpoint to load
COLBERT_CHECKPOINT = "colbert-ir/colbertv2.0"
RAG = RAGPretrainedModel.from_pretrained(COLBERT_CHECKPOINT)

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


artifact.metadata:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/405 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

[Mar 22, 11:02:21] Loading segmented_maxsim_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...


  self.scaler = torch.cuda.amp.GradScaler()


In [None]:
import requests

def get_wikipedia(title: str, timeout: float = 30.0):
    """
    Retrieve the full text content of a Wikipedia page.

    :param title: str - Title of the Wikipedia page.
    :param timeout: float - Seconds to wait for the HTTP response before giving up.
    :return: str | None - Plain-text extract of the page, or None when the
        response contains no page or no extract for that title.
    :raises requests.HTTPError: if the API answers with an error status code.
    """
    # Wikipedia API endpoint
    URL = "https://en.wikipedia.org/w/api.php"

    # Parameters for the API request
    params = {
        "action": "query",
        "format": "json",
        "titles": title,
        "prop": "extracts",
        "explaintext": True,
    }
    # Custom User-Agent header to comply with Wikipedia's best practices
    headers = {"User-Agent": "RAGatouille_tutorial/0.0.1 (ben@clavie.eu)"}

    # A timeout keeps the notebook from hanging forever on a stalled connection
    response = requests.get(URL, params=params, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than as a confusing JSON/key error below
    response.raise_for_status()
    data = response.json()

    # Extracting page content; tolerate responses without a "query"/"pages" section
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), None)
    if page is None:
        return None
    return page.get("extract")

# Download the article that will be indexed with ColBERT
full_document = get_wikipedia("Hayao_Miyazaki")

# get_wikipedia returns None when the page has no extract; stop here instead of
# handing a None entry to the indexer further down.
if full_document is None:
    raise ValueError("No Wikipedia extract returned for 'Hayao_Miyazaki'")

In [None]:
# Options for the index built from the downloaded article
index_options = {
    "index_name": "Miyazaki-123",
    "max_document_length": 180,
    "split_documents": True,
}

# Build the index; the call's return value is shown as the cell output
RAG.index(collection=[full_document], **index_options)

This is a behaviour change from RAGatouille 0.8.0 onwards.
This works fine for most users and smallish datasets, but can be considerably slower than FAISS and could cause worse results in some situations.
If you're confident with FAISS working on your machine, pass use_faiss=True to revert to the FAISS-using behaviour.
--------------------


[Mar 22, 11:10:54] #> Creating directory .ragatouille/colbert/indexes/Miyazaki-123 


[Mar 22, 11:10:55] [0] 		 #> Encoding 122 passages..


  self.scaler = torch.cuda.amp.GradScaler()
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 4/4 [01:17<00:00, 19.44s/it]

[Mar 22, 11:12:13] [0] 		 avg_doclen_est = 131.98361206054688 	 len(local_sample) = 122
[Mar 22, 11:12:13] [0] 		 Creating 1,024 partitions.
[Mar 22, 11:12:13] [0] 		 *Estimated* 16,102 embeddings.
[Mar 22, 11:12:13] [0] 		 #> Saving the indexing plan to .ragatouille/colbert/indexes/Miyazaki-123/plan.json ..





used 20 iterations (7.3358s) to cluster 15297 items into 1024 clusters
[0.041, 0.041, 0.041, 0.036, 0.036, 0.039, 0.035, 0.04, 0.035, 0.035, 0.035, 0.038, 0.037, 0.039, 0.038, 0.039, 0.035, 0.034, 0.037, 0.04, 0.037, 0.036, 0.037, 0.039, 0.038, 0.034, 0.04, 0.035, 0.037, 0.037, 0.038, 0.039, 0.041, 0.035, 0.036, 0.035, 0.037, 0.035, 0.035, 0.041, 0.036, 0.039, 0.035, 0.036, 0.037, 0.035, 0.037, 0.04, 0.039, 0.035, 0.035, 0.036, 0.037, 0.036, 0.038, 0.036, 0.039, 0.039, 0.042, 0.035, 0.035, 0.037, 0.036, 0.037, 0.036, 0.037, 0.037, 0.038, 0.033, 0.034, 0.038, 0.036, 0.035, 0.036, 0.038, 0.036, 0.037, 0.039, 0.035, 0.035, 0.038, 0.04, 0.034, 0.04, 0.034, 0.036, 0.039, 0.039, 0.036, 0.045, 0.036, 0.037, 0.036, 0.037, 0.036, 0.036, 0.04, 0.035, 0.038, 0.038, 0.041, 0.041, 0.037, 0.038, 0.039, 0.035, 0.038, 0.033, 0.038, 0.034, 0.037, 0.037, 0.037, 0.035, 0.037, 0.04, 0.039, 0.037, 0.039, 0.038, 0.033, 0.034, 0.035, 0.038, 0.034, 0.038, 0.038, 0.038]


0it [00:00, ?it/s]

[Mar 22, 11:12:20] [0] 		 #> Encoding 122 passages..



  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:20<01:01, 20.36s/it][A
 50%|█████     | 2/4 [00:39<00:39, 19.77s/it][A
 75%|███████▌  | 3/4 [00:59<00:19, 19.98s/it][A
100%|██████████| 4/4 [01:15<00:00, 18.97s/it]
1it [01:16, 76.28s/it]
100%|██████████| 1/1 [00:00<00:00, 787.37it/s]

[Mar 22, 11:13:36] #> Optimizing IVF to store map from centroids to list of pids..
[Mar 22, 11:13:36] #> Building the emb2pid mapping..
[Mar 22, 11:13:36] len(emb2pid) = 16102



100%|██████████| 1024/1024 [00:00<00:00, 44668.98it/s]

[Mar 22, 11:13:36] #> Saved optimized IVF to .ragatouille/colbert/indexes/Miyazaki-123/ivf.pid.pt
Done indexing!





'.ragatouille/colbert/indexes/Miyazaki-123'

In [None]:
# Query the ColBERT index and show the three best-ranked passages
search_question = "What animation studio did Miyazaki found?"
results = RAG.search(query=search_question, k=3)
results

Loading searcher for index Miyazaki-123 for the first time... This may take a few seconds
[Mar 22, 11:14:22] #> Loading codec...
[Mar 22, 11:14:22] #> Loading IVF...
[Mar 22, 11:14:22] Loading segmented_lookup_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...
[Mar 22, 11:14:55] #> Loading doclens...


100%|██████████| 1/1 [00:00<00:00, 2939.25it/s]

[Mar 22, 11:14:55] #> Loading codes and residuals...



100%|██████████| 1/1 [00:00<00:00, 335.71it/s]

[Mar 22, 11:14:55] Loading filter_pids_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...





[Mar 22, 11:15:26] Loading decompress_residuals_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...
Searcher loaded!

#> QueryTokenizer.tensorize(batch_text[0], batch_background[0], bsize) ==
#> Input: What animation studio did Miyazaki found?, 		 True, 		 None
#> Output IDs: torch.Size([32]), tensor([  101,     1,  2054,  7284,  2996,  2106,  2771,  3148, 18637,  2179,
         1029,   102,   103,   103,   103,   103,   103,   103,   103,   103,
          103,   103,   103,   103,   103,   103,   103,   103,   103,   103,
          103,   103])
#> Output Mask: torch.Size([32]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])



  return torch.cuda.amp.autocast() if self.activated else NullContextManager()


[{'content': '=== Studio Ghibli ===\n\n\n==== Early films (1985–1995) ====\nFollowing the success of Nausicaä of the Valley of the Wind, Miyazaki and Takahata founded the animation production company Studio Ghibli on June 15, 1985, as a subsidiary of Tokuma Shoten, with offices in Kichijōji designed by Miyazaki. The studio\'s name had been registered a year earlier; Miyazaki named it after the nickname of the Caproni Ca.309 aircraft, meaning "a hot wind that blows in the desert" in Italian.',
  'score': 25.754240036010742,
  'rank': 1,
  'document_id': '021a6f07-848d-4f94-a921-7c67fc60f909',
  'passage_id': 42},
 {'content': 'Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao, [mijaꜜzaki hajao]; born January 5, 1941) is a Japanese animator, filmmaker, and manga artist. He co-founded Studio Ghibli and serves as its honorary chairman. Over the course of his career, Miyazaki has attained international acclaim as a masterful storyteller and creator of Japanese animated feature films, and is wide

In [None]:
# Expose the ColBERT index as a LangChain retriever (k=3) and run the same question through it
retriever = RAG.as_langchain_retriever(k=3)
langchain_question = "What animation studio did Miyazaki found?"
retriever.invoke(langchain_question)

  return torch.cuda.amp.autocast() if self.activated else NullContextManager()


[Document(metadata={}, page_content='=== Studio Ghibli ===\n\n\n==== Early films (1985–1995) ====\nFollowing the success of Nausicaä of the Valley of the Wind, Miyazaki and Takahata founded the animation production company Studio Ghibli on June 15, 1985, as a subsidiary of Tokuma Shoten, with offices in Kichijōji designed by Miyazaki. The studio\'s name had been registered a year earlier; Miyazaki named it after the nickname of the Caproni Ca.309 aircraft, meaning "a hot wind that blows in the desert" in Italian.'),
 Document(metadata={}, page_content='Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao, [mijaꜜzaki hajao]; born January 5, 1941) is a Japanese animator, filmmaker, and manga artist. He co-founded Studio Ghibli and serves as its honorary chairman. Over the course of his career, Miyazaki has attained international acclaim as a masterful storyteller and creator of Japanese animated feature films, and is widely regarded as one of the most accomplished filmmakers in the history of an