In [None]:

!pip install -U langchain-community
# Install the chromadb library
!pip install chromadb

In [13]:

# Install faiss-cpu for FAISS vector store
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [15]:
# Install the gpt4all library
!pip install gpt4all

Collecting gpt4all
  Downloading gpt4all-2.8.2-py3-none-manylinux1_x86_64.whl.metadata (4.8 kB)
Downloading gpt4all-2.8.2-py3-none-manylinux1_x86_64.whl (121.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gpt4all
Successfully installed gpt4all-2.8.2


In [23]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.llms import HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# 1. Sample documents
docs = [
    Document(page_content="LangChain has many modules such as agents and chains."),
    Document(page_content="The embedding size for OpenAI models is 1536."),
]

# 2. Use a small free HuggingFace embedding model
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# 3. Create Chroma DB
vectordb = Chroma.from_documents(documents=docs, embedding=embedding)

# 4. Create base retriever
base_retriever = vectordb.as_retriever()

# 5. Load a small free LLM (like distilgpt2)
model_name = "distilgpt2"  # Small model; you can use larger ones like "tiiuae/falcon-rw-1b" if RAM allows
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# 6. Create the HuggingFace pipeline
hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# 7. Create LLMChainExtractor compressor
compressor = LLMChainExtractor.from_llm(llm)

# 8. Wrap retriever with ContextualCompressionRetriever
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever
)

# 9. Perform compressed retrieval
results = compression_retriever.get_relevant_documents("What is LangChain?")

# 10. Output results
print("\n📌 ContextualCompressionRetriever Output:")
for i, doc in enumerate(results):
    print(f"{i+1}. {doc.page_content}")


Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



📌 ContextualCompressionRetriever Output:
[Document(metadata={}, page_content="Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return NO_OUTPUT. \n\nRemember, *DO NOT* edit the extracted parts of the context.\n\n> Question: What is LangChain?\n> Context:\n>>>\nLangChain has many modules such as agents and chains.\n>>>\nExtracted relevant parts:\n>>>\n<p > In the above example, for example, let's try to extract the contents of each of the modules: >>>>\n<p > An initial value of the given module was defined by the module that returned the module named `_' in the module name. So if we're comparing these modules to a function or module that returns the value to the module, we see the value is the same as the returned module, since you can see that these modules were defined before"), Document(metadata={}, page_content="Given the following question and context, extract any part of t

In [11]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document

# Sample documents
docs = [
    Document(page_content="LangChain has many modules such as agents and chains."),
    Document(page_content="The embedding size for OpenAI models is 1536."),
]

# HuggingFace embedding (Free)
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create Chroma DB
vectordb = Chroma.from_documents(documents=docs, embedding=embedding)

# Create retriever from Chroma
retriever = vectordb.as_retriever()

# Perform query
results = retriever.get_relevant_documents("What is LangChain?")

# Output results
print("\n📌 Chroma Retriever Output:")
for i, doc in enumerate(results):
    print(f"{i+1}. {doc.page_content}")



📌 Chroma Retriever Output:
1. LangChain has many modules such as agents and chains.
2. Chroma is a local vector database.
3. Bananas are yellow.
4. Apple is a fruit.


  results = retriever.get_relevant_documents("What is LangChain?")


In [9]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# Use SentenceTransformers model (free, no API key needed)
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Your text data
texts = ["Apple is a fruit.", "Bananas are yellow.", "Chroma is a local vector database."]
db = Chroma.from_texts(texts, embedding)

# Perform similarity search
query = "What color are bananas?"
results = db.similarity_search(query)

# Output
print("\n📌 Chroma Output:")
for i, doc in enumerate(results):
    print(f"{i+1}. {doc.page_content}")


  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


📌 Chroma Output:
1. Bananas are yellow.
2. Apple is a fruit.
3. Chroma is a local vector database.


In [24]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
from langchain.llms import HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Sample documents
docs = [
    Document(page_content="LangChain supports agents, chains, and tools."),
    Document(page_content="You can use OpenAI or HuggingFace models with LangChain."),
    Document(page_content="Vector stores like FAISS and Pinecone are used in RAG."),
]

# Use HuggingFace embeddings (free)
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create FAISS vector store
vectordb = FAISS.from_documents(docs, embedding)

# Load small HuggingFace causal LLM for query expansion
model_name = "distilgpt2"  # lightweight model, swap for larger if you want
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=50)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# Create MultiQueryRetriever with HuggingFace LLM (query expansion)
retriever = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(),
    llm=llm
)

# Query
results = retriever.get_relevant_documents("How does LangChain use models?")

print("\n📌 MultiQueryRetriever Output:")
for i, doc in enumerate(results):
    print(f"{i+1}. {doc.page_content}")


Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



📌 MultiQueryRetriever Output:
1. LangChain supports agents, chains, and tools.
2. You can use OpenAI or HuggingFace models with LangChain.
3. Vector stores like FAISS and Pinecone are used in RAG.
