In [None]:
pip install langchain langchain_community langchain_groq sentence_transformers chromadb faiss-cpu

Collecting langchain_community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain_groq
  Downloading langchain_groq-0.3.4-py3-none-any.whl.metadata (2.6 kB)
Collecting chromadb
  Downloading chromadb-1.0.13-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting groq<1,>=0.28.0 (from langchain_groq)
  Downloading groq-0.29.0-py3-none-any.whl.metadata (16 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  D

In [None]:
pip install posthog



In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

In [None]:
# One TextLoader per source essay. The encoding is given explicitly so loading
# does not depend on the machine's locale default (the speech text contains
# non-ASCII punctuation such as curly apostrophes).
loaders = [
    TextLoader(path, encoding="utf-8")
    for path in (
        "/content/paul_graham_essay.txt",
        "/content/state_of_the_union.txt",
    )
]

In [None]:
# Run every loader and flatten the results into a single list of documents.
docs = [doc for loader in loaders for doc in loader.load()]

# Number of documents loaded.
len(docs)

2

In [None]:
# Report the character count of each loaded document, numbered from 1.
for number, document in enumerate(docs, start=1):
    char_count = len(document.page_content)
    print(f"Document {number} - Characters: {char_count}")

Document 1 - Characters: 75012
Document 2 - Characters: 38540


In [None]:
# Child splitter for the first retriever: chunk_size 400 with an overlap of 50.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=50,
)

In [None]:
# Embedding model shared by both Chroma vector stores in this notebook.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
from langchain.storage import InMemoryStore
from langchain.vectorstores import Chroma

In [None]:
import logging
import os

# Opt out of Chroma's anonymized usage telemetry. ANONYMIZED_TELEMETRY is the
# variable Chroma's settings read; it must be set before the first Chroma
# client is created.
os.environ["ANONYMIZED_TELEMETRY"] = "False"

# Silence everything below CRITICAL from the chromadb logger so telemetry and
# other warnings do not clutter cell output.
logging.getLogger("chromadb").setLevel(logging.CRITICAL)

In [None]:
# Vector store for the first (whole-document) retriever. The collection name
# is spelled correctly here; the original read "full_douments".
vector_store = Chroma(
    collection_name="full_documents",
    embedding_function=embedding,
)

In [None]:
# In-memory store; passed as the `docstore` of the ParentDocumentRetriever
# built next.
store = InMemoryStore()

In [None]:
from langchain.retrievers import ParentDocumentRetriever

# First variant: only a child splitter is supplied (no parent_splitter), so the
# docstore ends up with one entry per loaded document and a query returns a
# full document (see the recorded outputs of the following cells).
retriever = ParentDocumentRetriever(
    child_splitter=splitter,
    docstore=store,
    vectorstore=vector_store,
)

In [None]:
# Add the loaded documents to the retriever; `ids` is left at its default of
# None, exactly as before.
retriever.add_documents(docs)

In [None]:
# Count the entries currently held in the docstore.
sum(1 for _key in store.yield_keys())

2

In [None]:
# Query the retriever and show the length of the top hit's text.
response = retriever.invoke(
    "What did the president say about Ketanji Brown Jackson?"
)
len(response[0].page_content)

38540

In [None]:
# Second variant uses two splitters: chunk_size 400 for the children and
# chunk_size 2000 for the parents. Neither sets chunk_overlap, so the
# library default applies.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)

In [None]:
# Fresh docstore and a separate Chroma collection for the parent/child
# retriever. These rebind `store` and `vector_store` from the first variant.
store = InMemoryStore()
vector_store = Chroma(
    embedding_function=embedding,
    collection_name="parent_documents",
)

In [None]:
# Rebuild the retriever with both splitters, so it is given parent chunks
# (parent_splitter) as well as child chunks (child_splitter).
retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    docstore=store,
    vectorstore=vector_store,
)

In [None]:
# Add the same loaded documents to the parent/child retriever.
retriever.add_documents(documents=docs)

In [None]:
# Count the entries now held in the docstore.
sum(1 for _key in store.yield_keys())

66

In [None]:
# Same question against the parent/child retriever; display the top hit's text.
response = retriever.invoke(
    "What did the president say about Ketanji Brown Jackson?"
)
response[0].page_content

'In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \n\nWe cannot let this happen. \n\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n\nA former top litigator in private practice. A former 

In [None]:
# Text of the second retrieved document. In the recorded run this is a passage
# from the Paul Graham essay, i.e. not relevant to the question asked.
response[1].page_content

"At the time I didn't understand what he meant, but gradually it dawned on me that he was saying I should quit. This seemed strange advice, because YC was doing great. But if there was one thing rarer than Rtm offering advice, it was Rtm being wrong. So this set me thinking. It was true that on my current trajectory, YC would be the last thing I did, because it was only taking up more of my attention. It had already eaten Arc, and was in the process of eating essays too. Either YC was my life's work or I'd have to leave eventually. And it wasn't, so I would.\n\nIn the summer of 2012 my mother had a stroke, and the cause turned out to be a blood clot caused by colon cancer. The stroke destroyed her balance, and she was put in a nursing home, but she really wanted to get out of it and back to her house, and my sister and I were determined to help her do it. I used to fly up to Oregon to visit her regularly, and I had a lot of time to think on those flights. On one of them I realized I wa

In [None]:
from langchain_groq import ChatGroq

In [None]:
import os

from google.colab import userdata

# Read the Groq API key from Colab's user secrets and export it through the
# environment; ChatGroq is constructed later without an explicit key.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
os.environ['GROQ_API_KEY'] = GROQ_API_KEY

In [None]:
# Chat model used to answer questions over the retrieved context.
# temperature=0 asks for deterministic decoding so the recorded answer is
# reproducible across runs, rather than sampled at the library default.
# NOTE(review): confirm "gemma2-9b-it" is still served by Groq before re-running.
llm = ChatGroq(
    model="gemma2-9b-it",
    temperature=0,
)

In [None]:
from langchain.chains import RetrievalQA

# Question-answering chain over the parent/child retriever, using the "stuff"
# chain type. return_source_documents=True asks the chain to hand back the
# retrieved documents alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

In [None]:
query = "What did the president say about Ketanji Brown Jackson?"

# Run the chain through invoke(), the same entry point used for the retriever
# earlier in this notebook; calling the chain object directly is deprecated in
# LangChain. The result is still a dict, with the answer under "result".
result = chain.invoke({"query": query})
result["result"]

'The president said Ketanji Brown Jackson is one of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n\nHe also said she is a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He noted that she is a consensus builder and has received broad range of support since her nomination, from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.  \n'