## Install Required Packages

In [16]:
pip install transformers langgraph langchain chromadb pypdf gradio

Collecting langgraph
  Using cached langgraph-1.0.3-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain_openai
  Using cached langchain_openai-1.0.3-py3-none-any.whl.metadata (2.6 kB)
Collecting chromadb
  Downloading chromadb-1.3.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pypdf
  Downloading pypdf-6.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting langgraph-checkpoint<4.0.0,>=2.1.0 (from langgraph)
  Downloading langgraph_checkpoint-3.0.1-py3-none-any.whl.metadata (4.7 kB)
Collecting langgraph-prebuilt<1.1.0,>=1.0.2 (from langgraph)
  Downloading langgraph_prebuilt-1.0.4-py3-none-any.whl.metadata (5.2 kB)
Collecting langgraph-sdk<0.3.0,>=0.2.2 (from langgraph)
  Downloading langgraph_sdk-0.2.9-py3-none-any.whl.metadata (1.5 kB)
INFO: pip is looking at multiple versions of langchain-openai to determine which version is compatible with other requirements. This could take a while.
Collecting langchain_openai
  Downloading langchain_openai

## Setup Utilitlies

In [1]:
HF_TOKEN=""

*gemma-3-270m-it for Generating Answers:*

In [2]:
from transformers import pipeline

generator = pipeline("text-generation", model="google/gemma-3-1b-it", token = HF_TOKEN)

Device set to use cpu


*Chromedb for documents retreival*

In [3]:
import chromadb

chroma_client = chromadb.Client()
collection = chroma_client.create_collection(name="RAG")

*Text Spitter for chunking*

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)

# USAGE: texts = text_splitter.split_text(read_pdf("slmatfoa.pdf"))

## RAG Nodes

**PIPELINE STATE**

In [5]:
from typing import TypedDict

class State(TypedDict):
  query: str
  context: list[str]
  answer: str

**RETREIVE NODE**

In [6]:
def retreive(state:State):
  """
  query RAG using a local vector database
  """

  context = collection.query(
    query_texts = state['query'],
    n_results = 5,
    include = ["documents", "distances"]
  )['documents']

  return {"context":context}

**UPSERT NODE**

In [7]:
current_length = 0

def upsert(texts: list[str]):
    collection.upsert(
        ids= [f"id{current_length + i}" for i in range(len(texts))],
        documents = texts
    )

    current_length += len(texts)

**GENERATE NODE**

In [29]:
from langchain_core.prompts import ChatPromptTemplate

answering_prompt = """
  You are an assistant that answers the user's question using ONLY the provided context.
"""

def answer(state:State):
  """
  generate the final answer using the LLM and retrieved documents
  """

  prompt = ChatPromptTemplate([
    ("system", answering_prompt),
    ("user", "Question: {question} \n\n Context: {context}"),
  ])

  query = prompt.invoke({"question":state['query'], "context":state['context']}).messages

  # Convert for gemma model
  queries = []
  for q in query:
      role = 'user' if q.type == 'human' else q.type

      queries.append({
          'role': role,
          'content': q.content
      })
  result = generator(queries)

  return {"answer":result[0]['generated_text'][-1]['content']}

## RAG PipeLine

In [30]:
from langgraph.graph import StateGraph, START, END

rag = (
    StateGraph(State)
    .add_sequence([retreive, answer])
    .add_edge(START, "retreive")
    .compile()
)

In [31]:
rag.invoke({"query":"Who am i ?"})

{'query': 'Who am i ?',
 'context': [[]],
 'answer': 'I am sorry, but the context is empty. I cannot answer your question without the provided text.'}