In [2]:
import getpass
import os
from langchain.chat_models import init_chat_model
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings


os.environ["LANGSMITH_TRACING"] = "true"
# os.environ["LANGSMITH_API_KEY"] = getpass.getpass()

if not os.environ.get("GITHUB_ACCESS_TOKEN"):
  os.environ["GITHUB_ACCESS_TOKEN"] = getpass.getpass("Enter your github access token: ")

llm = init_chat_model(
  model="gpt-4.1",
  model_provider="openai",
  openai_api_base="https://models.github.ai/inference",
  openai_api_key=os.environ["GITHUB_ACCESS_TOKEN"],
  temperature=0.7
  )

# embeddings 
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# in memory storage (more like database)
vector_store = InMemoryVectorStore(embeddings) # stores our embeddings in memory: for faster searches and retrievals

# loading documents
loader = WebBaseLoader(
  web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
  bs_kwargs=dict(
    parse_only=bs4.SoupStrainer(class_=("post-content", "post-title", "post-loader"))
  ),
)

docs = loader.load()

print(f"loaded documents with {len(docs[0].page_content)} characters")


Enter your github access token:  ········


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


loaded documents with 42964 characters


loaded documents with 42964 characters


In [3]:
# splitting the loaded document into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(docs)
print(f"document split into {len(all_splits)} chunks")

document split into 63 chunks


In [4]:
# storing the chunks
# the document chunks have now been stored as searchable vetctors
document_ids = vector_store.add_documents(documents=all_splits)
print(f"stored {len(document_ids)} ducument chunks") 

stored 63 ducument chunks


In [5]:
# retrieval and generation 

#setup the application structure

from langchain import hub
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# get premade prompt for RAG
prompt = hub.pull("rlm/rag-prompt")

# define what information flows through your app
class State (TypedDict):
    question: str # user's question
    context: List[Document] # retrieved documents
    answer = str # generted answer



In [6]:
# retrieve relevant documents
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])

    return {"context": retrieved_docs}

In [7]:
def generate(state: State):
    "generate answers with the retrieved documents"
    
    #combine the retrived documents into one
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])

    # create prompt with question and context 
    messages = prompt.invoke({
        "question": state["question"],
        "context": docs_content
    })

    # get response from LLM
    response = llm.invoke(messages)

    return {"answer": response.content}

In [8]:
"""connect everything together"""

graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [9]:
# test the rag system

result = graph.invoke({"question": "what is self reflection"})
print("answer: ")
print(result["answer"])
print("\nSOURCE DOCUMENTS: ")
for i, doc in enumerate(result["context"]):
    print(f"{i + 1}. {doc.page_content[:200]}...")

Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Unauthorized"}\n')trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce; trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=e7ee3554-5ff7-439b-af64-490a243de8f7; trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=53d5c67f-5119-4725-9e7f-e8fd48c4cf17; trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=4f0fe5b1-a2b8-4df7-96b8-1a3f20e28796; trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=5e5e95db-e05e-4496-a06a-67589a3407d1


answer: 


KeyError: 'answer'

Failed to send compressed multipart ingest: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Unauthorized"}\n')trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=5e5e95db-e05e-4496-a06a-67589a3407d1; trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=53d5c67f-5119-4725-9e7f-e8fd48c4cf17; trace=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce,id=a4cb5a8f-0d6c-474f-bdca-e6d7cac29fce
