In [None]:
import getpass
import os
from langchain.chat_models import init_chat_model
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings


os.environ["LANGSMITH_TRACING"] = "true"
# os.environ["LANGSMITH_API_KEY"] = getpass.getpass()

if not os.environ.get("GITHUB_ACCESS_TOKEN"):
  os.environ["GITHUB_ACESS_TOKEN"] = getpass.getpass("Enter your github access token: ")

llm = init_chat_model(
  model="gpt-4.1",
  model_provider="openai",
  openai_api_base="https://models.github.ai/inference",
  openai_api_key=os.environ["GITHUB_ACCESS_TOKEN"],
  temperature=0.7
  )

# embeddings 
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# in memory storage (more like database)
vector_store = InMemoryVectorStore(embeddings) # stores our embeddings in memory: for faster searches and retrievals

# loading documents
loader = WebBaseLoader(
  web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
  bs_kwargs=dict(
    parse_only=bs4.SoupStrainer(class_=("post-content", "post-title", "post-loader"))
  ),
)

docs = loader.load()

print(f"loaded documents with {len(docs[0].page_content)} characters")


USER_AGENT environment variable not set, consider setting it to identify your requests.


Enter API key for OpenAI:  ········


loaded documents with 42964 characters


In [2]:
# splitting the loaded document into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(docs)
print(f"document split into {len(all_splits)} chunks")

document split into 63 chunks


In [None]:
# storing the chunks
# the document chunks have now been stored as searchable vetctors
document_ids = vector_store.add_documents(documents=all_splits)
print(f"stored {len(document_ids)} ducument chunks") 

In [None]:
# retrieval and generation 

#setup the application structure

from langchain import hub
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# get premade prompt for RAG
prompt = hub.pull("rlm/rag-prompt")

# define what information flows through your app
class State (TypedDict):
    question: str # user's question
    context: List[Document] # retrieved documents
    answer = str # generted answer

In [None]:
# retrieve relevant documents
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])

    return {"context": retrieved_docs}

In [None]:
def generate(state: State):
    "generate answers with the retrieved documents"
    
    #combine the retrived documents into one
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])

    # create prompt with question and context 
    messages = prompt.invoke({
        "question": state["question"],
        "context": docs_content
    })

    # get response from LLM
    response = llm.invoke(messages)

    return {"answer": response.content}

In [None]:
"""connect everything together"""

graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
# test the rag system

result = graph.invoke({"question": "what is self reflection"})
print("answer: ")
print(result["answer"])
print("\nSOURCE DOCUMENTS: ")
for i, doc in enumerate(result["context"]):
    print(f"{i + 1}. {doc.page_content[:200]}...")