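This notebook provides example code for building a basic retrieval-augmented generation (RAG) system: documents are split into chunks, embedded, indexed in a vector store, and retrieved as context for an LLM to answer questions.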
May 2025

In [None]:
# Install the packages this notebook relies on (left commented out; uncomment to run once).
# NOTE(review): the imports in the next cell use `langchain.embeddings`,
# `langchain.vectorstores`, `langchain.llms` and `langchain.document_loaders`;
# recent LangChain releases appear to serve those paths from the separate
# `langchain-community` package — confirm it is installed in this environment.
# !uv add langchain openai faiss-cpu sentence-transformers

[2K[2mResolved [1m104 packages[0m [2min 460ms[0m[0m                                       [0m
[2K[2mPrepared [1m45 packages[0m [2min 1.80s[0m[0m                                            
[2mUninstalled [1m1 package[0m [2min 0.91ms[0m[0m
[2K[2mInstalled [1m70 packages[0m [2min 263ms[0m[0m                              [0m
 [32m+[39m [1mannotated-types[0m[2m==0.7.0[0m
 [32m+[39m [1manyio[0m[2m==4.9.0[0m
 [32m+[39m [1mcertifi[0m[2m==2025.4.26[0m
 [32m+[39m [1mcharset-normalizer[0m[2m==3.4.2[0m
 [32m+[39m [1mdistro[0m[2m==1.9.0[0m
 [32m+[39m [1mfaiss-cpu[0m[2m==1.11.0[0m
 [32m+[39m [1mfilelock[0m[2m==3.18.0[0m
 [32m+[39m [1mfsspec[0m[2m==2025.3.2[0m
 [32m+[39m [1mgreenlet[0m[2m==3.2.2[0m
 [32m+[39m [1mh11[0m[2m==0.16.0[0m
 [32m+[39m [1mhttpcore[0m[2m==1.0.9[0m
 [32m+[39m [1mhttpx[0m[2m==0.28.1[0m
 [32m+[39m [1mhuggingface-hub[0m[2m==0.31.4[0m
 [32m+[39m [1midna[0m[2m==3.10[0m
 

In [None]:
# Step 1: Imports
import os
from getpass import getpass

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Step 2: Make sure an OpenAI API key is available.
# The key is never hardcoded here: unconditionally assigning a placeholder to
# OPENAI_API_KEY would overwrite a real key already exported in the environment.
# Instead, reuse the environment variable when it is set and prompt for the key
# (without echoing it) only when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Step 3: Load documents (replace 'data/data.txt' with the path to your own file)
loader = TextLoader("data/data.txt")
documents = loader.load()

# Step 4: Split the loaded documents into overlapping chunks so each piece is
# small enough to embed and retrieve on its own.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

# Step 5: Embed every chunk with a sentence-transformers model and index the
# vectors in an in-memory FAISS store.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(docs, embedding_model)

# Step 6: Set up the retriever (top 3 most similar chunks per query) and the LLM.
# temperature=0 is passed to the LLM to reduce randomness in its answers.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
llm = OpenAI(temperature=0)

# Step 7: Set up the RAG pipeline. return_source_documents=True makes the chain
# return the retrieved chunks alongside the answer so they can be displayed.
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)


In [None]:
# Step 8: Ask a question
query = "What is the main idea of the document?"
# Run the chain through .invoke() with its input key rather than calling the
# chain object directly; the direct-call form (Chain.__call__) is deprecated in
# current LangChain releases. The returned dict has the same keys either way.
result = rag_chain.invoke({"query": query})

# Step 9: Display the answer, then each retrieved chunk's source path and a
# 200-character preview of its text.
print("Answer:\n", result["result"])
print("\nSources:")
for doc in result["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"), "\n", doc.page_content[:200], "\n")