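This notebook provides example code for building a basic retrieval-augmented generation (RAG) system: a text document is split into chunks, embedded with a Hugging Face sentence-transformer model, indexed with FAISS, and queried through an OpenAI LLM.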

In [7]:
# Step 1: Imports
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

from dotenv import load_dotenv
import os

# Step 2: Read the OpenAI API key from the environment (populated from a .env file)
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    # os.environ only accepts str values; assigning the None returned by
    # os.getenv for a missing key would fail with an unhelpful TypeError,
    # so fail early with an actionable message instead.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in your shell before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

# Settings a reader is most likely to tune
DOCUMENT_PATH = "../data/document.txt"  # replace with the path to your own text file
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
TOP_K = 3  # value passed as "k" in the retriever's search_kwargs

# Step 3: Load the source document
loader = TextLoader(DOCUMENT_PATH)
documents = loader.load()

# Step 4: Split the loaded documents into chunks
text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(documents)

# Step 5: Embed the chunks and index them with FAISS
# NOTE: FAISS needs the `faiss-cpu` (or `faiss-gpu`) package installed;
# without it this step raises ImportError.
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
db = FAISS.from_documents(docs, embedding_model)

# Step 6: Set up the retriever and the LLM
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": TOP_K},
)
llm = OpenAI(temperature=0)

# Step 7: Assemble the RAG pipeline
# return_source_documents=True is requested so the result also carries the
# retrieved documents alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)


ImportError: Could not import faiss python package. Please install it with `pip install faiss-gpu` (for CUDA supported GPU) or `pip install faiss-cpu` (depending on Python version).

In [4]:
# Step 8: Ask a question
query = "What does this document contain?"
# Run the chain through .invoke() instead of calling the chain object
# directly: direct calls (Chain.__call__) are deprecated in LangChain and
# emit a deprecation warning. RetrievalQA takes its input under the "query" key.
result = rag_chain.invoke({"query": query})

# Step 9: Display the answer, then the source and first 200 characters of
# each retrieved document
print("Answer:\n", result["result"])
print("\nSources:")
for doc in result["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"), "\n", doc.page_content[:200], "\n")

  result = rag_chain(query)


Answer:
 
This document contains information about a main character named Kafka Tamura.

Sources:
- data/document.txt 
 There is a main character. His name is Kafka Tamura. 

