In [2]:
### Biomedical

In [None]:
                ┌────────────────────┐
                │   User Query       │
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │   Embed Query      │ ◄─ Using dense embedding model (e.g., SBERT, OpenAI)
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │ Vector Search (kNN)│ ◄─ In vector store (e.g., FAISS, Pinecone)
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │ Retrieve Top-k Docs│
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │  Format Context    │
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │  Prompt LLM (RAG)  │ ◄─ Append query + context
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │   Generated Answer │
                └────────────────────┘


In [2]:
import os

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Ollama
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# 1. Load and split your documents
# The loader reports read failures as a generic
# "RuntimeError: Error loading <path>", which hides the real cause. Checking the
# path up front gives an actionable message when the notebook is started from a
# different working directory or the file has not been created yet.
knowledge_path = "data/my_knowledge.txt"  # or use DirectoryLoader for a folder
if not os.path.isfile(knowledge_path):
    raise FileNotFoundError(
        f"Knowledge file not found: {os.path.abspath(knowledge_path)} "
        f"(current working directory: {os.getcwd()})"
    )

# Pin the encoding so the read does not depend on the platform's default
# locale; change this if the file is not UTF-8.
loader = TextLoader(knowledge_path, encoding="utf-8")
documents = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

# An empty corpus cannot be indexed; fail here with a clear message instead of
# deep inside the vector-store construction.
if not chunks:
    raise ValueError(f"No text chunks were produced from {knowledge_path}; is the file empty?")

# 2. Embed documents and create vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(chunks, embeddings)

# 3. Define the retriever
# The number of documents to return is a search parameter and must be passed
# through `search_kwargs`; a bare `k=4` keyword is not a retriever option.
retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# 4. Setup Ollama LLM (requires a local Ollama server with the model pulled)
# temperature=0 keeps answers as deterministic as the model allows.
llm = Ollama(model="llama3:instruct", temperature=0)

# 5. Create RAG chain
# return_source_documents=True adds the retrieved chunks to the result under
# "source_documents" so the answer can be traced back to its sources.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

# 6. Ask a question
query = "How does photosynthesis work?"
result = rag_chain(query)

# 7. Show result
print("Answer:")
print(result["result"])

print("\nSources:")
for doc in result["source_documents"]:
    print(f"- {doc.metadata.get('source', 'Unknown')}")



RuntimeError: Error loading data/my_knowledge.txt