# Your First Local RAG System: Efficiently Built Without External APIs

Read the accompanying article on [Medium](https://medium.com/@doyinelugbadebo/your-first-local-rag-system-de8302c7a676).

In [1]:
# Setup: Import libraries
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from langchain_ollama import ChatOllama

# Step 1: Extract Text from PDF
def extract_text_from_pdf(pdf_path):
    """
    Extract the plain text of every page in a PDF and join it into one string.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        str: The text of all pages, in page order, separated by single spaces.
    """
    # Open the document as a context manager so the underlying file handle is
    # released as soon as extraction finishes, even if a page fails to parse.
    with fitz.open(pdf_path) as doc:
        return " ".join(page.get_text("text") for page in doc)

# Source document for the pipeline; the bare file name is resolved against the
# current working directory.
pdf_path = "Robust Weighted LAD Regression.pdf"
# Extract the full text once so that it can be reused without re-reading the PDF.
pdf_text = extract_text_from_pdf(pdf_path=pdf_path)

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Step 2: Split Text into Chunks
def split_text_into_chunks(text, chunk_size=300, overlap=0):
    """
    Split text into whitespace-delimited word chunks for embedding.

    Args:
        text: The text to split; words are separated on any whitespace.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of trailing words of each chunk that are repeated at
            the start of the next one. Must satisfy 0 <= overlap < chunk_size.
            The default of 0 yields disjoint chunks.

    Returns:
        list[str]: Chunks in document order, each with its words joined by
        single spaces. Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If chunk_size is not positive or overlap is out of range.
    """
    # Validate up front: a zero step makes range() fail with an unrelated
    # message, and a negative one silently produces no chunks at all.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap!r}"
        )

    words = text.split()
    step = chunk_size - overlap

    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a chunk reaches the last word, any further window would only
        # repeat words that are already covered.
        if start + chunk_size >= len(words):
            break
    return chunks

# Number of words per chunk handed to the embedding model.
CHUNK_SIZE = 300

chunks = split_text_into_chunks(pdf_text, chunk_size=CHUNK_SIZE)

In [5]:
# Step 3: Generate Embeddings and Create FAISS Index
if not chunks:
    # With nothing to embed (for example a PDF that yielded no text) the code
    # below would otherwise fail with an unrelated-looking error.
    raise ValueError("No text chunks to index: the document produced no text.")

embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # Efficient embedding model

# FAISS operates on a C-contiguous float32 matrix with one row per chunk, so
# make that layout explicit instead of relying on the encoder's defaults.
embeddings = np.ascontiguousarray(embedding_model.encode(chunks), dtype=np.float32)

# Scale every row to unit length (in place). On unit vectors the squared L2
# distance equals 2 - 2 * cosine_similarity, so the L2 index below ranks
# neighbours exactly as cosine similarity would.
faiss.normalize_L2(embeddings)

dimension = embeddings.shape[1]  # Vector dimension
index = faiss.IndexFlatL2(dimension)  # Exact (brute-force) L2 search
index.add(embeddings)

# Metadata to map each vector's position in the index back to its chunk text
metadata = dict(enumerate(chunks))

In [6]:
# Step 4: Define a Function to Query the RAG System
def query_rag_system(query, top_k=3):
    """
    Retrieve the chunks most similar to a query from the FAISS index.

    The query is embedded with the module-level ``embedding_model``, normalised
    to unit length, and searched against the module-level ``index``. Hits are
    mapped back to their text through ``metadata``.

    Args:
        query: The question or search text.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        str: The retrieved chunks, best match first, separated by blank lines.
        Fewer than ``top_k`` chunks are returned when the index holds fewer
        vectors; the result is an empty string if there are no hits.
    """
    # encode() on a one-element list already yields a (1, dimension) matrix,
    # which is the shape FAISS searches with; only the dtype/layout is enforced.
    query_embedding = np.ascontiguousarray(
        embedding_model.encode([query]), dtype=np.float32
    )
    faiss.normalize_L2(query_embedding)

    _, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when fewer than top_k vectors are available;
    # skip those placeholders instead of looking them up in metadata.
    results = [metadata[int(idx)] for idx in indices[0] if idx != -1]
    return "\n\n".join(results)

In [7]:
# Step 5: Initialize ChatOllama for Chat-Style Responses
LLM_MODEL_NAME = "llama3.2:latest"  # Model tag handed to ChatOllama
LLM_TEMPERATURE = 0.7  # Sampling temperature handed to ChatOllama

llm = ChatOllama(model=LLM_MODEL_NAME, temperature=LLM_TEMPERATURE)

In [8]:
# Step 6: RAG-Based Question Answering
def ask_question(query):
    """
    Answer a question with retrieval-augmented generation.

    The three best-matching chunks are fetched via ``query_rag_system``,
    embedded in a prompt together with the question, and sent to the
    module-level ``llm``.

    Args:
        query: The question to answer.

    Returns:
        The ``content`` attribute of the object returned by ``llm.invoke``.
    """
    context_text = query_rag_system(query, top_k=3)

    # Assembled from adjacent literals; the indentation and the trailing
    # spaces are part of the prompt text that is sent to the model.
    prompt = (
        "Answer the following question based on the document context:\n"
        f"    Context: {context_text}\n"
        f"    Query: {query}\n"
        "    "
    )

    reply = llm.invoke(prompt)
    return reply.content

In [9]:
# Example Usage
# Run one sample question through ask_question and print the returned answer.
query = "What is the document about?"
response = ask_question(query)
print("Response:", response)

Response: The document appears to be discussing the properties and behavior of the Least Absolute Deviation (LAD) regression estimator, specifically its robustness and asymptotic distribution under certain conditions. It discusses various aspects such as the breakdown point, finite sample breakdown point, and heteroscedasticity, and provides proofs and lemmas to support these claims. The document also presents an application to real datasets using the Weighted LAD (WLAD) estimator, which is a variant of the original LAD estimator.
