In [27]:
import hashlib
import os
import warnings

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.text_splitter import NLTKTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

from src.helpers.config import get_settings, Settings

warnings.filterwarnings('ignore')

In [28]:
# Settings object read by later cells (CACHE_DIR, VECTOR_DB_PATH,
# GENERATION_MODEL_ID and GENERATION_TEMPERATURE are accessed below).
app_settings = get_settings()

In [29]:
def read_pdf(file_path):
    """Load the PDF at ``file_path`` with ``PyPDFLoader`` and return the loaded documents."""
    return PyPDFLoader(file_path).load()

In [30]:
def chunk(docs, chunk_size=1000, chunk_overlap=50):
    """Split loaded documents into smaller chunk documents with ``NLTKTextSplitter``.

    Args:
        docs: Iterable of loaded documents. Items without a ``page_content``
            attribute are skipped.
        chunk_size: Chunk size handed to the splitter (measured with ``len``).
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        A list of chunk documents in input order. Each chunk is created with the
        metadata of the document it came from, so retrieved context can be
        traced back to its origin instead of carrying empty metadata.
    """
    # Materialise the usable documents once so texts and metadata stay aligned.
    usable_docs = [doc for doc in docs if hasattr(doc, "page_content")]
    texts = [doc.page_content for doc in usable_docs]
    metadatas = [dict(getattr(doc, "metadata", None) or {}) for doc in usable_docs]

    # Initialize the text splitter with the specified chunk size and overlap
    text_splitter = NLTKTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len
    )

    # One call splits every text in order and attaches the matching metadata.
    chunks = text_splitter.create_documents(texts, metadatas=metadatas)

    # Log the number of generated chunks for debugging
    print(f"Split into {len(chunks)} chunks")
    return chunks

In [31]:
# Load the sample PDF and split it into chunks; `chunks` is the input passed
# to the vector-store / query code in the later cells.
file_path = "assets/files/How Our Brain Works.pdf"
docs = read_pdf(file_path)
chunks = chunk(docs)

Split into 735 chunks


In [32]:
# Ensure the cache and vector-store directories named in the settings exist.
# (`os` is imported in the import cell at the top of the notebook.)
for directory in (app_settings.CACHE_DIR, app_settings.VECTOR_DB_PATH):
    os.makedirs(directory, exist_ok=True)

In [33]:
def encode_key(key: str) -> str:
    """Return the SHA-256 hex digest of ``key``, usable as a file-system-safe name."""
    hasher = hashlib.sha256()
    hasher.update(key.encode())
    return hasher.hexdigest()

def compute_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embedding vectors as a single value."""
    # cosine_similarity works on 2-D inputs, so each vector is wrapped as one row
    # and the lone entry of the resulting 1x1 matrix is extracted.
    return cosine_similarity([embedding1], [embedding2])[0][0]

In [34]:
def create_qa_chain(llm_model, retriever):
    """Build a retrieval QA chain (no chat history) from an LLM and a retriever.

    A system/human chat prompt is wrapped in a stuff-documents chain, which is
    then combined with ``retriever`` through ``create_retrieval_chain``.
    """
    system_prompt = """You are an intelligent and versatile AI assistant designed to assist with a wide variety of tasks. 
    You excel in providing clear, concise, and accurate information, creative ideas, and thoughtful responses. 
    Use the provided context below to answer the user's query.
    Context:
    {context}"""

    messages = [("system", system_prompt), ("human", "{input}")]
    answer_chain = create_stuff_documents_chain(
        llm_model,
        ChatPromptTemplate.from_messages(messages),
    )
    return create_retrieval_chain(retriever, answer_chain)

In [35]:
def create_or_load_vector_store(chunks, embedder, cache_dir=app_settings.VECTOR_DB_PATH, batch_size=10):
    """Load a saved FAISS vector store from ``cache_dir`` or build and save a new one.

    Args:
        chunks: Chunk documents to index when no saved store can be loaded.
        embedder: Embeddings object used both to embed ``chunks`` and by the
            resulting store.
        cache_dir: Directory the FAISS index is loaded from / saved to.
        batch_size: Number of chunks embedded per ``embed_documents`` call.

    Returns:
        The loaded or newly created FAISS vector store.

    Raises:
        ValueError: If a new store must be built but ``chunks`` is empty, or if
            ``batch_size`` is smaller than 1.

    Note:
        A saved index is reused as-is; it is not checked against ``chunks`` or
        the embedding model, so delete ``cache_dir`` after changing either.
    """
    # The directory may exist without an index in it (it is created up front in
    # this notebook), so look for the index file itself. "index" is the default
    # index name used by both save_local and load_local.
    index_file = os.path.join(cache_dir, "index.faiss")
    if os.path.exists(index_file):
        print("Loading existing vector store...")
        try:
            # NOTE(security): loading unpickles the stored docstore. Only keep
            # this flag enabled for index files this notebook wrote itself.
            return FAISS.load_local(
                cache_dir,
                embedder,
                allow_dangerous_deserialization=True  # Safe for local development
            )
        except Exception as e:
            # Best effort: a corrupt or incompatible index is rebuilt below.
            print(f"Error loading vector store: {e}")
            print("Creating new vector store instead...")

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if len(chunks) == 0:
        raise ValueError("Cannot create a vector store from an empty list of chunks")

    print("Creating new vector store...")
    all_texts = []
    all_metadatas = []
    all_embeddings = []

    # Embed in small batches so the progress bar advances during long runs.
    for start in tqdm(range(0, len(chunks), batch_size), desc="Processing documents"):
        batch = chunks[start:start + batch_size]
        texts = [doc.page_content for doc in batch]
        all_texts.extend(texts)
        all_metadatas.extend(getattr(doc, "metadata", None) or {} for doc in batch)
        all_embeddings.extend(embedder.embed_documents(texts))

    # Pass the metadata through so retrieved documents keep their provenance.
    vector_store = FAISS.from_embeddings(
        text_embeddings=list(zip(all_texts, all_embeddings)),
        embedding=embedder,
        metadatas=all_metadatas,
    )

    # Save for future use
    os.makedirs(cache_dir, exist_ok=True)
    vector_store.save_local(cache_dir)

    return vector_store

In [36]:
def run_query(query, model_name, chunks, cache_dir=app_settings.VECTOR_DB_PATH,
              embedding_cache_dir=app_settings.CACHE_DIR, top_k=3):
    """Answer ``query`` with the RAG chain and report embedding similarities.

    Args:
        query: The user question.
        model_name: Ollama model name, used for both generation and embeddings.
        chunks: Chunk documents to index if no saved vector store can be loaded.
        cache_dir: Directory of the saved FAISS vector store.
        embedding_cache_dir: Directory of the on-disk embedding cache. Defaults
            to the configured ``CACHE_DIR`` instead of a hard-coded path.
        top_k: Number of documents the retriever returns.

    Returns:
        A tuple ``(response, document_similarities, query_response_similarity)``
        where ``document_similarities`` is a list of ``(page_content, similarity)``
        pairs sorted by descending similarity to the query.
    """
    print("Initializing models...")
    llm_model = Ollama(model=model_name, temperature=app_settings.GENERATION_TEMPERATURE)
    embeddings_model = OllamaEmbeddings(model=model_name)

    # Create cache-backed embeddings; the namespace keeps models apart on disk.
    print("Setting up embedding cache...")
    store = LocalFileStore(embedding_cache_dir)
    embedder = CacheBackedEmbeddings.from_bytes_store(
        embeddings_model,
        store,
        namespace=encode_key(model_name)
    )

    # Create or load vector store
    vector_store = create_or_load_vector_store(chunks, embedder, cache_dir)
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": top_k})

    # Create QA chain and process query
    print("Processing query...")
    rag_chain = create_qa_chain(llm_model, retriever)
    result = rag_chain.invoke({"input": query})

    retrieved_docs = result["context"]
    response = result.get("answer", result)

    print("\nRetrieved Context:")
    for doc in retrieved_docs:
        print(doc.page_content)
    print("\nGenerated Response:\n", response)

    # Compute similarities
    print("\nComputing similarities...")
    query_embedding = embedder.embed_query(query)
    response_embedding = embedder.embed_query(response)

    # Score each non-empty retrieved document against the query.
    document_similarities = []
    for doc in tqdm(retrieved_docs, desc="Analyzing documents"):
        if hasattr(doc, 'page_content') and doc.page_content.strip():
            doc_embedding = embedder.embed_query(doc.page_content)
            similarity = compute_similarity(query_embedding, doc_embedding)
            document_similarities.append((doc.page_content, similarity))

    # Sort and display results
    document_similarities.sort(key=lambda x: x[1], reverse=True)
    print("\nDocument Similarities:")
    for i, (doc, similarity) in enumerate(document_similarities):
        print(f"Similarity {i+1}: {similarity:.4f}, Snippet: {doc[:200]}...")

    query_response_similarity = compute_similarity(query_embedding, response_embedding)
    print(f"\nSimilarity between query and response: {query_response_similarity:.4f}")

    return response, document_similarities, query_response_similarity

In [37]:
# Example usage: answer one question end to end. The configured generation
# model name is passed to run_query, which uses it for both the LLM and the
# embeddings model.
query = "How does the brain process information?"
model_name = app_settings.GENERATION_MODEL_ID
response, document_similarities, query_response_similarity = run_query(
    query, model_name, chunks
)

Initializing models...
Setting up embedding cache...
Loading existing vector store...
Processing query...

Retrieved Context:
How do you control the machine that you see when 
you look into a mirror?

Your brain is a truly amazing organ.

Lets 
begin the process of understanding its structure, organization, and capabilities.
Understanding that standard 
function is a necessary prerequisite to understanding how your 
brain enables human intelligence.
The fundamental neural functionality that allows this system to operate is the cascaded detection of synchronous patterns of input by cortical pyramidal neurons under the timing control of the thalamus.

Generated Response:
 To answer your question, when you look into a mirror, you see a reflection of yourself because light from your face bounces back off the mirror's surface. The mirror doesn't have any "machine" that controls its behavior; it simply reflects light.

However, I can provide some insights on how our brains process informatio

Analyzing documents: 100%|██████████| 3/3 [00:02<00:00,  1.10it/s]


Document Similarities:
Similarity 1: 0.6733, Snippet: Understanding that standard 
function is a necessary prerequisite to understanding how your 
brain enables human intelligence....
Similarity 2: 0.6667, Snippet: How do you control the machine that you see when 
you look into a mirror?

Your brain is a truly amazing organ.

Lets 
begin the process of understanding its structure, organization, and capabilities....
Similarity 3: 0.6419, Snippet: The fundamental neural functionality that allows this system to operate is the cascaded detection of synchronous patterns of input by cortical pyramidal neurons under the timing control of the thalamu...

Similarity between query and response: 0.4711





#### From the output we can see that the similarity between the query and the retrieved documents is roughly 64–67%, which is good, while the similarity between the query and the response is only moderate, at about 47%.

In [39]:
# Stand-alone model instances for the step-by-step cells that follow.
# NOTE(review): temperature is hard-coded to 0.5 here, whereas run_query uses
# app_settings.GENERATION_TEMPERATURE — confirm the difference is intended.
llm_model = Ollama(model=model_name, temperature=0.5)
embeddings_model = OllamaEmbeddings(model=model_name)

In [42]:
# Sanity check: embed a short sample string with the embeddings model
sample_text = "test"
embedding_vector = embeddings_model.embed_query(sample_text)

# Print the length of the returned vector, i.e. the embedding dimensionality
print(f"Embedding dimensions: {len(embedding_vector)}")

Embedding dimensions: 3072


In [None]:
# Step-by-step version of run_query, using the module-level `llm_model` and
# `embeddings_model` defined in the cells above.

# Create cache-backed embeddings
print("Setting up embedding cache...")
store = LocalFileStore("./assets/cache/")
embedder = CacheBackedEmbeddings.from_bytes_store(
    embeddings_model, 
    store, 
    namespace=encode_key(model_name)
)

# Create or load vector store.
# `cache_dir` only exists as a parameter inside the functions above, so the
# configured vector-store path is referenced directly to avoid a NameError.
vector_store = create_or_load_vector_store(chunks, embedder, app_settings.VECTOR_DB_PATH)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Create QA chain and process query
print("Processing query...")
rag_chain = create_qa_chain(llm_model, retriever)
result = rag_chain.invoke({"input": query})

retrieved_docs = result["context"]
response = result.get("answer", result)

print("\nRetrieved Context:")
for doc in retrieved_docs:
    print(doc.page_content)
print("\nGenerated Response:\n", response)

# Compute similarities
print("\nComputing similarities...")
query_embedding = embedder.embed_query(query)
response_embedding = embedder.embed_query(response)

# Process documents with progress bar
document_similarities = []
for doc in tqdm(retrieved_docs, desc="Analyzing documents"):
    if hasattr(doc, 'page_content') and doc.page_content.strip():
        doc_embedding = embedder.embed_query(doc.page_content)
        similarity = compute_similarity(query_embedding, doc_embedding)
        document_similarities.append((doc.page_content, similarity))

# Sort and display results
document_similarities.sort(key=lambda x: x[1], reverse=True)
print("\nDocument Similarities:")
for i, (doc, similarity) in enumerate(document_similarities):
    print(f"Similarity {i+1}: {similarity:.4f}, Snippet: {doc[:200]}...")

query_response_similarity = compute_similarity(query_embedding, response_embedding)
print(f"\nSimilarity between query and response: {query_response_similarity:.4f}")