# RAG Pipeline

## Install Packages


In [1]:
!pip install -q pypdf langchain_community langchain-huggingface faiss-cpu sentence-transformers transformers torch accelerate

## Import Modules

In [12]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch 

## Define Embedding Model

In [15]:
model_name_embeddings = "sentence-transformers/all-MiniLM-L6-v2"

# Choose the embedding device from what is actually available instead of
# hard-coding 'mps'; a fixed 'mps' makes initialization fail (and silently
# leaves embeddings_model = None) on machines without Apple's Metal backend.
if torch.backends.mps.is_available():
    device_embeddings = 'mps'
elif torch.cuda.is_available():
    device_embeddings = 'cuda'
else:
    device_embeddings = 'cpu'

model_kwargs_embeddings = {'device': device_embeddings}
encode_kwargs_embeddings = {'normalize_embeddings': False}

# On failure the error is reported and embeddings_model is set to None so that
# later cells can test it before building or loading the FAISS index.
try:
    embeddings_model = HuggingFaceEmbeddings(
        model_name=model_name_embeddings,
        model_kwargs=model_kwargs_embeddings,
        encode_kwargs=encode_kwargs_embeddings
    )
except Exception as e:
    print(f"ERROR initializing embeddings model: {e}")
    embeddings_model = None

## Define Language Model


In [16]:
model_id_llm = "google/flan-t5-base"

# Build a text2text-generation pipeline around the seq2seq checkpoint and wrap
# it for LangChain. Any failure is reported and leaves llm = None so that
# later cells can detect the missing model.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_id_llm)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id_llm)

    # Device preference order: Apple MPS, then the first CUDA GPU (index 0),
    # otherwise -1, which the transformers pipeline treats as CPU.
    device_llm_pipeline = (
        'mps' if torch.backends.mps.is_available()
        else 0 if torch.cuda.is_available()
        else -1
    )

    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        device=device_llm_pipeline,
    )

    llm = HuggingFacePipeline(pipeline=pipe)
except Exception as e:
    print(f"ERROR initializing LLM: {e}")
    llm = None

Device set to use mps


## Parse and Load the PDF Document

In [17]:
pdf_file_path = "neuro_ai_research.pdf"

# Pre-bind both names so they exist whichever step fails.
loader = None
pages = None

# The loader is constructed inside the try block as well as used there: the
# constructor may itself raise for a missing or invalid path (NOTE(review):
# recent langchain_community releases appear to raise ValueError here rather
# than FileNotFoundError), and building it outside the guard would crash the
# cell and leave `pages` undefined for the cells below.
try:
    loader = PyPDFLoader(pdf_file_path)
    pages = loader.load_and_split()
    print(f"File '{pdf_file_path}' loaded successfully.")
except FileNotFoundError:
    print(f"PDF file not found at '{pdf_file_path}'")
    pages = None
except Exception as e:
    print(f"Could not load PDF. {e}")
    pages = None

File 'neuro_ai_research.pdf' loaded successfully.


## Create FAISS Vector Index

In [20]:
faiss_index = None
faiss_index_path = "neuro_ai_faiss_embeddings"

# Guard first: both the parsed pages and the embeddings model are needed.
if not (pages and embeddings_model):
    print("PDF pages or embeddings model not available.")
else:
    # Embed the pages into a FAISS index, then persist it to disk so that it
    # can be reloaded later without re-embedding.
    try:
        faiss_index = FAISS.from_documents(pages, embeddings_model)
        faiss_index.save_local(faiss_index_path)
        print(f"FAISS index saved to '{faiss_index_path}' successfully.")
    except Exception as e:
        print(f"ERROR creating/saving FAISS index: {e}")

FAISS index saved to 'neuro_ai_faiss_embeddings' successfully.


## Load Saved FAISS Index

In [23]:
loaded_faiss_index = None

# Resolve the index to query: prefer the copy persisted on disk, fall back to
# the in-memory index built earlier, otherwise report that none is available.
if embeddings_model:
    try:
        if os.path.exists(faiss_index_path):
            # SECURITY: load_local unpickles data stored next to the index, and
            # unpickling can execute arbitrary code. Keep
            # allow_dangerous_deserialization=True only for index directories
            # produced by this notebook or another trusted source.
            loaded_faiss_index = FAISS.load_local(
                faiss_index_path,
                embeddings_model,
                allow_dangerous_deserialization=True
            )
            print(f"FAISS index loaded from '{faiss_index_path}' successfully.")
        elif faiss_index:
            # Nothing on disk: reuse the index that is still in memory.
            loaded_faiss_index = faiss_index
            print("FAISS index from memory loaded successfully.")
        else:
            print(f"FAISS index not found at '{faiss_index_path}'.")
    except Exception as e:
        print(f"ERROR loading FAISS index: {e}")
else:
    print("Cannot load FAISS index.")

FAISS index loaded from 'neuro_ai_faiss_embeddings' successfully.


## Create the Conversational Retrieval Chain

In [24]:
qa_chain = None

# The chain needs both a vector index to retrieve from and an LLM to answer.
if not (loaded_faiss_index and llm):
    print("Either the FAISS index or the LLM is not available.")
else:
    # Retrieve the 3 most similar chunks for each question.
    retriever = loaded_faiss_index.as_retriever(search_kwargs={'k': 3})

    # return_source_documents=True keeps the retrieved chunks in the response
    # so they can be shown alongside the answer.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=retriever, return_source_documents=True
    )
    print("Conversational Retrieval Chain created successfully.")



Conversational Retrieval Chain created successfully.


## Define a Function to Interact with the RAG System

In [25]:
# Running list of (question, answer) tuples shared across ask_rag calls and
# passed to the chain as conversational context.
chat_history = []

def ask_rag(query: str) -> str:
    """Ask the conversational RAG chain a question and return its answer.

    Prints the query and a preview (first 250 characters) of each retrieved
    source document, and appends the (query, answer) pair to the module-level
    ``chat_history`` after a successful call.

    Args:
        query: The user's question. Empty or whitespace-only input is rejected
            without invoking the chain.

    Returns:
        The chain's answer with surrounding whitespace stripped, or a
        human-readable message when the chain is not initialized, the query
        is blank, or invoking the chain raises an exception.
    """
    if not qa_chain:
        return "Sorry, the RAG chain is not initialized yet. Please make sure everything is set up before asking questions."

    # A whitespace-only string is truthy, so check the stripped form too;
    # otherwise a blank question would be sent to the chain and recorded
    # in the chat history.
    if not query or not query.strip():
        return "It looks like you didn’t enter a question. Please provide a query to continue."

    chain_input = {"question": query, "chat_history": chat_history}

    print(f"\n[QUERY] Asking: {query}")

    try:
        response = qa_chain.invoke(chain_input)
    except Exception as e:
        # Report the failure as text; history is left untouched in this case.
        return f"[ERROR] Something went wrong while processing your query: {e}"

    chat_history.append((query, response['answer']))

    source_documents = response.get('source_documents')
    if source_documents:
        print("\n[SOURCES] Retrieved the following source documents:")
        for doc_number, doc in enumerate(source_documents, start=1):
            # Truncate long chunks so the printed output stays readable.
            if len(doc.page_content) > 250:
                content_preview = doc.page_content[:250] + "..."
            else:
                content_preview = doc.page_content
            source_page = doc.metadata.get('page', 'N/A')
            print(f"- Doc {doc_number} (Page {source_page}): {content_preview}\n")

    return response['answer'].strip()

## Example Queries

In [27]:
# Run two sample questions through the RAG helper; skip with a notice when
# the chain was not built.
if not qa_chain:
    print("The RAG chain is not initialized.")
else:
    print("\nQ&A \n")

    # Example 1: similarities
    question1 = "How are biological neural networks similar to Transformers?"
    answer1 = ask_rag(question1)
    print(f"\n[Answer 1] {answer1}")

    # Example 2: differences
    question2 = "How are biological neurons different from Transformer neurons?"
    answer2 = ask_rag(question2)
    print(f"\n[Answer 2] {answer2}")




Q&A 


[QUERY] Asking: How are biological neural networks similar to Transformers?

[SOURCES] Retrieved the following source documents:
- Doc 1 (Page 13): From 1, we see that V (t) is dependent on the conductance,
gL , of the resistor, the capacitance, C, of the capacitor, on the
resting voltage (E L ) and of a current source I (t). If we
multiply 1 by R := 1
C , we obtain dvmem
dt in terms of the
memb...

- Doc 2 (Page 14): J. D. Nuneset al.: Spiking Neural Networks: Survey
the synaptic weights, W. One of the prevailing methods is
the biologically inspired STDP. STDP results from a set of
neurobiological ﬁndings that started in 1949, with Donald
Hebb, who proposed a fun...

- Doc 3 (Page 25): residual learning in SNNs. Much like Spiking ResNet [146],
SEW ResNet substitutes the ReLU activation for a Spiking
Neuron (SN), however, it also ﬁnds an element-wise func-
tion, g, to realize identity mapping. This strategy overcomes
the drawbacks o...


[Answer 1] 

[QUERY] Asking: How are bio