In [1]:
from langchain_community.document_loaders import PyPDFLoader
from IPython.display import display, HTML
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_ollama import OllamaEmbeddings
from langchain_ollama.llms import OllamaLLM
from langchain import hub
from langgraph.graph import START, StateGraph
from langchain_core.documents import Document
from typing_extensions import List, TypedDict

In [2]:
file_path = "./stanley.cec02.pdf"  # NEAT Algorithm publication
loader = PyPDFLoader(file_path=file_path)
document = loader.load()

In [3]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_documents(documents=document)

In [4]:
client = QdrantClient("http://localhost:6333")

embeddings = OllamaEmbeddings(model="llama3", base_url="http://localhost:11434")

client.create_collection(
    collection_name="neat-rag",
    vectors_config=VectorParams(size=4096, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="neat-rag",
    embedding=embeddings,
)

In [5]:
vector_store.add_documents(documents=chunks)

['fb2d58fe629742bc9fe66fdcb65e66a6',
 'c9259c37b681437cac68314f411658da',
 'c333cfdd1bf749c180d41637bc2958d5',
 '8a2b5b7912dc48fe9d33589b4f2d54ea',
 '4a9a7a8db49e40cc8b9ba8c3e73c3185',
 '1a0563acc8d74ceb8d90a4fb3a9d4c13',
 '4169a230f74c4761a10532e84d6751ce',
 'd913245a10334ffb80a51002b9dc20ce',
 '2bb439bff7154737b6a0e388869b7b24',
 'be6fffb8abef4fc0b0f5c54c6d2d66b2',
 '9642d2f066d343068d842f738e7dc0c0',
 '12296a1ba420470099d544852e849b1d',
 '5c4d5a6bab2a471a9a1cb88cb94893a8',
 'dedf58411e25453c956fe7f2e4e68098',
 'dadca382794948f0b6d668b5b8a9df72',
 '8676bb9fad1b4fef9bbc48cc78468c5a',
 '0f7883d32ad247c1961077e016870684',
 'f782e45044234a40acefb34fd74edc20',
 '466c7273da2b4e8191370c04556f2f5c',
 'e28f00449f9647b3ad81b47830f97322',
 'bb55a95b486247e481473e9bb2e8c2af',
 'a1b284876aab4ac2b13afedcaf7317c8',
 'c47bce8a59fc4fa0a5ef45415fceb0d4',
 'b3e06bdbaa464809b81bb6680de08653',
 'fcb9f69f5ce14703bb8439591ee46648',
 'eb4233475674466897b8cba03b4a7a63',
 '76e41ab5b1504f1aab6a1b5162d2530a',
 

In [6]:
llm = OllamaLLM(model="llama3", base_url="http://localhost:11434")

In [7]:
prompt = hub.pull("rlm/rag-prompt")
prompt



ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [8]:
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response}

In [9]:
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [10]:
response = graph.invoke({"question": "What is the NEAT algorithm?"})
print(response["answer"])

The NEAT algorithm is a type of evolutionary algorithm that uses both connection weight mutations and structural mutations to evolve neural networks. Structural mutations can add new connections or nodes to the network, allowing it to grow and become more complex. This process helps to incrementally introduce new structure into the network only as necessary, reducing the number of dimensions that need to be searched to optimize the network.


In [11]:
questions = [
    "What is the full form of NEAT?",
    "Who are the authors of the NEAT paper?",
    "What problem does NEAT aim to solve?",
    "What are the three key challenges NEAT addresses in topology and weight evolution?",
    "What are the three main components of NEAT?",
    "How does NEAT use species to protect innovations?",
    "How does speciation in NEAT protect structural innovations during evolution?",
    "What is the significance of starting with minimal structure in NEAT?",
    "How does NEAT start evolving neural networks differently from other methods?",
    "What are the two types of structural mutations in NEAT, and how do they affect the genome?",
    "What does the term 'disjoint genes' mean in the context of NEAT?",
    "Why is the double pole balancing task important in testing NEAT?",
    "How does NEAT compare to other neuroevolution methods like Cellular Encoding and Enforced Subpopulations?",
]

#### Answers
1. **What is the full form of NEAT?**  
   NeuroEvolution of Augmenting Topologies.

2. **Who are the authors of the NEAT paper?**  
   Kenneth O. Stanley and Risto Miikkulainen.

3. **What problem does NEAT aim to solve?**  
   It aims to evolve neural network topologies and weights efficiently for reinforcement learning tasks.

4. **What are the three key challenges NEAT addresses in topology and weight evolution?**  
- Crossover of different topologies.  
- Protection of structural innovation.  
- Minimizing complexity by starting with minimal structure.

5. **What are the three main components of NEAT?**  
- Historical markings.  
- Speciation.  
- Incremental growth from minimal structure.

6. **How does NEAT use species to protect innovations?**  
   By grouping similar genomes into species, NEAT ensures innovations are preserved while they optimize.

7. **How does speciation in NEAT protect structural innovations during evolution?**  
   It allows competition within species rather than across the entire population, giving time for innovations to mature.

8. **What is the significance of starting with minimal structure in NEAT?**  
   It reduces the search space, allowing NEAT to optimize simpler structures first and grow complexity as needed.

9. **How does NEAT start evolving neural networks differently from other methods?**  
   It begins with minimal networks and incrementally adds structure, avoiding unnecessary complexity.

10. **What are the two types of structural mutations in NEAT, and how do they affect the genome?**  
- **Add connection:** Adds a new link between nodes.  
- **Add node:** Splits an existing connection and introduces a new node.

11. **What does the term 'disjoint genes' mean in the context of NEAT?**  
    Genes that exist in one genome but not in another due to differences in evolutionary history.

12. **Why is the double pole balancing task important in testing NEAT?**  
    It is a challenging benchmark task that evaluates the ability to solve complex reinforcement learning problems.

13. **How does NEAT compare to other neuroevolution methods like Cellular Encoding and Enforced Subpopulations?**  
    NEAT outperforms them by evolving topologies more efficiently, requiring significantly fewer evaluations.


In [12]:
for question in questions:
    result = graph.invoke({"question": question})
    display(
        HTML(
            f"<strong>Question:</strong><div style='max-height: 200px; overflow-y: scroll;'>{question}</div><br>"
        )
    )
    context = result["context"]
    display(
        HTML(
            f"<strong>Context:</strong><div style='max-height: 200px; overflow-y: scroll;'>{context}</div><br>"
        )
    )
    answer = result["answer"]
    display(
        HTML(
            f"<strong>Answer:</strong><div style='max-height: 400px; overflow-y: scroll;'>{answer}</div><br>"
        )
    )

In [13]:
for question in questions:
    messages = prompt.invoke({"question": question, "context": ""})
    answer = llm.invoke(messages)
    display(
        HTML(
            f"<strong>Question:</strong><div style='max-height: 200px; overflow-y: scroll;'>{question}</div><br>"
        )
    )
    display(
        HTML(
            f"<strong>Answer:</strong><div style='max-height: 400px; overflow-y: scroll;'>{answer}</div><br>"
        )
    )

## QUESTIONS & ANSWERS


##### How does RAG improve the quality and reliability of LLM responses compared to pure LLM generation?

In the case of the PDF used, both RAG and LLM responses are quite brief. However, RAG is slightly more aligned with the content of the document, while LLM focuses more on general information. This results in better outcomes for some answers and worse for others. Overall, the performance is OK in my opinion.

##### What are the key factors affecting RAG performance (chunk size, embedding quality, prompt design)?

Factors affecting RAG performance include:

Chunk size: Smaller chunks provide more information but can also introduce more noise.
Embeddings model: A better model yields better results.
LLM model: Higher-quality models produce better outcomes.
Questions: Better-formulated questions lead to better answers.

##### How does the choice of vector database and embedding model impact system performance?
A poor embeddings database can result in slower model performance or incorrect answers if the embedding dimensions are too small. An unsuitable embeddings model may lead to chunks lacking essential information and incorrect classification.

##### What are the main challenges in implementing a production-ready RAG system?
Challenges include ensuring database consistency, aligning embeddings with the problem, and managing search times for large databases.

##### How can the system be improved to handle complex queries requiring multiple document lookups?

Implement multi-hop search.
Use document clustering to enhance query-specific search.
Apply reranking techniques to achieve more precise results.