In [1]:
import os
import warnings

from dotenv import load_dotenv
from IPython.display import clear_output

from document_processor import DocumentProcessor
from knowledge_graph import KnowledgeGraph
from vector_store import VectorStore
from entity_extraction import GraphEntityRetriever
from retrieval import Retriever
from rag_chain import RagChain
# Install a catch-all "ignore" filter so no Python warnings are shown from here on.
# NOTE(review): this also hides deprecation warnings from the libraries in use.
warnings.filterwarnings("ignore")
# Load settings from a .env file (python-dotenv) before the environment check below runs.
load_dotenv()

# Settings the rest of the notebook expects to find in the environment.
required_vars = ["NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD", "OPENROUTER_API_KEY"]

# A variable counts as missing when it is unset or set to an empty string.
missing_vars = list(filter(lambda name: not os.getenv(name), required_vars))

# Report only; execution continues even when something is missing.
if not missing_vars:
    print("All required environment variables are set.")
else:
    print(f"Warning: The following environment variables are not set: {', '.join(missing_vars)}")
    print("Please set them in a .env file or directly in the environment.")

All required environment variables are set.


In [2]:
# Initialize the system.
# Construction order matters: the vector store, retriever and RAG chain are each
# built from objects created on the lines above them.

document_processor = DocumentProcessor()
knowledge_graph = KnowledgeGraph()  # presumably connects using the NEO4J_* settings checked earlier — confirm
vector_store = VectorStore(knowledge_graph)  # receives the graph object as its only argument
entity_extractor = GraphEntityRetriever()
retriever = Retriever(knowledge_graph, vector_store)  # retrieval layer over both the graph and the vector store
rag_chain = RagChain(retriever)  # question-answering entry point used by the later cells



# Load the documents

In [3]:
# Run the document processor; `documents` is reused when building the graph
# and the vector index in later cells.
documents = document_processor.process_documents()
print(f"Loaded {len(documents)} document chunks from the knowledge base")

# Guard the preview: indexing an empty result would raise an IndexError and
# hide the real problem (nothing was loaded).
if documents:
    print("\nFirst document chunk:")
    # Show at most the first 500 characters of the first chunk.
    print(documents[0].page_content[:500] + "...")
else:
    print("\nNo document chunks were loaded, so there is nothing to preview.")

Loaded 78 document chunks from the knowledge base

First document chunk:
**Chris Olande: A Deep Professional Profile for RAG Systems**

**Overview**

Chris Olande is an ambitious and intellectually gifted data science student at Kenyatta University in Nairobi, Kenya. With a rigorous academic foundation in statistics and programming, Chris is cultivating a multidisciplinary approach that intersects applied mathematics, artificial intelligence, and human-centered design. He brings together analytical precision, computational depth, and social responsibility in his work...


# Build the knowledge graph

In [4]:
# `clear_output` comes from the import cell at the top of the notebook.

# WARNING: destructive step — clears the existing database before the graph is rebuilt.
knowledge_graph.clear_database()

# Create a knowledge graph from the documents
knowledge_graph.create_graph_from_documents(documents)

# Discard anything the build step wrote to the cell output so that only the
# confirmation line remains.
clear_output()
print("Knowledge graph created successfully!")

Knowledge graph created successfully!


In [5]:
# Last expression in the cell, so the notebook renders whatever this returns
# (the saved output shows a GraphWidget).
knowledge_graph.visualize_graph()

GraphWidget(layout=Layout(height='800px', width='100%'))

# Test entity extraction

In [6]:
# `question` is reused by the retrieval and RAG cells further down.
question = "Who is Chris Olande?"
# Ask the extractor which entities the question mentions; the saved output shows a list of names.
entity = entity_extractor.extract_entities(question)
print(entity)

Extracting entities for question: Who is Chris Olande?
Extracted 1 entities: ['Chris Olande']
['Chris Olande']


# Test structured retrieval

In [7]:
# Query the structured retriever with the same question; the saved output shows
# it returning graph relationships for the extracted entity.
structured_results = entity_extractor.structured_retriever(question)

# One print call, newline-separated, instead of three consecutive prints.
print(
    f"Question: {question}",
    "\nStructured retrieval results:",
    structured_results,
    sep="\n",
)

Processing structured retrieval for question: Who is Chris Olande?
Extracting entities for question: Who is Chris Olande?
Extracted 1 entities: ['Chris Olande']
Retrieved 39 relationships for entity 'Chris Olande'
Found 39 relationships for entity 'Chris Olande'
  - Chris Olande - WORKS_ON -> Data Science
  - Chris Olande - USES -> OpenCV
  - Chris Olande - USES -> FAISS
  - Chris Olande - USES -> vision transformers
  - Chris Olande - USES -> Pinecone
  ... and 34 more
Question: Who is Chris Olande?

Structured retrieval results:
Chris Olande - WORKS_ON -> Data Science
Chris Olande - USES -> OpenCV
Chris Olande - USES -> FAISS
Chris Olande - USES -> vision transformers
Chris Olande - USES -> Pinecone
Chris Olande - USES -> Streamlit
Chris Olande - USES -> Docker
Chris Olande - USES -> Python
Chris Olande - USES -> Flask
Chris Olande - USES -> Fastai
Chris Olande - USES -> Gradio
Chris Olande - USES -> torchvision transforms
Chris Olande - DESIGNS -> intelligent job search assistant
Ch

# Create the hybrid vector index

In [8]:
# Build the hybrid index through the vector store.
# NOTE(review): what "hybrid" combines is decided inside VectorStore and is not visible here.
vector_store.create_hybrid_index()

# Printed whenever the call above returns without raising; no result is inspected.
print("Hybrid vector index created successfully!")

Hybrid vector index created successfully!


# Create vector index

In [9]:
# Build the vector index from the document chunks loaded earlier in the notebook.
vector_store.create_vector_index(documents)

# Printed whenever the call above returns without raising; no result is inspected.
print("Vector index created successfully!")

Vector index created successfully!


## Test vector retrieval

In [10]:
# Vector-only retrieval for the current question, asking for two results.
# NOTE(review): the saved output lists a third document despite k=2 — either the
# output is stale or `vector_retrieval` does not honor `k`; confirm on a fresh run.
vector_results = retriever.vector_retrieval(question, k=2)

print(f"Question: {question}", "\nVector retrieval results:", sep="\n")

# Number the hits from 1 and show only the first 300 characters of each.
for position, hit in enumerate(vector_results, start=1):
    print(f"\nDocument {position}:")
    print(hit.page_content[:300] + "...")

Question: Who is Chris Olande?

Vector retrieval results:

Document 1:
**Chris Olande: A Deep Professional Profile for RAG Systems**

**Overview**

Chris Olande is an ambitious and intellectually gifted data science student at Kenyatta University in Nairobi, Kenya. With a rigorous academic foundation in statistics and programming, Chris is cultivating a multidisciplina...

Document 2:
 tutoring peers in statistics and machine learning, explaining complex concepts like backpropagation, gradient descent, and dimensionality reduction using intuitive analogies and real-life examples.

**Personality, Work Ethic, and Personal Interests**

Chris is disciplined, curious, and mathematical...

Document 3:
.
Have we anything new to offer upon the subject?  Nothing.  We have held the
subject up in every light of which it is capable; but it has been all in vain.
Shall we resort to entreaty and humble supplication?  What terms shall we
find which have not been already exhausted?  Let us not, I besee

# Test hybrid retrieval


In [11]:
# Hybrid retrieval for the current question; the saved output shows a
# "Structured data" section followed by an "Unstructured data" section.
# NOTE(review): the saved output reports no knowledge-graph information here,
# although the structured retriever found 39 relationships for the same
# question earlier — worth confirming how `hybrid_retrieval` queries the graph.
hybrid_results = retriever.hybrid_retrieval(question)

print(
    f"Question: {question}",
    "\nHybrid retrieval results:",
    hybrid_results,
    sep="\n",
)

Question: Who is Chris Olande?

Hybrid retrieval results:
Structured data:
            No relevant information found in the knowledge graph.

            Unstructured data:
            **Chris Olande: A Deep Professional Profile for RAG Systems**

**Overview**

Chris Olande is an ambitious and intellectually gifted data science student at Kenyatta University in Nairobi, Kenya. With a rigorous academic foundation in statistics and programming, Chris is cultivating a multidisciplinary approach that intersects applied mathematics, artificial intelligence, and human-centered design. He brings together analytical precision, computational depth, and social responsibility in his work. Chris is not only building machine learning systems—he is actively shaping the future of AI literacy and ethical, context-aware data applications in Kenya and beyond.

**Academic Background and Technical Expertise**

Chris’s academic journey is rooted in mathematical reasoning and statistical theory, reinforced 

# Test RAG Chain

In [12]:
# First turn: send only the question, with no chat history.
answer = rag_chain.invoke(dict(question=question))

# `answer` is kept because the follow-up cell builds its chat history from it.
print(f"Question: {question}", f"\nAnswer: {answer}", sep="\n")

Question: Who is Chris Olande?

Answer:  Chris Olande is an ambitious and intellectually gifted data science student at Kenyatta University in Nairobi, Kenya. He has a rigorous academic foundation in statistics and programming, and is cultivating a multidisciplinary approach that intersects applied mathematics, artificial intelligence, and human-centered design. Chris is known for building machine learning systems and is actively shaping the future of AI literacy and ethical, context-aware data applications in Kenya and beyond. He has a wide range of technical expertise in various data science technologies, including statistical foundations, programming and tooling, data science libraries, deep learning and NLP, ML experiment tracking, deployment tools, and more. He is also involved in educational leadership and STEM advocacy, and has a personal vision to become one of Africa's leading data scientists and contribute to responsible AI systems that serve the public good.


## Test using a follow-up question

In [13]:
# Seed the conversation with the first question/answer pair.
chat_history = [(question, answer)]

# The follow-up says "he" rather than naming the person, so it relies on the
# history passed alongside it.
follow_up_question = "What does he even do at Kenyatta University?"

# Send the new question together with the prior exchange.
follow_up_answer = rag_chain.invoke(
    dict(question=follow_up_question, chat_history=chat_history)
)

print(
    f"Follow-up question: {follow_up_question}",
    f"\nAnswer: {follow_up_answer}",
    sep="\n",
)

Follow-up question: What does he even do at Kenyatta University?

Answer:  Chris Olande is a data science student at Kenyatta University in Nairobi, Kenya. He tutors peers in statistics and machine learning, explaining complex concepts using intuitive analogies and real-life examples.


In [14]:
# Second follow-up in the same conversation, again phrased with "he".
quiz = "How and what tools does he use and what jobs is he suited for in the future?"

# Reuses `chat_history` unchanged and overwrites the previous `follow_up_answer`.
follow_up_answer = rag_chain.invoke(dict(question=quiz, chat_history=chat_history))

print(f"Follow-up question: {quiz}", f"\nAnswer: {follow_up_answer}", sep="\n")

Follow-up question: How and what tools does he use and what jobs is he suited for in the future?

Answer:  Chris Olande uses various tools in his work as a data scientist, including Python, R, Git/GitHub, shell scripting, NumPy, pandas, matplotlib, seaborn, scikit-learn, SciPy, PyTorch, Hugging Face Transformers, Fastai, OpenCV, LangChain, SentenceTransformers, Weights & Biases, TensorBoard, Streamlit, Gradio, Flask, Docker, FAISS, and Pinecone. He is suited for jobs in intelligent agents, AI in education, research labs & AI startups, and open-source contributions, particularly in areas like language model interpretability, ethical AI, real-time AI tooling, and Retrieval-Augmented Generation (RAG) systems.


# Test questions based on the Romeo and Juliet knowledge base

In [15]:
# First question on a new topic, asked without any chat history.
question = "give me a brief story about the romeo and juliet book"
# Test the RAG chain
answer = rag_chain.invoke({"question": question})

# Start a new conversation for this topic. Without this reset, the follow-up
# cells below would keep sending the earlier Chris Olande exchange as
# `chat_history`, giving the chain context unrelated to the questions asked.
chat_history = [(question, answer)]

print(f"Question: {question}")
print(f"\nAnswer: {answer}")

Question: give me a brief story about the romeo and juliet book

Answer:  "The Tragedy of Romeo and Juliet" is a play written by William Shakespeare. It tells the story of two young lovers, Romeo Montague and Juliet Capulet, who belong to two feuding families in the city of Verona. Despite their families' longstanding hatred for each other, Romeo and Juliet fall in love at first sight at a masquerade ball. They secretly marry with the help of Friar Lawrence, but their happiness is short-lived as their families' feud leads to a series of tragic events, including the deaths of both Romeo and Juliet. The play explores themes of love, family, and the consequences of hatred, and is considered one of Shakespeare's most famous works.


In [16]:
# Note: this rebinds `question`, which earlier cells also used.
question = "How did Romeo meet Juliet?"

# NOTE(review): `chat_history` is not rebuilt in this cell; confirm an earlier
# cell set it to the conversation this question is meant to follow.
follow_up_answer = rag_chain.invoke(dict(question=question, chat_history=chat_history))

print(f"Follow-up question: {question}", f"\nAnswer: {follow_up_answer}", sep="\n")

Follow-up question: How did Romeo meet Juliet?

Answer:  In the context provided, Romeo and Juliet first meet at a party hosted by Capulet. They meet in secret because they are from rival families and are not supposed to associate with each other. Juliet is introduced to Romeo by her Nurse, and they quickly fall in love. Later, they meet again in Friar Lawrence's garden, where they consummate their relationship and plan to get married in secret.


In [17]:
# Two-part question about how the play ends for each character.
quiz = "How did Romeo meet his fate? go on to tell me how Juliet met her too."

# Sent with whatever `chat_history` currently holds; overwrites `follow_up_answer`.
follow_up_answer = rag_chain.invoke(
    dict(question=quiz, chat_history=chat_history)
)

print(
    f"Follow-up question: {quiz}",
    f"\nAnswer: {follow_up_answer}",
    sep="\n",
)

Follow-up question: How did Romeo meet his fate? go on to tell me how Juliet met her too.

Answer:  In the play "Romeo and Juliet" by William Shakespeare, Romeo is banished from Verona after killing Tybalt in a duel. Later, Juliet is given a potion by Friar Lawrence that is meant to make her appear dead, but she is awakened before Romeo can arrive to take her away. When he finds her in the tomb, he is devastated and eventually takes his own life. As for Juliet, she wakes up and finds herself in the tomb, not knowing that Romeo is dead. She is later found by Friar Laurence and the Nurse, and they try to find a way to reunite her with Romeo. However, the play ends tragically with both Juliet and Romeo dying.


# Test questions about the Declaration of Independence

In [18]:
# First question on a new topic, asked without any chat history.
question = "what grievances were made against the King of England?"
# Test the RAG chain
answer = rag_chain.invoke({"question": question})

# Start a new conversation for this topic. Without this reset, the follow-up
# cells below would keep sending an exchange from an earlier, unrelated topic
# as `chat_history`.
chat_history = [(question, answer)]

print(f"Question: {question}")
print(f"\nAnswer: {answer}")

Question: what grievances were made against the King of England?

Answer:  The grievances made against the King of England, as stated in the Declaration of Independence, include:

1. Refusing to give assent to laws that were wholesome and necessary for the public good.
2. Forbidding governors to pass laws of immediate and pressing importance without suspending them until his assent was obtained, and neglecting to attend to them when suspended.
3. Refusing to pass laws for the accommodation of large districts of people unless they would relinquish the right of representation in the legislature.
4. Calling legislative bodies to places unusual, uncomfortable, and distant from the depository of their public records, for the sole purpose of fatiguing them into compliance with his measures.
5. Dissolving representative houses repeatedly for opposing his invasions on the rights of the people.
6. Refusing for a long time to cause others to be elected after dissolutions, leaving the legislative

In [19]:
# Follow-up: "the declaration" is not spelled out, so the question leans on context.
quiz = "How did the declaration change the status of the American colonies?"

# NOTE(review): `chat_history` is not rebuilt in this cell; confirm an earlier
# cell set it to the conversation this question is meant to follow.
follow_up_answer = rag_chain.invoke(dict(question=quiz, chat_history=chat_history))

print(f"Follow-up question: {quiz}", f"\nAnswer: {follow_up_answer}", sep="\n")

Follow-up question: How did the declaration change the status of the American colonies?

Answer:  The Declaration of Independence declared the American colonies to be free and independent states, absolved from all allegiance to the British Crown, and dissolved all political connection between them and the State of Great Britain. This meant that the American colonies were no longer under British rule and were now independent nations.


In [20]:
# Asks the chain to relate the document to Enlightenment ideas.
quiz = "What enlightenment ideas are embedded in the declaration of independence?"

# Same history object as the previous cell; `follow_up_answer` is overwritten.
follow_up_answer = rag_chain.invoke(
    dict(question=quiz, chat_history=chat_history)
)

print(
    f"Follow-up question: {quiz}",
    f"\nAnswer: {follow_up_answer}",
    sep="\n",
)

Follow-up question: What enlightenment ideas are embedded in the declaration of independence?

Answer:  The Declaration of Independence, written by Thomas Jefferson, is a foundational document of the United States that embodies several Enlightenment ideas. Here are some key Enlightenment ideas that can be found in the Declaration:

1. Natural Rights: The Declaration asserts that all men are created equal and are endowed by their Creator with certain unalienable Rights, such as Life, Liberty, and the pursuit of Happiness. This idea is rooted in the Enlightenment belief in natural rights, which are inherent to all individuals and cannot be taken away by any government.

2. Consent of the Governed: The Declaration states that governments are instituted among men to secure these rights, deriving their just powers from the consent of the governed. This idea reflects the Enlightenment belief in the social contract, whereby individuals agree to be governed in exchange for protection of their 

In [23]:
# Asks about the events leading up to the drafting of the document.
quiz = "What events led to the drafting of the declaration of independence in 1776?"

# Sent with the current `chat_history`; replaces the previous `follow_up_answer`.
follow_up_answer = rag_chain.invoke(dict(question=quiz, chat_history=chat_history))

print(f"Follow-up question: {quiz}", f"\nAnswer: {follow_up_answer}", sep="\n")

Follow-up question: What events led to the drafting of the declaration of independence in 1776?

Answer:  The events that led to the drafting of the Declaration of Independence in 1776 were a long train of abuses and usurpations by the King of Great Britain, which the colonists believed were designed to establish an absolute tyranny over them. These abuses included refusing assent to laws, obstructing the administration of justice, quartering troops among them, cutting off trade, imposing taxes without consent, depriving them of trial by jury, and waging war against them, among other grievances. The colonists felt that these actions violated their rights to life, liberty, and the pursuit of happiness, and that it was their right and duty to throw off such a government and provide new guards for their future security.
