In [1]:
from document_processor import DocumentProcessor
from knowledge_graph import KnowledgeGraph
from vector_store import VectorStore
from entity_extraction import GraphEntityRetriever
from retrieval import Retriever
from rag_chain import RagChain
from dotenv import load_dotenv
import os
import warnings
warnings.filterwarnings("ignore")
load_dotenv()

# Settings the rest of the notebook expects to find in the environment
# (loaded from a .env file by load_dotenv() above, or exported directly).
required_vars = ["NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD", "OPENROUTER_API_KEY"]
# os.getenv returns None for unset names; empty values are treated as missing too.
missing_vars = list(filter(lambda name: not os.getenv(name), required_vars))

if not missing_vars:
    print("All required environment variables are set.")
else:
    print(f"Warning: The following environment variables are not set: {', '.join(missing_vars)}")
    print("Please set them in a .env file or directly in the environment.")

All required environment variables are set.


In [2]:
# Initialize the System
#
# Build every pipeline component once; the cells below reuse these objects.
# Construction order matters: VectorStore and Retriever are handed the
# KnowledgeGraph instance, and RagChain wraps the Retriever.
# NOTE(review): these constructors presumably read the NEO4J_* and
# OPENROUTER_API_KEY settings checked in the first cell — confirm.

document_processor = DocumentProcessor()
knowledge_graph = KnowledgeGraph()
vector_store = VectorStore(knowledge_graph)
entity_extractor = GraphEntityRetriever()
retriever = Retriever(knowledge_graph, vector_store)
rag_chain = RagChain(retriever)



# Load the documents

In [3]:
# Load the knowledge-base chunks; the graph-building and vector-index cells
# further down both consume `documents`.
documents = document_processor.process_documents()
print(f"Loaded {len(documents)} document chunks from the knowledge base")

# Fail fast with an actionable message. Indexing documents[0] on an empty
# result would only raise a bare IndexError, and a later cell calls
# knowledge_graph.clear_database() before rebuilding, so there is nothing
# useful to continue with when no chunks were loaded.
if len(documents) == 0:
    raise ValueError(
        "No document chunks were loaded; check that the knowledge base "
        "contains readable documents before building the graph."
    )

# Preview at most the first 500 characters of the first chunk.
print("\nFirst document chunk:")
print(documents[0].page_content[:500] + "...")

Loaded 2 document chunks from the knowledge base

First document chunk:
Chris Olande: A Professional Profile

Chris Olande is a dynamic and intellectually curious student of Statistics and Programming at Kenyatta University, with a growing portfolio of sophisticated projects that blend statistical rigor with cutting-edge machine learning techniques. His academic and practical pursuits reflect not only a mastery of foundational principles in data science and programming, but also a passion for innovative applications in real-world contexts, including education and ar...


# Build the knowledge graph

In [4]:
# Clear the existing database
# NOTE(review): presumably removes everything already stored in the Neo4j
# instance, so re-running this cell starts from an empty graph — confirm
# before pointing the notebook at a shared database.
knowledge_graph.clear_database()

# Create a knowledge graph from the documents
# The recorded output shows this step extracting entities and relationships,
# then creating them in batches with one progress bar per relationship type.
knowledge_graph.create_graph_from_documents(documents)

print("Knowledge graph created successfully!")

Extracting entities and relationships from documents...


Processing documents:   0%|          | 0/2 [00:00<?, ?it/s]

Extracted 42 unique entities and 36 relationships
Creating entities...


Creating entities in batches:   0%|          | 0/1 [00:00<?, ?it/s]

Creating relationships...


Creating STUDIES_AT relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating SPECIALIZES_IN relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating PROFICIENT_IN relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating WORKS_WITH relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating WORKED_ON relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating LED relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating ORGANIZES relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating EXPLORES relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating IMPLEMENTS relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating INTEGRATES relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating USES relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating LOCATED_AT relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating DEMONSTRATES_LEADERSHIP_IN relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating DEMONSTRATES_COMMITMENT_TO relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating CURRENT_FOCUS_ON relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating FUTURE_IN relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating THRIVES_IN_ENVIRONMENTS_SUCH_AS relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating COULD_WORK_IN relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Creating HAS_BLEND_OF relationships:   0%|          | 0/1 [00:00<?, ?it/s]

Graph creation completed!
Knowledge graph created successfully!


In [5]:
# Inspect the graph built above; in the recorded run the cell output is a GraphWidget repr.
knowledge_graph.visualize_graph()

GraphWidget(layout=Layout(height='800px', width='100%'))

# Test entity extraction

In [14]:
# Sample question; the retrieval and RAG cells below reuse this variable.
question = "Who is Chris Olande?"
# In the recorded run extract_entities returned a list of entity names,
# so `entity` can hold more than one item despite its singular name.
entity = entity_extractor.extract_entities(question)
print(entity)

Extracting entities for question: Who is Chris Olande?
Extracted 1 entities: ['Chris Olande']
['Chris Olande']


# Test structured retrieval

In [15]:
# Query the graph-backed retriever with the same sample question.
# The call logs its own progress before the summary below is printed.
structured_results = entity_extractor.structured_retriever(question)

# Question, a blank line, the section label, then the raw result.
print(
    f"Question: {question}",
    "\nStructured retrieval results:",
    structured_results,
    sep="\n",
)

Processing structured retrieval for question: Who is Chris Olande?
Extracting entities for question: Who is Chris Olande?
Extracted 1 entities: ['Chris Olande']
Retrieved 35 relationships for entity 'Chris Olande'
Found 35 relationships for entity 'Chris Olande'
  - Chris Olande - USES -> LangChain Expression Language (LCEL)
  - Chris Olande - USES -> OpenRouter models
  - Chris Olande - SPECIALIZES_IN -> Data Science, Machine Learning Engineering, Research, AI Systems Development, Agentic RAGs, Intelligent Information Systems, AI Product Development
  - Chris Olande - SPECIALIZES_IN -> Programming
  - Chris Olande - SPECIALIZES_IN -> Statistics
  ... and 30 more
Question: Who is Chris Olande?

Structured retrieval results:
Chris Olande - USES -> LangChain Expression Language (LCEL)
Chris Olande - USES -> OpenRouter models
Chris Olande - SPECIALIZES_IN -> Data Science, Machine Learning Engineering, Research, AI Systems Development, Agentic RAGs, Intelligent Information Systems, AI Prod

# Create the hybrid vector index

In [16]:
# Set up the hybrid index through the vector store.
# NOTE(review): presumably combines vector and keyword search over the
# graph — confirm against the VectorStore implementation.
vector_store.create_hybrid_index()

print("Hybrid vector index created successfully!")

Hybrid vector index created successfully!


# Create vector index

In [17]:
# Create the vector index from the documents.
# Uses the same `documents` chunks that were loaded earlier and fed to the
# knowledge graph.
vector_store.create_vector_index(documents)

print("Vector index created successfully!")

Vector index created successfully!


## Test vector retrieval

In [18]:
# Ask the retriever for k=2 results for the sample question and preview each one.
vector_results = retriever.vector_retrieval(question, k=2)

print(f"Question: {question}", "\nVector retrieval results:", sep="\n")
for position, hit in enumerate(vector_results, start=1):
    print(f"\nDocument {position}:")
    # Preview only the first 300 characters of the chunk.
    print(hit.page_content[:300] + "...")

Question: Who is Chris Olande?

Vector retrieval results:

Document 1:
 trajectory, Chris is well-positioned for roles in data science, machine learning engineering, research, or AI systems development. His current focus on agentic RAGs suggests a future in intelligent information systems and AI product development, especially those requiring real-time reasoning and ad...

Document 2:
Chris Olande: A Professional Profile

Chris Olande is a dynamic and intellectually curious student of Statistics and Programming at Kenyatta University, with a growing portfolio of sophisticated projects that blend statistical rigor with cutting-edge machine learning techniques. His academic and pra...


# Test hybrid retrieval


In [19]:
# Run the combined (structured + unstructured) retrieval for the sample question.
# NOTE(review): in the recorded run the "Structured data" section reports that
# nothing was found, although entity_extractor.structured_retriever returned 35
# relationships for the same question — check how Retriever obtains its
# structured results.
hybrid_results = retriever.hybrid_retrieval(question)

print(
    f"Question: {question}",
    "\nHybrid retrieval results:",
    hybrid_results,
    sep="\n",
)

Question: Who is Chris Olande?

Hybrid retrieval results:
Structured data:
            No relevant information found in the knowledge graph.

            Unstructured data:
             trajectory, Chris is well-positioned for roles in data science, machine learning engineering, research, or AI systems development. His current focus on agentic RAGs suggests a future in intelligent information systems and AI product development, especially those requiring real-time reasoning and adaptive behavior.

He could thrive in environments such as research labs, AI startups, ed-tech companies, or advanced analytics units in large organizations. With his blend of statistical depth, programming expertise, and interpersonal insight, Chris is not only a problem solver but a builder of intelligent systems that matter.

In Conclusion

Chris Olande is a promising young professional whose blend of technical competence, educational leadership, and forward-thinking innovation makes him stand out. As he con

# Test RAG Chain

In [20]:
# End-to-end check: send the sample question through the full RAG chain.
rag_input = {"question": question}
answer = rag_chain.invoke(rag_input)

# Question first, then a blank line and the generated answer.
print(f"Question: {question}", f"\nAnswer: {answer}", sep="\n")

Question: Who is Chris Olande?

Answer:  Chris Olande is a dynamic and intellectually curious student of Statistics and Programming at Kenyatta University, with a growing portfolio of sophisticated projects that blend statistical rigor with cutting-edge machine learning techniques. He is a promising young professional whose blend of technical competence, educational leadership, and forward-thinking innovation makes him stand out. He is currently focusing on agentic RAG systems and is well-positioned for roles in data science, machine learning engineering, research, or AI systems development.


## Test using a follow-up question

In [21]:
# Seed the conversation with the first exchange as a (question, answer) pair.
chat_history = [(question, answer)]

# The follow-up says "he", so it only makes sense together with the history.
follow_up_question = "What does he even do at Kenyatta University?"

# Send the new question and the prior exchange to the chain in one payload.
follow_up_payload = {"question": follow_up_question, "chat_history": chat_history}
follow_up_answer = rag_chain.invoke(follow_up_payload)

print(
    f"Follow-up question: {follow_up_question}",
    f"\nAnswer: {follow_up_answer}",
    sep="\n",
)

Follow-up question: What does he even do at Kenyatta University?

Answer:  The context does not specify Chris Olande's role or activities at Kenyatta University. However, it mentions that he is a student of Statistics and Programming at the university.


In [None]:
# Second follow-up: asks about tooling and career fit, again relying on the
# conversation so far to resolve "he".
quiz = "How and what tools does he use and what jobs is he suited for in the future?"

# Carry the previous follow-up exchange forward as well, so the chain sees the
# whole conversation. Building a new list (instead of appending to
# chat_history) keeps this cell safe to re-run.
quiz_history = chat_history + [(follow_up_question, follow_up_answer)]

# Store the result under its own name so the earlier follow_up_answer is not
# overwritten; quiz_history above depends on it staying intact across re-runs.
quiz_answer = rag_chain.invoke({
    "question": quiz,
    "chat_history": quiz_history
})

print(f"Follow-up question: {quiz}")
print(f"\nAnswer: {quiz_answer}")