In [1]:
import os
from dotenv import load_dotenv
import glob
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import MessagesPlaceholder


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pull variables from a local .env file into the process environment, then
# read the OpenAI key that the embedding and chat cells below pass along.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Fail fast here: a missing or empty key would otherwise only surface later,
# when the first OpenAI call is made.
if api_key:
    print("API key loaded")
else:
    raise ValueError("OPENAI_API_KEY not found in .env file")

API key loaded


#### Document Collection

In [None]:
# NOTE(review): both loader drafts in this cell are commented out. As written,
# the cell only resets `documents` to an empty list and prints its length, so
# `documents` is still empty when the final print runs. The loader that
# actually fills `documents` lives in the next code cell.
documents = []

# Draft 1 (disabled): prepend a "Document Title" line to every page.
# NOTE(review): the title is taken by splitting the path on "\\", which only
# separates Windows-style paths; draft 2 uses os.path.basename instead.
# for pdf_path in glob.glob("documents/*.pdf"):
#     loader = PyPDFLoader(pdf_path)
#     docs = loader.load()

#     # Add document title inside the text
#     title = pdf_path.split("\\")[-1].replace(".pdf", "")
#     for d in docs:
#         d.page_content = f"Document Title: {title}\n{d.page_content}"

#     documents.extend(docs)

# Draft 2 (disabled): same idea, but the title is also stored in the metadata.
# for pdf_path in glob.glob("documents/*.pdf"):
#     loader = PyPDFLoader(pdf_path)
#     docs = loader.load()

#     # Extract metadata
#     filename = os.path.basename(pdf_path).replace(".pdf", "")

#     for doc in docs:
#         # Add title to content AND metadata
#         doc.page_content = f"Document: {filename}\n\n{doc.page_content}"
#         doc.metadata.update({
#             "source": pdf_path,
#             "title": filename,
#             "page": doc.metadata.get("page", 0)
#         })

# NOTE(review): the extend below is indented inside the `for doc in docs` loop,
# so the whole `docs` list would be appended once per page, duplicating pages.
# Dedent it to the outer `for pdf_path` loop before re-enabling this draft.
#         documents.extend(docs)

# Reports how many entries `documents` holds at this point.
print(f"Loaded {len(documents)} pages from PDFs")


Loaded 20 pages from PDFs


In [3]:
# Start from an empty list so re-running this cell never accumulates pages
# from a previous run.
documents = []

# glob makes no ordering guarantee (it depends on the file system). Sorting
# keeps the document order, and therefore the chunk numbering further down,
# identical across runs and machines.
pdf_paths = sorted(glob.glob("documents/*.pdf"))  # adjust folder path

for pdf_path in pdf_paths:
    # Each loader call returns a list of Document objects for one PDF; they
    # are all appended to the shared list and counted as "pages" below.
    loader = PyPDFLoader(pdf_path)
    documents.extend(loader.load())

print(f"Loaded {len(documents)} pages from PDFs")


Loaded 10 pages from PDFs


#### Text Splitters

In [6]:
# Splitter configuration: chunks of at most 350 characters (measured with
# `len`) that overlap their neighbour by 50 characters.
# Alternative custom separators, kept for reference:
# separators=["\n# ", "\n## ", "\n### ", "\n\n", "\n", ". ", " ", ""]
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=50,
    chunk_size=350,
)

# Break every loaded document into chunks for embedding.
chunks = text_splitter.split_documents(documents)

# Summary line followed by a numbered (1-based) dump of every chunk's text.
print(f"Split {len(documents)} documents into {len(chunks)} chunks")
for number, chunk in enumerate(chunks, start=1):
    print(f"\nChunk {number}: {chunk.page_content}")

Split 10 documents into 44 chunks

Chunk 1: Personal Biography: Victor Ridwan Ademuyiwa 
Introduction 
Victor Ridwan Ademuyiwa is a driven and purpose-oriented individual whose journey 
reflects resilience, curiosity, and a deep commitment to growth. Born on 27th May and raised 
in Ijoko, Sango Ota, Ogun State, he has steadily built a path defined by learning, discipline,

Chunk 2: and a passion for technology. 
Early Life 
Growing up in Ijoko shaped Ridwan’s values and worldview. His environment taught him 
patience, determination, and the belief that progress is not a race but a steady climb. These 
early experiences became the foundation of his personal philosophy: “Slow and steady — it

Chunk 3: is not about how far, but how well.” 
Education 
Ridwan’s academic journey reflects both breadth and depth: 
• B.Sc. Industrial Chemistry 
Federal University of Petroleum Resources, Effurun (2017–2023) 
Here, he developed strong analytical thinking, scientific discipline, and 
problem-solvi

#### Embeddings

In [7]:
# Embedding client for the "text-embedding-3-small" model, authenticated with
# the API key read from the environment earlier in the notebook.
embeddings = OpenAIEmbeddings(openai_api_key=api_key, model="text-embedding-3-small")

# Smoke test: embed one short query, then report the vector length and its
# first five components.
test_embedding = embeddings.embed_query("What is RAG?")
print(
    f"Embedding dimension: {len(test_embedding)}",
    f"First 5 values: {test_embedding[:5]}",
    sep="\n",
)

Embedding dimension: 1536
First 5 values: [0.0006463840254582465, 0.0257024634629488, 0.007150898687541485, 0.033402226865291595, -0.03191245347261429]


#### Vector Store
# Create vector store from documents

In [8]:
# Give every chunk a stable ID built from its source file, its page and its
# position among the chunks of that page. Without explicit IDs the store
# assigns a fresh random ID to each chunk, so every re-run of this cell against
# the persisted ./chroma_db directory would add another copy of all chunks and
# retrieval would start returning duplicates. With stable IDs a re-run
# overwrites the existing entries instead.
# NOTE(review): copies already stored under random IDs by earlier runs are not
# removed by this; delete ./chroma_db once to start from a clean collection.
chunk_ids = []
_chunks_seen_per_page = {}
for chunk in chunks:
    page_key = (
        chunk.metadata.get("source", "unknown"),
        chunk.metadata.get("page", 0),
    )
    position = _chunks_seen_per_page.get(page_key, 0)
    _chunks_seen_per_page[page_key] = position + 1
    chunk_ids.append(f"{page_key[0]}:p{page_key[1]}:c{position}")

# Embed the chunks and store them in the "collection" collection, persisted
# on disk under ./chroma_db.
vectorstore = Chroma.from_documents(
    chunks,
    embeddings,
    ids=chunk_ids,
    collection_name="collection",
    persist_directory="./chroma_db",
)

In [9]:
# Test retriever: ask the vector store for the top 5 matches (k=5) of a
# sample query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

query = "What ai projects?"
results = retriever.invoke(query)

# Bare last expression so the notebook displays the retrieved documents.
results


[Document(metadata={'author': 'Ridwan Ademuyiwa', 'creationdate': '2025-12-15T01:00:48+01:00', 'source': 'documents\\Projects.pdf', 'moddate': '2025-12-15T01:00:48+01:00', 'total_pages': 2, 'creator': 'Microsoft® Word 2019', 'page': 0, 'page_label': '1', 'producer': 'Microsoft® Word 2019'}, page_content='PROJECTS \nAI & Machine Learning Projects \nProject 1. RAG Chatbot (Personal AI Assistant) \n• Architecture: Retrieval-Augmented Generation pipeline \n• Components: \no LangChain Expression Language (LCEL) \no ChromaDB vector store \no OpenAI LLM for generation \no Embedding-based document chunk retrieval \n• Capabilities:'),
 Document(metadata={'author': 'Ridwan Ademuyiwa', 'creator': 'Microsoft® Word 2019', 'producer': 'Microsoft® Word 2019', 'creationdate': '2025-12-14T23:33:11+01:00', 'page_label': '1', 'source': 'documents\\Personal Biography.pdf', 'page': 0, 'moddate': '2025-12-14T23:33:11+01:00', 'total_pages': 2}, page_content='• AI Engineering Program \nAI Fellowship, Abeokuta

#### Basic RAG Chain (LCEL)

In [10]:
# Chat model used for answer generation; temperature is pinned to 0.
llm = ChatOpenAI(openai_api_key=api_key, model="gpt-3.5-turbo", temperature=0)

# Retriever over the vector store: similarity search, five results per query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

# Prompt for the single-turn RAG chain. It has two template variables:
#   {context}  - the retrieved chunks, already rendered as text
#   {question} - the user's question
# The model is told to answer only from the context, to fall back to
# "I don't know." otherwise, and to finish with a bullet list of sources.
prompt = ChatPromptTemplate.from_template("""
You are Victor Ridwan Ademuyiwa's personal AI assistant, answering questions about Victor Ridwan Ademuyiwa using the provided documents.

Use ONLY the context below to answer the question.
If the answer is not in the context, say "I don't know."

<context>
{context}
</context>

Question: {question}

Answer in clear sentences.
At the end, list the sources you used as bullet points.
""")

# format documents
def format_docs(docs):
    """Render retrieved documents as one text block for the prompt.

    Each document becomes a "Source: <source>" line followed by its page
    content; the source falls back to "unknown" when the metadata has no
    'source' entry. Documents are separated by a blank line.
    """
    rendered = []
    for document in docs:
        origin = document.metadata.get('source', 'unknown')
        rendered.append(f"Source: {origin}\n{document.page_content}")
    return "\n\n".join(rendered)

# RAG chain using LCEL.
# Step 1 fans the incoming question out into the two prompt variables:
# "context" (retriever output rendered by format_docs) and "question" (the
# input passed through unchanged). The filled prompt then goes to the chat
# model, and the reply is reduced to a plain string.
prompt_inputs = RunnableParallel(
    context=retriever | format_docs,
    question=RunnablePassthrough(),
)

rag_chain = prompt_inputs | prompt | llm | StrOutputParser()


In [11]:
query = "What AI projects has Ridwan worked on?"
response = rag_chain.invoke(query)
print(response)

Ridwan has worked on AI projects that aim to improve decision-making, enhance productivity, support scientific discovery, empower businesses, and make life easier for people. He specializes in building intelligent systems that automate complex processes and improve decision-making. Ridwan envisions himself working on high-impact projects as a Lead AI Engineer, designing and architecting large-scale AI platforms that power intelligent decision-support systems.

Sources:
- documents\Research Interest and Career Goal.pdf


#### Conversational RAG

In [10]:
# NOTE(review): superseded draft. Every line in this cell is commented out, so
# running it defines nothing; the live conversational chain is built in the
# following code cell (chat_store / conv_prompt / conv_chain).
# NOTE(review): the system prompt below names "Alex Morgan" instead of the
# person the rest of this notebook is about - presumably a template leftover.
# NOTE(review): in the draft chain, `retriever | format_docs` looks like it
# would receive the whole input dict rather than the question string, and
# "chat_history" is not forwarded to the prompt - confirm before re-enabling.
# from langchain_core.chat_history import InMemoryChatMessageHistory
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# from langchain_core.runnables.history import RunnableWithMessageHistory
# from langchain_core.output_parsers import StrOutputParser

# # Session store
# chat_store = {}

# def get_session_history(session_id: str):
#     if session_id not in chat_store:
#         chat_store[session_id] = InMemoryChatMessageHistory()
#     return chat_store[session_id]

# # Prompt
# conversational_prompt = ChatPromptTemplate.from_messages([
#     ("system", "You are an AI assistant answering questions about Alex Morgan."),
#     MessagesPlaceholder("chat_history"),
#     ("system", "Use the document context to answer.\nContext:\n{context}"),
#     ("user", "{question}")
# ])

# # Chain
# conversational_chain = (
#     {
#         "context": retriever | format_docs,
#         "question": lambda x: x["question"]
#     }
#     | conversational_prompt
#     | llm
#     | StrOutputParser()
# )

# # Memory wrapper
# rag_with_memory = RunnableWithMessageHistory(
#     conversational_chain,
#     get_session_history,
#     input_messages_key="question",
#     history_messages_key="chat_history"
# )


In [12]:
# Store for chat histories, keyed by session ID
chat_store = {}

def get_session_history(session_id: str):
    """Return the chat history for `session_id`, creating an empty one on first use."""
    try:
        return chat_store[session_id]
    except KeyError:
        # First time this session is seen: register a fresh in-memory history.
        history = InMemoryChatMessageHistory()
        chat_store[session_id] = history
        return history

# Conversational prompt. Message order:
#   1. system  - role and grounding rules (answer only from the context,
#                otherwise say "I don't know.")
#   2. history - earlier turns, injected under the "chat_history" variable
#   3. system  - answer-format instructions
#   4. human   - retrieved {context} followed by the current {question}
conv_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are Victor Ridwan Ademuyiwa's personal AI assistant, answering questions about Victor Ridwan Ademuyiwa using the provided documents. Use ONLY the context below to answer the question. If the answer is not in the context, say \"I don't know.\""),
    MessagesPlaceholder(variable_name="chat_history"),
    ("system", "Answer in clear sentences. At the end, list the sources you used as bullet points."),
    ("human", "Context: {context}\n\nQuestion: {question}")
])

# Build base chain.
# The chain input is a dict holding "question" and, once the history wrapper
# is applied, "chat_history". The three helpers below derive the prompt
# variables from that dict.
def _context_for(inputs):
    """Retrieve documents for the question and render them as prompt text."""
    return format_docs(retriever.invoke(inputs["question"]))


def _question_of(inputs):
    """Pass the question through unchanged."""
    return inputs["question"]


def _history_of(inputs):
    """Return earlier messages, or an empty list when none were supplied."""
    return inputs.get("chat_history", [])


conv_prompt_inputs = RunnableParallel({
    "context": _context_for,
    "question": _question_of,
    "chat_history": _history_of,
})

conv_chain_base = conv_prompt_inputs | conv_prompt | llm | StrOutputParser()

# Wrap with message history: the wrapper looks up the session's history via
# get_session_history, supplies it under "chat_history", and treats
# "question" as the input message key.
conv_chain = RunnableWithMessageHistory(
    conv_chain_base,
    get_session_history,
    history_messages_key="chat_history",
    input_messages_key="question",
)



**Questions**

In [15]:
# Both turns use the same session ID, so the follow-up is answered with the
# first exchange available in the chat history.
session_config = {"configurable": {"session_id": "user_1"}}

# First question
response = conv_chain.invoke(
    {"question": "who are you?"},
    config=session_config,
)
print("Response 1:\n", response)

# Follow-up question ("he" relies on the history from the first turn)
response2 = conv_chain.invoke(
    {"question": "list the AI and machine learning projects he has worked on?"},
    config=session_config,
)

print("\nResponse 2:\n", response2)

Response 1:
 I am Victor Ridwan Ademuyiwa's personal AI assistant, providing information about him based on the documents provided.

Sources:
- documents\Personal Biography.pdf
- documents\Professional Resume.pdf

Response 2:
 The AI and machine learning projects Victor Ridwan Ademuyiwa has worked on are:
- RAG Chatbot (Personal AI Assistant)
- Student Performance Prediction

Sources:
- documents\Projects.pdf
- documents\Professional Resume.pdf
