In [16]:
import os
from dotenv import load_dotenv
import glob
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import MessagesPlaceholder
from IPython.display import display, Markdown
import ipywidgets as widgets

In [17]:
# Load variables from the local .env file, then read the OpenAI key
# from the process environment.
load_dotenv()

api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    # Covers both a missing variable (None) and an empty value ("").
    raise ValueError("OPENAI_API_KEY not found in .env file")

print("API key loaded")

API key loaded


#### Document collection

In [18]:
# Load every PDF under documents/ into one flat list of loaded documents.
# glob.glob returns matches in arbitrary, OS-dependent order, so the paths are
# sorted to keep page order (and therefore chunk order) stable between runs.
documents = []

for pdf_path in sorted(glob.glob("documents/*.pdf")):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    documents.extend(docs)

print(f"Loaded {len(documents)} pages from PDFs")

Loaded 10 pages from PDFs


#### Text Splitters

In [19]:
# Chunking parameters; sizes are counted in characters because
# length_function is the built-in len.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)

# Break the loaded documents into overlapping chunks.
chunks = text_splitter.split_documents(documents)

print(f"Split {len(documents)} documents into {len(chunks)} chunks")

# Preview the first two chunks as a quick sanity check.
for chunk_number, chunk in enumerate(chunks[:2], start=1):
    print(f"\nChunk {chunk_number}: {chunk.page_content}")

Split 10 documents into 34 chunks

Chunk 1: Personal Biography: Victor Ridwan Ademuyiwa 
Introduction 
Victor Ridwan Ademuyiwa is a driven and purpose-oriented individual whose journey 
reflects resilience, curiosity, and a deep commitment to growth. Born on 27th May and raised 
in Ijoko, Sango Ota, Ogun State, he has steadily built a path defined by learning, discipline, 
and a passion for technology. 
Early Life 
Growing up in Ijoko shaped Ridwan’s values and worldview. His environment taught him

Chunk 2: Early Life 
Growing up in Ijoko shaped Ridwan’s values and worldview. His environment taught him 
patience, determination, and the belief that progress is not a race but a steady climb. These 
early experiences became the foundation of his personal philosophy: “Slow and steady — it 
is not about how far, but how well.” 
Education 
Ridwan’s academic journey reflects both breadth and depth: 
• B.Sc. Industrial Chemistry 
Federal University of Petroleum Resources, Effuru

#### Embeddings

In [20]:
# Embedding client used both to build and to query the vector store.
EMBEDDING_MODEL = "text-embedding-3-small"

embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL, openai_api_key=api_key)

print("Embeddings created")

Embeddings created


#### Vector Store
Create the vector store from the document chunks, or load the persisted one if it already exists.

In [21]:
# Location and name of the persisted Chroma collection. Both branches below
# read these two variables, so the load path and the create path cannot drift
# apart when either value is changed.
persist_directory = "./chroma_db"
collection_name = "collection"

if os.path.exists(persist_directory):
    # Reuse the on-disk store instead of re-embedding the chunks.
    # NOTE(review): only the directory's existence is checked. If the PDFs or
    # the chunking settings change, delete the directory to force a rebuild.
    vectorstore = Chroma(
        collection_name=collection_name,
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
    print("chroma_db loaded")
else:
    # No persisted store yet: embed the chunks and write them to disk.
    vectorstore = Chroma.from_documents(
        chunks,
        embeddings,
        collection_name=collection_name,
        persist_directory=persist_directory,
    )
    print("chroma_db created")

chroma_db loaded


#### Conversational RAG with memory

In [None]:
# Chat model that writes the final answers.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)

# Retriever over the vector store: similarity search, k=5 per query.
retriever_settings = {
    "search_type": "similarity",
    "search_kwargs": {"k": 5},
}
retriever = vectorstore.as_retriever(**retriever_settings)

# In-memory registry of chat histories, keyed by session id.
chat_store = {}

def get_session_history(session_id: str):
    """Return the chat history for ``session_id``, creating it on first use.

    Histories are kept only in the module-level ``chat_store`` dict; a new
    ``InMemoryChatMessageHistory`` is registered the first time an id is seen.
    """
    try:
        return chat_store[session_id]
    except KeyError:
        history = InMemoryChatMessageHistory()
        chat_store[session_id] = history
        return history

# Conversational prompt, in message order:
#   1. system   - grounding rules and the two exact fallback replies
#   2. history  - prior turns, filled in under the "chat_history" key
#   3. human    - retrieved context plus the current question
#   4. system   - formatting rules for the final answer
# The "no information" fallback previously contained a mis-encoded apostrophe
# ("don‚Äôt"); because the model is told to reply with that text *exactly*,
# the garbled bytes would have been echoed to users. It is restored here.
conv_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are the personal AI assistant for Victor Ridwan Ademuyiwa. "
     "You must answer questions ONLY if:\n"
     "1. The question is clear, complete, and meaningful.\n"
     "2. The provided context directly contains the information needed.\n\n"
     "If the question is unclear, incomplete, or nonsensical, respond exactly with:\n"
     "'Please ask a clear and complete question.'\n\n"
     "If the context does not contain the answer, respond exactly with:\n"
     "'I don’t have information about this in the provided documents.'\n\n"
     "Do NOT guess, infer, or generalize beyond the context."
    ),

    MessagesPlaceholder(variable_name="chat_history"),

    ("human",
     "Context:\n{context}\n\nQuestion:\n{question}"),

    ("system",
     "Rules for formatting the final answer:\n"
     "- Answer in clear, concise sentences.\n"
     "- List sources as bullet points ONLY if you actually used the context.\n"
     "- If you did not use the context, do NOT list any sources."
    )
])

def format_docs(docs):
    """Render documents as a single context string for the prompt.

    Each document becomes a ``Source: <source>`` header line followed by its
    ``page_content``; documents are separated by a blank line. The source is
    read from ``doc.metadata["source"]`` and falls back to ``"unknown"``.
    An empty iterable yields an empty string.
    """
    sections = []
    for doc in docs:
        source = doc.metadata.get('source', 'unknown')
        sections.append(f"Source: {source}\n{doc.page_content}")
    return "\n\n".join(sections)

# Base chain: assemble the prompt inputs -> prompt -> model -> plain string.
def _retrieve_context(inputs):
    """Retrieve documents for the question and format them as prompt context."""
    return format_docs(retriever.invoke(inputs["question"]))


def _pick_question(inputs):
    """Pass the user's question through unchanged."""
    return inputs["question"]


def _pick_chat_history(inputs):
    """Return the supplied chat history, or an empty list when none is given."""
    return inputs.get("chat_history", [])


prompt_inputs = RunnableParallel(
    context=_retrieve_context,
    question=_pick_question,
    chat_history=_pick_chat_history,
)

conv_chain_base = prompt_inputs | conv_prompt | llm | StrOutputParser()

# Add per-session memory: the history returned by get_session_history is
# supplied to the base chain under "chat_history", and the "question" input
# is treated as the new user message.
conv_chain = RunnableWithMessageHistory(
    runnable=conv_chain_base,
    get_session_history=get_session_history,
    input_messages_key="question",
    history_messages_key="chat_history",
)



**Example interactions**

In [23]:
# Both calls use the same session id, so the follow-up question can refer
# back to the first answer through the stored chat history.
demo_session_id = "user_1"

# First question
response = conv_chain.invoke(
    {"question": "what projects has Victor worked on"},
    config={"configurable": {"session_id": demo_session_id}},
)
print("Response 1:\n", response)

# Follow-up question ("those projects" relies on the history of the first turn)
response2 = conv_chain.invoke(
    {"question": "Which of those projects are AI and Machine Learning project?"},
    config={"configurable": {"session_id": demo_session_id}},
)
print("\nResponse 2:\n", response2)

Response 1:
 Victor Ridwan Ademuyiwa has worked on projects ranging from AI systems to interactive web apps, showcasing his ability to learn, build, and solve problems. His projects demonstrate consistent growth, curiosity, and a willingness to explore new technologies.

Response 2:
 The AI and Machine Learning projects that Victor Ridwan Ademuyiwa has worked on include:
- RAG Chatbot (Personal AI Assistant)
- Student Performance Prediction
- Car Price Prediction System


#### Interactive User Section

In [None]:
# Interactive question loop over the conversational chain. Every turn uses the
# same session id, so the chain's message history carries across questions.
session_id = "user_1"

EXIT_COMMANDS = {"exit", "quit", "bye"}

while True:
    user_input = input("\nAsk your question about Victor (or type 'exit' to quit): ").strip()

    if user_input.lower() in EXIT_COMMANDS:
        # display() renders the Markdown; print() would only show the object's repr.
        display(Markdown("### Goodbye! Have a nice day."))
        break

    if not user_input:
        # Skip blank submissions instead of spending an API call on them.
        continue

    response = conv_chain.invoke(
        {"question": user_input},
        config={"configurable": {"session_id": session_id}}
    )

    # Two trailing spaces after the labels force a Markdown line break.
    display(Markdown(f"""
---
**🧑 User:**  
{user_input}

**🤖 Response:**  
{response}
"""))

# The former trailing `clear_output(wait=True)` call was removed: the notebook
# imports only `display` and `Markdown` from IPython.display, so the call
# raised NameError as soon as the loop exited.

