**Setup**

In [1]:
# Importing the necessary libraries
import os
from dotenv import load_dotenv

# Call load_dotenv() first so the key can be supplied through the environment
# it prepares, then read it. The key value itself is never printed.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("OPENAI_API_KEY environment variable not set")
print("API Key loaded successfully.")

API Key loaded successfully.


In [2]:
import PyPDF2

In [3]:
# Load the CV and extract its text.
# NOTE(review): this is an absolute path on one specific machine; point it at
# your own copy of the PDF (ideally relative to the notebook) before running.
file_path = r"C:\Users\Dell\Downloads\Naheemot_Adebiyi_ML_CV.pdf"
file_name = os.path.basename(file_path)

with open(file_path, "rb") as file:
    reader = PyPDF2.PdfReader(file)
    num_pages = len(reader.pages)
    # Extract while the file is still open; `or ""` guards against a page for
    # which no text comes back.
    page_texts = [page.extract_text() or "" for page in reader.pages]

# `pages` holds the text of the whole document as one string.
pages = "".join(page_texts)

# Print once, after extraction, so a multi-page document is not echoed
# cumulatively (once per page).
print(pages)

Naheemot Adenike Adebiyi
 
AI & Machine Learning Engineer | Lagos, Nigeria
+234 810 120 5765 | naeemaadenike@gmail.com | LinkedIn
Professional Summary
Entry-level AI and Machine Learning Engineer with strong hands-on experience in building
end-to-end machine learning solutions. Skilled in transforming data into actionable insights and
deploying models via APIs to support business decision-making.
Technical Skills
1
Python, SQL
2
Machine Learning & Artificial Intelligence
3
Data Analysis & Feature Engineering
4
Model Evaluation & Optimization
5
FastAPI & RESTful APIs
Projects
Loan Approval Prediction System
1
Built a machine learning model to predict loan approval outcomes using customer financial
data.
2
Performed data cleaning, exploratory data analysis, and feature engineering to improve model
accuracy.
3
Trained and evaluated multiple machine learning algorithms to select the best-performing
model.
4
Deployed the trained model using FastAPI, enabling real-time loan approval predicti

In [4]:
def chunk_by_sentences(text, max_chunk_size=500):
    """Group the sentences of ``text`` into chunks of bounded length.

    Sentences are found with a simple rule: the text is split at any run of
    whitespace that directly follows ``.``, ``!`` or ``?``. Consecutive
    sentences are joined with a single space for as long as the joined chunk
    stays within ``max_chunk_size`` characters.

    Args:
        text: The text to split.
        max_chunk_size: Maximum chunk length in characters, counting the
            spaces inserted between sentences.

    Returns:
        A list of non-empty, stripped chunk strings in their original order.
        A single sentence longer than ``max_chunk_size`` is never cut; it is
        returned as its own oversized chunk. Empty or whitespace-only input
        yields an empty list.
    """
    import re

    chunks = []
    current_chunk = ""

    for sentence in re.split(r'(?<=[.!?])\s+', text):
        sentence = sentence.strip()
        if not sentence:
            # Whitespace-only fragment (blank input, trailing space after the
            # last period): never let it produce an empty chunk.
            continue

        if not current_chunk:
            current_chunk = sentence
        elif len(current_chunk) + 1 + len(sentence) > max_chunk_size:
            # The +1 is the joining space; without it a chunk could end up one
            # character over the limit.
            chunks.append(current_chunk)
            current_chunk = sentence
        else:
            current_chunk += " " + sentence

    # Flush whatever is left after the last sentence.
    if current_chunk:
        chunks.append(current_chunk)

    return chunks

# Try the sentence chunker on the extracted text and show every chunk
chunks = chunk_by_sentences(pages, max_chunk_size=500)

print(f"Number of chunks: {len(chunks)}\n")
for i, chunk in enumerate(chunks, start=1):
    # Header line, chunk body and an 80-dash rule, each on its own line
    print(f"Chunk {i} ({len(chunk)} chars):", chunk, "-" * 80, sep="\n")

Number of chunks: 4

Chunk 1 (399 chars):
Naheemot Adenike Adebiyi
 
AI & Machine Learning Engineer | Lagos, Nigeria
+234 810 120 5765 | naeemaadenike@gmail.com | LinkedIn
Professional Summary
Entry-level AI and Machine Learning Engineer with strong hands-on experience in building
end-to-end machine learning solutions. Skilled in transforming data into actionable insights and
deploying models via APIs to support business decision-making.
--------------------------------------------------------------------------------
Chunk 2 (416 chars):
Technical Skills
1
Python, SQL
2
Machine Learning & Artificial Intelligence
3
Data Analysis & Feature Engineering
4
Model Evaluation & Optimization
5
FastAPI & RESTful APIs
Projects
Loan Approval Prediction System
1
Built a machine learning model to predict loan approval outcomes using customer financial
data. 2
Performed data cleaning, exploratory data analysis, and feature engineering to improve model
accuracy.
---------------------------------------

In [12]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Create the splitter. With length_function=len, lengths are measured with
# Python's len(), i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
    length_function=len
)

# Split the extracted text. `pages` is one string, so len(pages) is a
# character count, not a number of documents.
chunks = text_splitter.split_text(pages)

print(f"Split {len(pages)} characters into {len(chunks)} chunks")
# Preview only the first three chunks to keep the output short
for i, chunk in enumerate(chunks[:3]):
    print(f"Chunk {i+1} ({len(chunk)} chars): {chunk}\n")
    

Split 1496 documents into 10 chunks
Chunk 1 (150 chars): Naheemot Adenike Adebiyi
 
AI & Machine Learning Engineer | Lagos, Nigeria
+234 810 120 5765 | naeemaadenike@gmail.com | LinkedIn
Professional Summary

Chunk 2 (185 chars): Entry-level AI and Machine Learning Engineer with strong hands-on experience in building
end-to-end machine learning solutions. Skilled in transforming data into actionable insights and

Chunk 3 (178 chars): deploying models via APIs to support business decision-making.
Technical Skills
1
Python, SQL
2
Machine Learning & Artificial Intelligence
3
Data Analysis & Feature Engineering
4



In [13]:
from langchain_openai import OpenAIEmbeddings

# Embedding client, authenticated with the key loaded earlier
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=api_key)

# Smoke test: embed one question and inspect the resulting vector
test_embedding = embeddings.embed_query("Who is Naheemot Adebiyi?")
print(
    f"Embedding dimension: {len(test_embedding)}",
    f"First 5 values: {test_embedding[:5]}",
    sep="\n",
)

Embedding dimension: 1536
First 5 values: [0.01047521736472845, -0.049979958683252335, -0.01698615960776806, 0.025253426283597946, 0.0020354539155960083]


In [15]:
from langchain_community.vectorstores import FAISS

# Build a FAISS vector store from the chunk texts, using the embedding client
vectorstore = FAISS.from_texts(chunks, embeddings)
print(f"✅ Vector store created with {len(chunks)} chunks")

# Sanity check: fetch the two chunks most similar to a sample question
query = "Who is Naheemot Adebiyi?"
results = vectorstore.similarity_search(query, k=2)

print(f"\nQuery: {query}")
for i, doc in enumerate(results):
    # Each hit exposes its text as `page_content`
    print(f"\nResult {i+1}: {doc.page_content}")

✅ Vector store created with 10 chunks

Query: Who is Naheemot Adebiyi?

Result 1: Naheemot Adenike Adebiyi
 
AI & Machine Learning Engineer | Lagos, Nigeria
+234 810 120 5765 | naeemaadenike@gmail.com | LinkedIn
Professional Summary

Result 2: continuous learning, and real-world business problem solving.
Education
Bachelor of Science (B.Sc.)
University of Port Harcourt — 2017
Certifications
1
3MTT Program
2
ALX Diploma of Education
3


In [17]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Chat model that writes the final answer
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7, openai_api_key=api_key)

# Retriever view of the vector store; k=2 is passed through as a search kwarg
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# Prompt: the system message restricts the model to the supplied context, and
# the human message carries the question followed by the retrieved context
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer using ONLY the provided context."),
        ("human", "{question}\n\nContext:\n{context}"),
    ]
)

# Helper that flattens retrieved documents into a single context string
def format_docs(docs):
    """Return the ``page_content`` of every item in ``docs``, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Build the RAG chain with LCEL in two steps.
# Step 1: from the incoming question, produce both prompt variables in
# parallel: `context` (retrieve, then format) and `question` (passed through).
rag_inputs = RunnableParallel(context=retriever | format_docs, question=RunnablePassthrough())

# Step 2: fill the prompt, call the model, and reduce the reply to a string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

print("✅ RAG chain created")

✅ RAG chain created


In [18]:
# Ask the RAG chain a question and show its answer
response = rag_chain.invoke(
    "Who is Naheemot Adebiyi?"
)
print(response)

Naheemot Adenike Adebiyi is an AI & Machine Learning Engineer based in Lagos, Nigeria.


In [19]:
# Prompt variant: context comes before the question, and the system message
# tells the model to answer 'I don't know' when the context does not help
custom_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a precise assistant. If you don't know the answer based on the context, say 'I don't know'."),
        ("human", "Context: {context}\n\nQuestion: {question}"),
    ]
)

# Same retrieve -> prompt -> model -> string pipeline, with the custom prompt
custom_inputs = RunnableParallel(context=retriever | format_docs, question=RunnablePassthrough())
custom_rag = custom_inputs | custom_prompt | llm | StrOutputParser()

# Query the custom chain and show the answer
response = custom_rag.invoke("What is vector search?")
print(response)

I don't know.


In [20]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import MessagesPlaceholder

# Chat histories, keyed by session id
chat_store = {}

def get_session_history(session_id: str):
    """Return the history stored for ``session_id``, creating an empty one on first use."""
    try:
        return chat_store[session_id]
    except KeyError:
        # First time this session id is seen: register a fresh history.
        history = InMemoryChatMessageHistory()
        chat_store[session_id] = history
        return history

# Conversational prompt: prior turns are inserted between the system message
# and the current human message via the `chat_history` placeholder
conv_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer using the context provided."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "Context: {context}\n\nQuestion: {question}"),
    ]
)

# Base chain, step 1: map the input dict onto the three prompt variables.
# NOTE: retrieval is driven by the latest question text only, so the chat
# history reaches the prompt but does not affect which chunks are retrieved.
conv_inputs = RunnableParallel(
    context=lambda x: format_docs(retriever.invoke(x["question"])),
    question=lambda x: x["question"],
    chat_history=lambda x: x.get("chat_history", []),
)

# Base chain, step 2: fill the prompt, call the model, return plain text.
conv_chain_base = conv_inputs | conv_prompt | llm | StrOutputParser()

# Wrap with message history: "question" is the input key and "chat_history"
# is the key under which the stored messages are supplied
conv_chain = RunnableWithMessageHistory(
    conv_chain_base,
    get_session_history,
    input_messages_key="question",
    history_messages_key="chat_history",
)

print("✅ Conversational chain created")

✅ Conversational chain created


In [21]:
# Both turns use the same session id, so they share one stored chat history.
session_config = {"configurable": {"session_id": "session1"}}

# First question. The text lives in one variable so the printed question can
# never drift from the one actually sent to the chain.
question1 = "Who is Naheemot Adebiyi?"
response1 = conv_chain.invoke({"question": question1}, config=session_config)
print(f"Q1: {question1}")
print(f"A1: {response1}\n")

# Follow-up question, asked in the same session as the first one
question2 = "What does she do?"
response2 = conv_chain.invoke({"question": question2}, config=session_config)
print(f"Q2: {question2}")
print(f"A2: {response2}")

Q1: Who is Naheemot Adebiyi?
A1: Naheemot Adenike Adebiyi is an AI & Machine Learning Engineer based in Lagos, Nigeria. She graduated with a Bachelor of Science degree from the University of Port Harcourt in 2017 and holds certifications in the 3MTT Program and ALX Diploma of Education.

Q2: What does she do?
A2: Naheemot Adenike Adebiyi is an AI & Machine Learning Engineer who specializes in solving real-world business problems through continuous learning and application of artificial intelligence and machine learning techniques.


In [22]:
# Show every message stored for "session1", each prefixed by its type
session = get_session_history("session1")
lines = [f"\n{msg.type}: {msg.content}" for msg in session.messages]
print("Chat History:", *lines, sep="\n")

Chat History:

human: Who is Naheemot Adebiyi?

ai: Naheemot Adenike Adebiyi is an AI & Machine Learning Engineer based in Lagos, Nigeria. She graduated with a Bachelor of Science degree from the University of Port Harcourt in 2017 and holds certifications in the 3MTT Program and ALX Diploma of Education.

human: What does she do?

ai: Naheemot Adenike Adebiyi is an AI & Machine Learning Engineer who specializes in solving real-world business problems through continuous learning and application of artificial intelligence and machine learning techniques.


In [23]:
# Directory used for both saving and reloading the index
index_dir = "faiss_index"

# Persist the vector store to disk
vectorstore.save_local(index_dir)
print("✅ Vector store saved")

# Reload it with the same embedding client.
# SECURITY: allow_dangerous_deserialization=True opts in to loading serialized
# data from disk; only do this for an index you created yourself, as here.
loaded_vectorstore = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
print("✅ Vector store loaded")

# Sanity-check the reloaded index with a single-result search
test_results = loaded_vectorstore.similarity_search("LangChain", k=1)
top_hit = test_results[0]
print(f"\nTest result: {top_hit.page_content}")

✅ Vector store saved
✅ Vector store loaded

Test result: 3
LinkedIn Generative AI Certification
