In [14]:
from langchain_community.document_loaders import PyPDFLoader


import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# API credential handed to the OpenAI clients in later cells; None when the
# "genai_api_key" variable is not set.
key = os.environ.get("genai_api_key")

# Read the PDF into a list of LangChain documents.
path = "MARY-MAY.pdf"
loader = PyPDFLoader(path)
pages = loader.load()

In [15]:
# Report whether the API key was read from the environment. The original cell
# printed nothing when the key was missing, so a bad or absent .env entry went
# unnoticed at this point in the notebook.
if key:
    print("api_key Loaded successfully")
else:
    print("api_key NOT found: set genai_api_key in your .env file")

api_key Loaded successfully


In [16]:
# Show the extracted text of every loaded page, one page after another.
# Nothing is printed when no pages were loaded.
if pages:
    print(*(page.page_content for page in pages), sep="\n")

Name:  Adeoye  Mary  Oluwafunmilayo  
Abeokuta,  Nigeria  |  08133520650  |  Email:  maryadeoye7@gmail.com  
 
 PROFESSIONAL  SUMMARY  Detail-oriented  and  motivated  aspiring  Data  Analyst,  Data  Scientist,  and  AI/ML  Engineer  with  a  strong  background  in  Agricultural  Economics,  food  
processing,
 
and
 
soilless
 
farm
 
management.
 
Passionate
 
about
 
applying
 
data-driven
 
and
 
AI-based
 
solutions
 
to
 
agriculture,
 
food
 
systems,
 
and
 
business
 
problems.
 
Seeking
 
internship
 
or
 
entry-level
 
opportunities
 
to
 
gain
 
hands-on
 
experience.
 
 
 CORE  SKILLS  •  Git,  VS  Code  •  Data  Analysis  Fundamentals  •  Agricultural  Economics  •  Soilless  Farm  Management  •  Food  Processing  •  Analytical  Thinking,  Teamwork,  
Continuous
 
Learning
 
 
 PROJECTS  Data  Analysis  &  Machine  Learning  Practice  Projects  •  Working  on  beginner-to-intermediate  data  analysis  and  ML  projects  •  Exploring  AI  applications  in  agriculture  
an

In [17]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured for chunks of size 1000 with an overlap of 100 between
# neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Split the loaded pages into chunks; despite the singular name, `document`
# holds a list of chunk documents. The bare expression displays it.
document = splitter.split_documents(pages)
document

[Document(metadata={'producer': 'Skia/PDF m145 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Resume', 'source': 'MARY-MAY.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Name:  Adeoye  Mary  Oluwafunmilayo  \nAbeokuta,  Nigeria  |  08133520650  |  Email:  maryadeoye7@gmail.com  \n \n PROFESSIONAL  SUMMARY  Detail-oriented  and  motivated  aspiring  Data  Analyst,  Data  Scientist,  and  AI/ML  Engineer  with  a  strong  background  in  Agricultural  Economics,  food  \nprocessing,\n \nand\n \nsoilless\n \nfarm\n \nmanagement.\n \nPassionate\n \nabout\n \napplying\n \ndata-driven\n \nand\n \nAI-based\n \nsolutions\n \nto\n \nagriculture,\n \nfood\n \nsystems,\n \nand\n \nbusiness\n \nproblems.\n \nSeeking\n \ninternship\n \nor\n \nentry-level\n \nopportunities\n \nto\n \ngain\n \nhands-on\n \nexperience.\n \n \n CORE  SKILLS  •  Git,  VS  Code  •  Data  Analysis  Fundamentals  •  Agricultural  Economics  •  Soilless  Farm  Management  •  F

In [18]:

# Embeddings
from langchain_openai import OpenAIEmbeddings

# Embedding client used both for this smoke test and for building the
# vector store later on.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=key)

# Smoke test: embed one query and inspect the vector's length and its
# first few components.
test_embedding = embeddings.embed_query("Who is Adeoye?")
print(f"Embedding Dimension: {len(test_embedding)}")
print(f"first five: {test_embedding[:5]}")

Embedding Dimension: 1536
first five: [0.05064554512500763, -0.05947131663560867, -0.05205664411187172, 0.02407844178378582, -0.017125584185123444]


In [21]:
from langchain_community.vectorstores import FAISS

# Build a FAISS index from the chunk documents using the embedding client.
vectorstore = FAISS.from_documents(document, embeddings)

# Fixed: the original f-string had no space before "chunks", so the count and
# the word ran together in the output (e.g. "2chunks").
print(f"vector store created with {len(document)} chunks")

# Sanity-check retrieval: fetch the 2 chunks most similar to a sample query.
query = "Who is Adeoye?"
results = vectorstore.similarity_search(query, k=2)

print(f"\nQUERY: {query}")
print(results)

vector store created with 2chunks

QUERY: Who is Adeoye?
[Document(id='7844a09c-eb77-45e4-be1e-215e760c9e55', metadata={'producer': 'Skia/PDF m145 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Resume', 'source': 'MARY-MAY.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Name:  Adeoye  Mary  Oluwafunmilayo  \nAbeokuta,  Nigeria  |  08133520650  |  Email:  maryadeoye7@gmail.com  \n \n PROFESSIONAL  SUMMARY  Detail-oriented  and  motivated  aspiring  Data  Analyst,  Data  Scientist,  and  AI/ML  Engineer  with  a  strong  background  in  Agricultural  Economics,  food  \nprocessing,\n \nand\n \nsoilless\n \nfarm\n \nmanagement.\n \nPassionate\n \nabout\n \napplying\n \ndata-driven\n \nand\n \nAI-based\n \nsolutions\n \nto\n \nagriculture,\n \nfood\n \nsystems,\n \nand\n \nbusiness\n \nproblems.\n \nSeeking\n \ninternship\n \nor\n \nentry-level\n \nopportunities\n \nto\n \ngain\n \nhands-on\n \nexperience.\n \n \n CORE  SKILLS  •  Git,  VS  Co

In [22]:

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate



# Chat model used to generate the final answer; temperature=0 is passed to
# keep sampling randomness at its minimum.
# The key is passed as `api_key=` to match how OpenAIEmbeddings is configured
# elsewhere in this notebook (the original used the `openai_api_key=` spelling
# here only).
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    api_key=key,
)

# Two-message prompt: a system instruction restricting answers to the supplied
# context, and a human message carrying the {question} and {context} slots.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant to answer questions relating to Adeoye. Answer ONLY using the provided context."),
    ("human", "Question: {question}\n\nContext:\n{context}")
])

# Retriever view over the vector store, asking for up to 4 chunks per query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 4}
)

def format_docs(docs):
    """Concatenate the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line (``"\\n\\n"``);
        an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)

In [24]:
from langchain_core.runnables import RunnablePassthrough

In [25]:

# Compose the RAG pipeline: a mapping that supplies the prompt's "context" and
# "question" inputs, piped into the prompt template and then the chat model.
rag_chain = (
    {
        # Retriever output is piped through format_docs to build "context".
        "context": retriever | format_docs,
        # "question" is supplied via RunnablePassthrough().
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)

In [None]:

# Run the chain on a sample question and print the `content` attribute of the
# returned response.
response = rag_chain.invoke("Who is Adeoye?")
print(response.content)

Adeoye Mary Oluwafunmilayo is an aspiring Data Analyst, Data Scientist, and AI/ML Engineer with a background in Agricultural Economics, food processing, and soilless farm management. She is passionate about applying data-driven and AI-based solutions to agriculture, food systems, and business problems. Adeoye is seeking internship or entry-level opportunities to gain hands-on experience.
