In [1]:
!pip install \
  langchain_community \
  chromadb \
  langchain_openai \
  unstructured \
  langchain-text-splitters



In [2]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import glob

In [3]:
!pip install fpdf

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=fab749f7a267933f8e2770339586b991afa56216ccb1de5f132f91708c97519c
  Stored in directory: /root/.cache/pip/wheels/65/4f/66/bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [4]:
from fpdf import FPDF

# Build a one-page PDF holding a tiny FAQ; it serves as the sample corpus that
# the rest of the notebook loads, embeds and queries.
pdf = FPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()
pdf.set_font("Arial", size=12)

text = """Asked Questions (AQ)

1. What does NEOV do?
NEOV is an AI-powered company focused on automation and efficiency in the finance sector.
"""

pdf.multi_cell(0, 10, text)

# Relative to the working directory (not a Colab-only absolute path) so the
# notebook also runs outside Colab and so the directory is the same one the
# loader cell reads from.
documents_dir = "documents"

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(documents_dir, exist_ok=True)

pdf_file = os.path.join(documents_dir, "NEOV_FAQ.pdf")
pdf.output(pdf_file)

print(f"PDF successfully created: {pdf_file}")

PDF successfully created: /content/documents/NEOV_FAQ.pdf


In [5]:
# Load every PDF found in the directory the sample PDF was written to. Reusing
# `documents_dir` (instead of repeating a path literal) keeps the writer and the
# loader pointed at the same folder regardless of the working directory.
pdf_loader = DirectoryLoader(documents_dir, glob="*.pdf", loader_cls=PyPDFLoader)

# Second name for the same loader object; both names were bound by this cell
# originally, so both are kept.
loader = pdf_loader

In [6]:
# Run the directory loader and keep the resulting list of Document objects.
documents = pdf_loader.load()

In [7]:
# Sanity check: display the first loaded document (metadata and extracted text).
documents[0]

Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250224183906', 'source': 'documents/NEOV_FAQ.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Asked Questions (AQ)\n1. What does NEOV do?\nNEOV is an AI-powered company focused on automation and efficiency in the finance sector.')

In [8]:
import os
from getpass import getpass

# Never hard-code the key in the notebook. Keep a key that is already present in
# the environment; otherwise prompt for it without echoing it to the output.
# (Unconditionally assigning "" would wipe a valid key and make every OpenAI
# call below fail with an authentication error.)
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [9]:
# Embedding client used to vectorise the documents for the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [10]:
# Deterministic ids built from each document's source path and page number.
# Because the store is persisted to disk, re-running this cell without explicit
# ids would insert the same documents again under fresh random ids and
# similarity search would start returning duplicates. With stable ids a re-run
# overwrites the existing entries instead.
# NOTE(review): assumes (source, page) is unique per document, which holds for
# the page-level documents loaded above; revisit if documents are split into chunks.
document_ids = [
    f"{doc.metadata.get('source', 'unknown')}:{doc.metadata.get('page', idx)}"
    for idx, doc in enumerate(documents)
]

# Embed the documents and store them in a Chroma collection persisted on disk.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    ids=document_ids,
    persist_directory="./chroma_db",
)

In [11]:
# Question used both for the raw similarity search here and for the QA chain later.
query = "What does NEOV do?"
# Retrieve the stored documents that are most similar to the question.
results = vectorstore.similarity_search(query)



In [12]:
# Print the text of every retrieved document to check what the search returned.
for result in results:
    print(result.page_content)

Asked Questions (AQ)
1. What does NEOV do?
NEOV is an AI-powered company focused on automation and efficiency in the finance sector.


In [13]:
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model that writes the final answer. temperature=0 keeps the answer as
# deterministic as the API allows: this is a grounded question-answering task,
# so sampling randomness only makes the recorded output non-reproducible.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

# "stuff" chain: the retrieved documents are placed into a single prompt
# together with the question.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Last expression of the cell, so the returned dict (query + result) is displayed.
qa.invoke(query)



{'query': 'What does NEOV do?',
 'result': 'NEOV is an AI-powered company focused on automation and efficiency in the finance sector.'}