In [1]:
import os
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from dotenv import load_dotenv

# Read key/value pairs from a .env file (if one is found) into the process
# environment so the os.getenv lookup for the API key further down can see them.
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Embedding model shared by the cells below. The API key is looked up in the
# environment instead of being written into the notebook.
embedding = OpenAIEmbeddings(
    openai_api_key=os.environ.get("MY_OPENAI_API_KEY"),
    model="text-embedding-3-large",
)


In [3]:
def load_faiss_index(index_path="faiss_index", embeddings=None):
    """
    Load a FAISS vector store that was previously saved to disk.

    Args:
        index_path: Location of the saved index, passed straight through to
            `FAISS.load_local`. Defaults to "faiss_index".
        embeddings: Embedding model handed to `FAISS.load_local`. When
            omitted, the notebook-level `embedding` object is used, which is
            what this function always did before the parameter existed.

    Returns:
        The vector store object returned by `FAISS.load_local`.
    """
    if embeddings is None:
        # Fall back to the notebook-level model so existing one-argument
        # calls keep working unchanged.
        embeddings = embedding

    # SECURITY: allow_dangerous_deserialization=True lets the loader unpickle
    # data stored with the index, and unpickling can execute arbitrary code.
    # Only load index directories that you created yourself or fully trust.
    vectorstore = FAISS.load_local(
        index_path,
        embeddings=embeddings,
        allow_dangerous_deserialization=True
    )
    print("FAISS index loaded.")
    return vectorstore

In [5]:
# Load the saved index from the directory one level above this notebook.
vectorstore = load_faiss_index(index_path="../faiss_index")

FAISS index loaded.


In [6]:
def retrieve_chunks(query, k=5, store=None):
    """
    Retrieve top-K most relevant chunks for the query.

    Args:
        query: Text to search the vector store with.
        k: Number of results to request from the store. Defaults to 5.
        store: Object exposing `similarity_search(query=..., k=...)`. When
            omitted, the notebook-level `vectorstore` is used, so existing
            calls behave exactly as before.

    Returns:
        Whatever the store's `similarity_search` returns; for the FAISS
        store loaded in this notebook that is a list of Document objects.
    """
    if store is None:
        # Default to the notebook-level index; passing `store` explicitly
        # removes the dependency on kernel state from an earlier cell.
        store = vectorstore

    results = store.similarity_search(
        query=query,
        k=k
    )

    print(f"Retrieved {len(results)} chunks.\n")
    return results

In [7]:
def display_chunks(chunks, max_chars=400):
    """
    Print a short, numbered preview of each retrieved chunk.

    For every item the "source" and "page" metadata entries are printed
    (None when a key is absent), followed by at most `max_chars` characters
    of the chunk text.

    Args:
        chunks: Iterable of objects with a `metadata` mapping and a
            `page_content` string.
        max_chars: Maximum number of characters of `page_content` to show
            per chunk. Defaults to 400.

    Returns:
        None. Output is written to stdout.
    """
    for i, doc in enumerate(chunks, start=1):
        print(f"--- Chunk {i} ---")
        print("Source:", doc.metadata.get("source"))
        print("Page:", doc.metadata.get("page"))

        text = doc.page_content
        if len(text) > max_chars:
            # Only show the ellipsis when text was actually cut off, so a
            # short chunk is not presented as if it had been truncated.
            print(text[:max_chars], "...")
        else:
            print(text)
        print()


In [8]:
# Try the retrieval pipeline on a sample question and preview the hits.
query = "How did Meta perform financially in 2024?"
chunks = retrieve_chunks(query=query, k=5)
display_chunks(chunks=chunks)


Retrieved 5 chunks.

--- Chunk 1 ---
Source: ../knowledge_base\Meta-2024-Annual-Report.pdf
Page: 58
Table of Contents
Stock Performance Graph
This performance graph shall not be deemed "soliciting material" or to be "filed" with the SEC for purposes of Section 18 of the Exchange Act, or otherwise
subject to the liabilities under that Section, and shall not be deemed to be incorporated by reference into any filing of Meta Platforms, Inc. under the Securities
Act of 1933, as amended, or the Exchan ...

--- Chunk 2 ---
Source: ../knowledge_base\Meta-2024-Annual-Report.pdf
Page: 87
Table of Contents
META PLATFORMS, INC.
CONSOLIDATED STATEMENTS OF INCOME
(In millions, except per share amounts)
 
Year Ended December 31,
2024
2023
2022
Revenue
$
164,501 
$
134,902 
$
116,609 
Costs and expenses:
 
 
Cost of revenue
30,161 
25,959 
25,249 
Research and development
43,873 
38,483 
35,338 
Marketing and sales
11,347 
12,301 
15,262 
General and administrative
9,740 
11,408 
11,816 ...

--- Chunk