In [68]:
from langchain_community.document_loaders import PDFMinerPDFasHTMLLoader, AzureAIDocumentIntelligenceLoader, PyMuPDFLoader
from langchain_community.docstore.document import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from bs4 import BeautifulSoup
from langchain_community.vectorstores import Qdrant
from langchain.retrievers import ParentDocumentRetriever, ContextualCompressionRetriever, SelfQueryRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor, LLMChainFilter, DocumentCompressorPipeline
from langchain.storage import InMemoryStore
from langchain_openai import OpenAI, ChatOpenAI
from langchain.chains.query_constructor.base import AttributeInfo
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
import re

In [2]:
import os
import openai
from getpass import getpass

# Prompt for the OpenAI key at run time so it is never written into the notebook,
# then expose the same value through the OPENAI_API_KEY environment variable.
openai.api_key = getpass("Please provide your OpenAI Key: ")
os.environ["OPENAI_API_KEY"] = openai.api_key

In [3]:
# Convert the filing to HTML with PDFMiner. load() returns a list; only its first
# element is kept as `data`.
loader = PDFMinerPDFasHTMLLoader("meta_10k.pdf")
html_documents = loader.load()
data = html_documents[0]

In [4]:
# Parse the HTML produced by the loader and collect every <div> element; the
# section scan iterates over this list.
soup = BeautifulSoup(markup=data.page_content, features='html.parser')
content = soup.find_all(name='div')

In [5]:
# Group the text of the parsed <div> elements in `content` under the most recent
# "Item N" heading. Text seen before the first heading is discarded.
item_pattern = re.compile(r'Item\s\d+[A-Z]?.*')

snippets = []         # (heading, accumulated text) tuples, in document order
current_item = None   # heading being accumulated; None until the first match
current_text = ''

for div in content:
    span = div.find('span')
    # Ignore divs without a <span>, or whose first <span> has no style attribute.
    if not span or not span.get('style'):
        continue

    div_text = div.text
    if not item_pattern.match(div_text):
        # Ordinary body text: extend the section currently being collected.
        current_text += div_text
        continue

    # A heading starts a new section: flush the previous one, keep only the
    # heading's first line as the title, and start with empty text.
    if current_item:
        snippets.append((current_item, current_text))
    current_item = str(div_text.split('\n')[0])
    current_text = ''

# Flush the section that was still open when the loop ended.
if current_item:
    snippets.append((current_item, current_text))

In [None]:
print(len(snippets))
snippets[19]

In [6]:
# `snippets` is the list built in the previous cell: each entry is a (heading, section_text) tuple
# For example: snippets = [('Item 1. Business', '...section text...'), ('Item 1A. Risk Factors', '...section text...'), ...]

def _new_section_based_on_heading(heading):
    item_heading_pattern = re.compile(r'Item\s+\d+[A-Z]?\.?')
    return bool(item_heading_pattern.match(heading))

# Turn each (heading, text) snippet into a Document. A snippet whose heading does
# not start a new "Item" section is merged into the previous Document instead.
# NOTE(review): `data` must still be the Document returned by the PDFMiner loader
# when this cell runs; a later cell rebinds `data` to a list.
semantic_snippets = []
for item_heading, section_text in snippets:
    # The text is deliberately not unpacked into `content`: that name holds the
    # list of parsed <div> elements the snippet-extraction cell iterates over, and
    # rebinding it to a string here made that cell impossible to re-run.
    if not semantic_snippets or _new_section_based_on_heading(item_heading):
        # Loader metadata is merged last, so it wins if it ever defines 'heading'.
        metadata = {'heading': item_heading}
        metadata.update(data.metadata)
        semantic_snippets.append(Document(page_content=section_text, metadata=metadata))
    else:
        # Continuation of the current section: append to the latest Document.
        semantic_snippets[-1].page_content += ' ' + section_text

In [None]:
len(semantic_snippets)

In [None]:
print(semantic_snippets[18].page_content)
print(semantic_snippets[18].metadata)

In [None]:
print(len(semantic_snippets[18:]))
semantic_snippets[18:]

In [7]:
# Keep only the section Documents from index 18 onward.
# NOTE(review): 18 is a magic index — presumably it skips table-of-contents entries
# that also match the "Item N" pattern; confirm against the filing.
# NOTE(review): this rebinds `data`, which until now was the single Document
# returned by the PDFMiner loader. The section-building cell reads `data.metadata`,
# so it cannot be re-run after this cell without reloading the PDF.
data = semantic_snippets[18:]
data

[Document(page_content='Overview\nOur mission is to give people the power to build community and bring the world closer together.\nAll of our products, including our apps, share the vision of helping to bring the metaverse to life. We build technology that helps people connect and\nshare,  find  communities,  and  grow  businesses.  Our  products  enable  people  to  connect  and  share  with  friends  and  family  through  mobile  devices,  personal\ncomputers, virtual reality (VR) and mixed reality (MR) headsets, and wearables. We also help people discover and learn about what is going on in the world\naround them, enable people to share their experiences, ideas, photos and videos, and other activities with audiences ranging from their closest family members\nand  friends  to  the  public  at  large,  and  stay  connected  everywhere  by  accessing  our  products.  Meta  is  moving  our  offerings  beyond  2D  screens  toward\nimmersive experiences like augmented and virtual reality 

In [24]:
len(data)

23

In [47]:
print(data[22].page_content)

None.
130
Table of Contents
SIGNATURES
Pursuant to the requirements of Section 13 or 15(d) of the Securities Exchange Act of 1934, the Registrant has duly caused this Annual Report on Form
10-K to be signed on its behalf by the undersigned, thereunto duly authorized, in the City of Menlo Park, State of California, on this 1st day of February 2024.
Date:
February 1, 2024
META PLATFORMS, INC.
/s/ Susan Li 
Susan Li
Chief Financial Officer
131
Table of Contents
POWER OF ATTORNEY
KNOW ALL PERSONS BY THESE PRESENTS, that each person whose signature appears below constitutes and appoints Susan Li and Katherine R.
Kelly, and each of them, as his or her true and lawful attorneys-in-fact and agents, with full power of substitution and resubstitution, for him or her and in his or
her name, place and stead, in any and all capacities, to sign any and all amendments to this Annual Report on Form 10-K, and to file the same, with all exhibits
thereto, and other documents in connection therewith, with

In [21]:
file_path = "meta_10k.pdf"
# Credentials must not live in the notebook source. The endpoint and key are read
# from the environment; the key falls back to an interactive prompt when unset.
# SECURITY: a key was previously committed in this cell — treat it as compromised
# and rotate it in the Azure portal.
endpoint = os.environ.get(
    "AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT", "https://eastus.api.cognitive.microsoft.com/"
)
key = os.environ.get("AZURE_DOCUMENT_INTELLIGENCE_KEY") or getpass(
    "Please provide your Azure Document Intelligence key: "
)
loader = AzureAIDocumentIntelligenceLoader(
    api_endpoint=endpoint, api_key=key, file_path=file_path, api_model="prebuilt-layout", mode="page"
)

# Sends the PDF to the service and returns the resulting documents.
data_azure = loader.load()

In [23]:
len(data_azure)

147

In [45]:
print(data_azure[0].page_content)

UNITED STATES SECURITIES AND EXCHANGE COMMISSION Washington, D.C. 20549 FORM 10-K (Mark One) ☒ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2023 or ☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Commission File Number: 001-35551 0 Meta Meta Platforms, Inc. (Exact name of registrant as specified in its charter) Delaware (State or other jurisdiction of incorporation or organization) 20-1665019 (I.R.S. Employer Identification Number) 1 Meta Way, Menlo Park, California 94025 (Address of principal executive offices and Zip Code) (650) 543-4800 (Registrant's telephone number, including area code) Securities registered pursuant to Section 12(b) of the Act: Title of each class Trading symbol(s) META Class A Common Stock, $0.000006 par value Name of each exchange on which registered The Nasdaq Stock Market LLC Securities registered pursuant

In [39]:
# Third extraction path: read the same filing with PyMuPDF.
loader = PyMuPDFLoader(file_path="meta_10k.pdf")
data_pdf = loader.load()

In [41]:
len(data_pdf)

147

In [46]:
print(data_pdf[132].page_content)

Table of Contents
POWER OF ATTORNEY
KNOW ALL PERSONS BY THESE PRESENTS, that each person whose signature appears below constitutes and appoints Susan Li and Katherine R.
Kelly, and each of them, as his or her true and lawful attorneys-in-fact and agents, with full power of substitution and resubstitution, for him or her and in his or
her name, place and stead, in any and all capacities, to sign any and all amendments to this Annual Report on Form 10-K, and to file the same, with all exhibits
thereto, and other documents in connection therewith, with the Securities and Exchange Commission, granting unto said attorneys-in-fact and agents, and each
of them, full power and authority to do and perform each and every act and thing requisite and necessary to be done in connection therewith, as fully to all
intents and purposes as he or she might or could do in person, hereby ratifying and confirming that all said attorneys-in-fact and agents, or any of them or their
or his or her substitute o

In [48]:
# Semantic splitter backed by OpenAIEmbeddings constructed with no arguments.
# NOTE(review): the vector stores further down embed with "text-embedding-3-small";
# confirm that using a differently-configured embedder for chunking is intended.
text_splitter = SemanticChunker(embeddings=OpenAIEmbeddings())

In [49]:
# Apply the semantic splitter to each extraction: the PDFMiner section documents,
# the Azure documents, and the PyMuPDF documents.
texts = text_splitter.split_documents(documents=data)
texts_azure = text_splitter.split_documents(documents=data_azure)
texts_pdf = text_splitter.split_documents(documents=data_pdf)

In [50]:
print(len(texts))
print(len(texts_azure))
print(len(texts_pdf))

149
346
346


In [51]:
print(texts[0].page_content)
print(texts[0].metadata)

Overview
Our mission is to give people the power to build community and bring the world closer together. All of our products, including our apps, share the vision of helping to bring the metaverse to life. We build technology that helps people connect and
share,  find  communities,  and  grow  businesses. Our  products  enable  people  to  connect  and  share  with  friends  and  family  through  mobile  devices,  personal
computers, virtual reality (VR) and mixed reality (MR) headsets, and wearables. We also help people discover and learn about what is going on in the world
around them, enable people to share their experiences, ideas, photos and videos, and other activities with audiences ranging from their closest family members
and  friends  to  the  public  at  large,  and  stay  connected  everywhere  by  accessing  our  products. Meta  is  moving  our  offerings  beyond  2D  screens  toward
immersive experiences like augmented and virtual reality to help build the metaverse, whic

In [56]:
# Show one PyMuPDF-derived chunk together with its own metadata. The metadata line
# used to read chunk 0 while the text came from chunk 255, so the two printed
# values described different chunks.
print(texts_pdf[255].page_content)
print(texts_pdf[255].metadata)

Basic and diluted EPS are the same for each class of common stock because they are entitled to the same liquidation and dividend rights. The numerators and denominators of the basic and diluted EPS computations for our common stock are calculated as follows (in millions, except per
share amounts):
 
Year Ended December 31,
 
2023
2022
2021
 
Class
A
Class
B
Class
A
Class
B
Class
A
Class
B 
Basic EPS:
 
 
 
 
 
 
Numerator
 
 
 
 
 
 
Net income
$
33,722 
$
5,376 
$
19,729 
$
3,471 
$
33,328 
$
6,042 
Denominator
 
 
 
 
 
 
Shares used in computation of basic earnings per share
2,220 
354 
2,285 
402 
2,383 
432 
Basic EPS
$
15.19 
$
15.19 
$
8.63 
$
8.63 
$
13.99 
$
13.99 
Diluted EPS:
 
 
 
 
Numerator
 
 
 
 
 
 
Net income
$
33,722 
$
5,376 
$
19,729 
$
3,471 
$
33,328 
$
6,042 
Reallocation of net income as a result of conversion of Class B to
Class A common stock
5,376 
— 
3,471 
— 
6,042 
— 
Reallocation of net income to Class B common stock
— 
(112)
— 
(19)
— 
(93)
Net income f

In [57]:
# Show one Azure-derived chunk together with its own metadata. The metadata line
# used to read chunk 0 while the text came from chunk 255, so the two printed
# values described different chunks.
print(texts_azure[255].page_content)
print(texts_azure[255].metadata)

Basic and diluted EPS are the same for each class of common stock because they are entitled to the same liquidation and dividend rights. The numerators and denominators of the basic and diluted EPS computations for our common stock are calculated as follows (in millions, except per share amounts): Year Ended December 31, 2023 2022 2021 Class A Class B Class A Class B Class A Class B Basic EPS: Numerator Net income $ 33,722 $ 5,376 $ 19,729 $ 3,471 $ 33,328 $ 6,042 Denominator Shares used in computation of basic earnings per share 2,220 354 2,285 402 2,383 432 Basic EPS $ 15.19 $ 15.19 $ 8.63 $ 8.63 $ 13.99 $ 13.99 Diluted EPS: Numerator Net income $ 33,722 $ 5,376 $ 19,729 $ 3,471 $ 33,328 $ 6,042 Reallocation of net income as a result of conversion of Class B to Class A common stock 5,376 — 3,471 — 6,042 — Reallocation of net income to Class B common stock — (112 — (19) — (93) Net income for diluted EPS $ 39,098 $ 5,264 $ 23,200 $ 3,452 $ 39,370 $ 5,949 Denominator Shares used in comp

In [62]:
data_azure

[Document(page_content='UNITED STATES SECURITIES AND EXCHANGE COMMISSION Washington, D.C. 20549 FORM 10-K (Mark One) ☒ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2023 or ☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Commission File Number: 001-35551 0 Meta Meta Platforms, Inc. (Exact name of registrant as specified in its charter) Delaware (State or other jurisdiction of incorporation or organization) 20-1665019 (I.R.S. Employer Identification Number) 1 Meta Way, Menlo Park, California 94025 (Address of principal executive offices and Zip Code) (650) 543-4800 (Registrant\'s telephone number, including area code) Securities registered pursuant to Section 12(b) of the Act: Title of each class Trading symbol(s) META Class A Common Stock, $0.000006 par value Name of each exchange on which registered The Nasdaq Stock Market LLC Secur

In [70]:
# Fixed-size recursive splitter used as the baseline against the semantic splitter.
# NOTE(review): 4250 / 100 are unexplained magic numbers — record why they were
# chosen, or lift them into named constants.
text_splitter_rec = RecursiveCharacterTextSplitter(chunk_size = 4250, chunk_overlap = 100)

In [71]:
# Apply the recursive splitter to the same three extractions, in the same order as
# the semantic splits.
texts_rec = text_splitter_rec.split_documents(documents=data)
texts_azure_rec = text_splitter_rec.split_documents(documents=data_azure)
texts_pdf_rec = text_splitter_rec.split_documents(documents=data_pdf)

In [72]:
print(len(texts_rec))
print(len(texts_azure_rec))
print(len(texts_pdf_rec))

143
207
208


In [78]:
# Embedding model shared by every Qdrant collection built below. The trailing
# expression displays the configured object (the API key is masked in its repr).
embeddings_model = OpenAIEmbeddings(model = "text-embedding-3-small")
embeddings_model

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x2ae19c850>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x2ae17d850>, model='text-embedding-3-small', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [79]:
# In-memory collection over the semantic chunks of the PDFMiner sections.
qdrant = Qdrant.from_documents(
    documents=texts,
    embedding=embeddings_model,
    location=":memory:",
    collection_name='10K',
    force_recreate=True,
)
qdrant

<langchain_community.vectorstores.qdrant.Qdrant at 0x2bd788950>

In [80]:
# In-memory collection over the semantic chunks of the Azure extraction.
qdrant_azure = Qdrant.from_documents(
    documents=texts_azure,
    embedding=embeddings_model,
    location=":memory:",
    collection_name='10K_azure',
    force_recreate=True,
)
qdrant_azure

<langchain_community.vectorstores.qdrant.Qdrant at 0x2a6c6f810>

In [81]:
# In-memory collection over the semantic chunks of the PyMuPDF extraction.
qdrant_pdf = Qdrant.from_documents(
    documents=texts_pdf,
    embedding=embeddings_model,
    location=":memory:",
    collection_name='10K_pdf',
    force_recreate=True,
)
qdrant_pdf

<langchain_community.vectorstores.qdrant.Qdrant at 0x2afbf7f90>

In [82]:
# In-memory collections over the recursive-splitter chunks, one per extraction.
qdrant_rec = Qdrant.from_documents(
    documents=texts_rec,
    embedding=embeddings_model,
    location=":memory:",
    collection_name='10K_rec',
    force_recreate=True,
)
qdrant_azure_rec = Qdrant.from_documents(
    documents=texts_azure_rec,
    embedding=embeddings_model,
    location=":memory:",
    collection_name='10K_azure_rec',
    force_recreate=True,
)
qdrant_pdf_rec = Qdrant.from_documents(
    documents=texts_pdf_rec,
    embedding=embeddings_model,
    location=":memory:",
    collection_name='10K_pdf_rec',
    force_recreate=True,
)

In [102]:
# Evaluation questions reused by the retrieval experiments below.
query1 = "What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"
query2 = "Who are Meta's 'Directors' (i.e., members of the Board of Directors)?"
# Several later cells call `.invoke(query)`, but no cell ever defined `query`, so
# they would raise NameError on a fresh kernel. Bind it explicitly here.
# NOTE(review): aliased to query2 because that is the question searched in this
# cell — switch to query1 if the compression experiments were meant to use it.
query = query2
found_docs = qdrant_azure_rec.similarity_search(query2)

In [103]:
# Print the metadata of every document returned by the similarity search.
for hit in found_docs:
    print(hit.metadata)

{'page': 14, '_id': '80f6f281f55f47c894809f3dc8efe8f9', '_collection_name': '10K_azure_rec'}
{'page': 52, '_id': '9462227379de4f4287ab70a0c93ebac4', '_collection_name': '10K_azure_rec'}
{'page': 137, '_id': '29eb2789ba6f415a827dcf6eb6af8a2a', '_collection_name': '10K_azure_rec'}
{'page': 13, '_id': '9a880163e2ce4e3c9f12f68970b174d4', '_collection_name': '10K_azure_rec'}


In [104]:
# One default-configured retriever per vector store.
# Stores built from the semantic chunks:
qdrant_retriever, qdrant_retriever_pdf, qdrant_retriever_azure = [
    vector_store.as_retriever() for vector_store in (qdrant, qdrant_pdf, qdrant_azure)
]
# Stores built from the recursive-splitter chunks, same source order:
qdrant_retriever_rec, qdrant_retriever_pdf_rec, qdrant_retriever_azure_rec = [
    vector_store.as_retriever() for vector_store in (qdrant_rec, qdrant_pdf_rec, qdrant_azure_rec)
]

In [None]:
# Wire a ParentDocumentRetriever to a fresh in-memory docstore, a recursive child
# splitter (chunk_size=4000) and the existing `qdrant` vector store.
# NOTE(review): `vectorstore` is the same `qdrant` object that `qdrant_retriever`
# wraps, so anything this retriever adds is presumably also visible to that
# retriever — confirm the sharing is intended or give `pdr` its own collection.
store = InMemoryStore()
child_splitter = RecursiveCharacterTextSplitter(chunk_size=4000)
pdr = ParentDocumentRetriever(vectorstore=qdrant,
                              docstore=store,
                              child_splitter=child_splitter)
pdr

In [None]:
# Register the semantic chunks with the parent-document retriever. No explicit ids
# are supplied (ids=None) — presumably the retriever generates them; confirm.
# NOTE(review): re-running this cell adds the same documents again.
pdr.add_documents(texts, ids=None)

In [None]:
retrieved_docs = pdr.invoke(query)

In [None]:
len(retrieved_docs[0].page_content)

In [None]:
print(retrieved_docs[0].page_content)

In [111]:
llm = OpenAI(temperature=0)
llm.model_name

'gpt-3.5-turbo-instruct'

In [None]:
# Compressor built from LLMChainExtractor using the current `llm`.
compressor = LLMChainExtractor.from_llm(llm=llm)
compressor

In [None]:
# Wrap the semantic-chunk retriever with the current `compressor`.
ccr = ContextualCompressionRetriever(
    base_retriever=qdrant_retriever,
    base_compressor=compressor,
)
ccr

In [None]:
query

In [None]:
compressed_docs = ccr.invoke(query)

In [None]:
print(compressed_docs[3].page_content)

In [None]:
# Rebind `compressor` to an LLMChainFilter built from the current `llm`; this
# replaces the extractor bound to the same name earlier.
compressor = LLMChainFilter.from_llm(llm=llm)
compressor

In [None]:
# Rebuild `ccr` around whichever `compressor` is currently bound.
ccr = ContextualCompressionRetriever(
    base_retriever=qdrant_retriever,
    base_compressor=compressor,
)
ccr

In [None]:
compressed_docs = ccr.invoke(query)

In [None]:
compressed_docs

In [None]:
# Two-stage compressor: the filter is listed first, the extractor second.
compressor_filter = LLMChainFilter.from_llm(llm=llm)
compressor_extractor = LLMChainExtractor.from_llm(llm=llm)
compressor_pipeline = DocumentCompressorPipeline(
    transformers=[compressor_filter, compressor_extractor],
)
compressor_pipeline

In [None]:
# Rebuild `ccr` around the filter-then-extract pipeline.
ccr = ContextualCompressionRetriever(
    base_retriever=qdrant_retriever,
    base_compressor=compressor_pipeline,
)
ccr

In [None]:
compressed_docs = ccr.invoke(query)

In [None]:
compressed_docs

In [None]:
texts[0].metadata

In [512]:
# Inputs for the self-query retrievers.
# NOTE(review): metadata_field_info is empty, so no metadata attributes are
# described to the query constructor — presumably the self-query retrievers cannot
# build metadata filters from it; confirm whether AttributeInfo entries (e.g. for
# 'page' or 'heading') were meant to be listed here.
metadata_field_info = [
]

# Natural-language description of the document contents passed to the retriever.
document_content_desc = "Form 10-K annual report required by the U.S. Securities and Exchange Commission (SEC), that gives a comprehensive summary of a company's financial performance for company Meta for year 2023"

In [513]:
llm = ChatOpenAI(temperature=0)
llm.model_name

'gpt-3.5-turbo'

In [514]:
# Self-query retrievers over the two recursive-chunk stores, sharing the same LLM,
# content description and (empty) attribute list.
sqr_rec = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=qdrant_rec,
    document_contents=document_content_desc,
    metadata_field_info=metadata_field_info,
)
sqr_pdf_rec = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=qdrant_pdf_rec,
    document_contents=document_content_desc,
    metadata_field_info=metadata_field_info,
)

In [105]:
# Prompt text shared by the string prompt and the chat prompt. It exposes two
# placeholders, {context} and {question}.
# NOTE(review): the wording contains typos ("an helpful", a double space); it is
# left untouched here because changing it changes what is sent to the model.
template = """You are an helpful assistant for question-answering tasks, specifically you are an expert in answering SEC 10-K report questions.
If you  don't know the answer, just say that you don't know.
Answer based on the context given to you, for a given question.

Context:
{context}

Question:
{question}

Answer:"""

# String-style prompt built from the template; displayed as the cell output.
rag_prompt = PromptTemplate.from_template(template)
rag_prompt

PromptTemplate(input_variables=['context', 'question'], template="You are an helpful assistant for question-answering tasks, specifically you are an expert in answering SEC 10-K report questions.\nIf you  don't know the answer, just say that you don't know.\nAnswer based on the context given to you, for a given question.\n\nContext:\n{context}\n\nQuestion:\n{question}\n\nAnswer:")

In [106]:
print(rag_prompt.template)

You are an helpful assistant for question-answering tasks, specifically you are an expert in answering SEC 10-K report questions.
If you  don't know the answer, just say that you don't know.
Answer based on the context given to you, for a given question.

Context:
{context}

Question:
{question}

Answer:


In [107]:
# Chat-style prompt built from the same template text as `rag_prompt`.
rag_chat_prompt = ChatPromptTemplate.from_template(template=template)
rag_chat_prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an helpful assistant for question-answering tasks, specifically you are an expert in answering SEC 10-K report questions.\nIf you  don't know the answer, just say that you don't know.\nAnswer based on the context given to you, for a given question.\n\nContext:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"))])

In [108]:
print(rag_chat_prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an helpful assistant for question-answering tasks, specifically you are an expert in answering SEC 10-K report questions.\nIf you  don't know the answer, just say that you don't know.\nAnswer based on the context given to you, for a given question.\n\nContext:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"))]


In [109]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

In [122]:
llm = ChatOpenAI(model = 'gpt-3.5-turbo',temperature=0)
llm.model_name

'gpt-3.5-turbo'

In [459]:
# Retrieval-augmented QA chain over the Azure recursive-chunk retriever.
# Input: {"question": str}. Output: {"response": <LLM output>, "context": <retrieved docs>}.
rag_qa_chain =  (
    # Stage 1: look up context for the question and pass the question through.
    {"context": itemgetter("question") | qdrant_retriever_azure_rec, "question": itemgetter("question")}
    # Stage 2: re-assigns "context" to the value it already has. Do not delete it
    # casually: without a Runnable between them, the two plain dicts would be
    # combined by Python's dict `|` (a dictionary merge) instead of forming a chain.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Stage 3: answer with the prompt + LLM and return the context alongside.
    | {"response": rag_prompt | llm, "context": itemgetter("context")}
)

In [460]:
result1 = rag_qa_chain.invoke({"question": query1})
result2 = rag_qa_chain.invoke({"question": query2})

In [461]:
print(result1['response'].content)
print(result2['response'].content)

The total value of 'Cash and cash equivalents' as of December 31, 2023, was $41.862 billion.
The members of Meta's Board of Directors are not explicitly mentioned in the provided text. Therefore, I don't know the specific names of Meta's Directors based on the given context.


In [539]:
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from langchain.chains import RetrievalQA
from langchain.callbacks import StdOutCallbackHandler
# Combine the PyMuPDF and PDFMiner recursive-chunk retrievers. No weights are
# passed; the repr shown in this cell's output reports weights=[0.5, 0.5].
ensemble_retriever = EnsembleRetriever(retrievers=[qdrant_retriever_pdf_rec,qdrant_retriever_rec])
ensemble_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['Qdrant', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.qdrant.Qdrant object at 0x2a19a0750>), VectorStoreRetriever(tags=['Qdrant', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.qdrant.Qdrant object at 0x2a37f35d0>)], weights=[0.5, 0.5])

In [540]:
# "stuff"-type RetrievalQA chain over the ensemble retriever. It answers with the
# chat prompt, logs to stdout, and returns the source documents with the answer.
handler = StdOutCallbackHandler()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ensemble_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": rag_chat_prompt},
    return_source_documents=True,
    callbacks=[handler],
    verbose=True,
)

In [541]:
# Run both evaluation questions through the RetrievalQA chain. `.invoke()` replaces
# the direct `qa_chain(...)` call, which LangChain has deprecated, and matches how
# the other chains in this notebook are executed. The result is a dict that is read
# below through its 'result' and 'source_documents' keys.
response1 = qa_chain.invoke({"query": query1})
response2 = qa_chain.invoke({"query": query2})



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [542]:
print(response1['source_documents'])
print(response2['source_documents'])

[Document(page_content='Table of Contents\nNote\xa05. Financial Instruments\nInstruments Measured at Fair Value\nWe classify our cash equivalents and marketable debt securities within Level\xa01 or Level\xa02 because we use quoted market prices or alternative pricing\nsources and models utilizing market observable inputs to determine their fair value. Certain other assets are classified within Level\xa03 because factors used to\ndevelop the estimated fair value are unobservable inputs that are not supported by market activity.\nThe following tables summarize our assets measured at fair value on a recurring basis and the classification by level of input within the fair value\nhierarchy (in millions):\n\xa0\n\xa0\nFair Value Measurement at Reporting Date Using\nDescription\nDecember 31, 2023\nQuoted\xa0Prices in Active\nMarkets for\nIdentical\xa0Assets\n(Level 1)\nSignificant Other\nObservable Inputs\n(Level 2)\nSignificant Unobservable\nInputs\n(Level 3)\nCash\n$\n6,265\xa0\nCash equiva

In [543]:
print(response1['result'])
print(response2['result'])

The total value of 'Cash and cash equivalents' as of December 31, 2023, was $41.862 billion.
The Directors of Meta Platforms, Inc. are:
1. Mark Zuckerberg
2. Susan Li
3. Aaron Anderson
4. Peggy Alford
5. Marc L. Andreessen
6. Andrew W. Houston
7. Nancy Killefer
8. Robert M. Kimmitt
9. Sheryl K. Sandberg
10. Tracey T. Travis
11. Tony Xu


In [504]:
# Print the metadata of each source document behind the first answer.
for source_doc in response1['source_documents']:
    print(source_doc.metadata)

{'source': 'meta_10k.pdf', 'file_path': 'meta_10k.pdf', 'page': 107, 'total_pages': 147, 'format': 'PDF 1.4', 'title': '0001326801-24-000012', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-K filed on 2024-02-02 for the period ending 2023-12-31', 'keywords': '0001326801-24-000012; ; 10-K', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': "D:20240202060356-05'00'", 'modDate': "D:20240202060413-05'00'", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': '79211edf788943599c149724aad453a4', '_collection_name': '10K_pdf_rec'}
{'heading': "Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations", 'source': 'meta_10k.pdf', '_id': '85b5047a36974c44948d5b6f0c0e54b4', '_collection_name': '10K_rec'}
{'source': 'meta_10k.pdf', 'file_path': 'meta_10k.pdf', 'page': 108, 'total_pages': 147, 'format': 'PDF 1.4', 'title': '0001326801-24-000012', 'author

In [565]:
# Rebinds `rag_qa_chain`: same three-stage shape as before, but context now comes
# from the self-query retriever over the PyMuPDF recursive chunks, and the chat
# prompt is used instead of the string prompt.
# Input: {"question": str}. Output: {"response": <LLM output>, "context": <retrieved docs>}.
rag_qa_chain =  (
    # Stage 1: look up context for the question and pass the question through.
    {"context": itemgetter("question") | sqr_pdf_rec, "question": itemgetter("question")}
    # Stage 2: re-assigns "context" to the value it already has. Do not delete it
    # casually: without a Runnable between them, the two plain dicts would be
    # combined by Python's dict `|` (a dictionary merge) instead of forming a chain.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Stage 3: answer with the chat prompt + LLM and return the context alongside.
    | {"response": rag_chat_prompt | llm, "context": itemgetter("context")}
)

In [566]:
result1 = rag_qa_chain.invoke({"question": query1})
result2 = rag_qa_chain.invoke({"question": query2})

In [567]:
print(result1['response'].content)
print(result2['response'].content)

The total value of 'Cash and cash equivalents' as of December 31, 2023, was $41.862 billion.
The directors of Meta, as listed in the SEC 10-K report, are:
1. Peggy Alford
2. Marc L. Andreessen
3. Andrew W. Houston
4. Nancy Killefer
5. Robert M. Kimmitt
6. Sheryl K. Sandberg
7. Tracey T. Travis
8. Tony Xu
