In [203]:
# attach to the same event-loop
import nest_asyncio

nest_asyncio.apply()

In [204]:
# attach to the same event-loop
import nest_asyncio

nest_asyncio.apply()

import logging
import sys

# Set up the root logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Set logger level to INFO

# Clear out any existing handlers
logger.handlers = []

# Set up the StreamHandler to output to sys.stdout (Colab's output)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)  # Set handler level to INFO

# Add the handler to the logger
logger.addHandler(handler)

In [None]:
from llama_index.core.evaluation import (
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    CorrectnessEvaluator,
    RetrieverEvaluator,
    generate_question_context_pairs,
    EmbeddingQAFinetuneDataset
)

In [None]:
import os

# Now you can use it
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
print("API Key Set Successfully!")

API Key Set Successfully!


Loading the Data of Coco-Cola 10Kfilling from 2015 to 2024

In [5]:
import os
import requests
from bs4 import BeautifulSoup
from llama_parse import LlamaParse

# Define the target URL for Coca-Cola 10-K filings
url = "https://investors.coca-colacompany.com/filings-reports/annual-filings-10-k"

# Step 1: Scrape the webpage to get PDF links
def get_pdf_links(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    pdf_links = []

    for a_tag in soup.find_all('a', href=True):
        if a_tag['href'].endswith('.pdf'):
            pdf_links.append(a_tag['href'])

    return pdf_links

pdf_links = get_pdf_links(url)
print(f"Found {len(pdf_links)} PDF links.")

# Step 2: Download PDFs
def download_pdfs(pdf_links, save_dir="coca_cola_10k_pdfs"):
    os.makedirs(save_dir, exist_ok=True)
    for link in pdf_links:
        pdf_name = link.split("/")[-1]
        pdf_path = os.path.join(save_dir, pdf_name)

        response = requests.get(link)
        with open(pdf_path, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded: {pdf_name}")

    return save_dir

pdf_directory = download_pdfs(pdf_links)



Found 10 PDF links.
Downloaded: 0001104659-24-035312.pdf
Downloaded: 0000021344-24-000009.pdf
Downloaded: 0000021344-23-000011.pdf
Downloaded: 0000021344-22-000009.pdf
Downloaded: 0000021344-21-000008.pdf
Downloaded: 0000021344-20-000006.pdf
Downloaded: 0000021344-19-000014.pdf
Downloaded: 0000021344-18-000008.pdf
Downloaded: 0000021344-17-000009.pdf
Downloaded: 0000021344-16-000050.pdf


In [7]:
!pip install llama-index


1317.85s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


In [12]:
pip install pymupdf

3374.23s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
Collecting pymupdf
  Using cached pymupdf-1.25.3-cp39-abi3-macosx_11_0_arm64.whl.metadata (3.4 kB)
Using cached pymupdf-1.25.3-cp39-abi3-macosx_11_0_arm64.whl (18.6 MB)
Installing collected packages: pymupdf
Successfully installed pymupdf-1.25.3
Note: you may need to restart the kernel to use updated packages.


In [None]:
import fitz  # PyMuPDF
from llama_index.core import Document  # Correct import for Document
import os

# Path to the directory containing the 10-K filings
pdf_folder_path = "/Users/spoorthiramireddygari/Downloads/RAG systems using LlamaIndex/Module 6 - Advanced approaches for powerful RAG system/coca_cola_10k_pdfs"

# Function to extract text from a PDF using PyMuPDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text

# Load and parse all PDFs in the directory
documents = []
for filename in os.listdir(pdf_folder_path):
    if filename.endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        print(f"Processing {filename}...")
        text = extract_text_from_pdf(file_path)
        documents.append(Document(text=text, metadata={"filename": filename}))

# Print the number of documents loaded
print(f"Number of documents loaded: {len(documents)}")

# Print the text of the first document (for debugging)
print("Sample text from the first document:")
print(documents[0].text[:1000])  # Print the first 1000 characters of the first document


Processing 0000021344-19-000014.pdf...
Processing 0000021344-23-000011.pdf...
Processing 0000021344-17-000009.pdf...
Processing 0000021344-15-000005-2.pdf...
Processing 0000021344-18-000008.pdf...
Processing 0000021344-22-000009.pdf...
Processing 0000021344-21-000008.pdf...
Processing 0000021344-24-000009.pdf...
Processing 0000021344-20-000006.pdf...
Processing 0000021344-16-000050.pdf...
Number of documents loaded: 10
Sample text from the first document:
UNITED STATES
SECURITIES AND EXCHANGE COMMISSION
Washington, D.C. 20549
FORM 10-K
(Mark One)
ý
ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the fiscal year ended December 31, 2018
OR
o
TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the transition period from to
Commission File Number 001-02217
(Exact name of Registrant as specified in its charter)
Delaware
(State or other jurisdiction of incorporation or organization)
58-0628465
(I.R.S. Employer 

Here only using the 10k filling documents for the year 2023 and 2024 for the sake of cost and resource management 

In [32]:
# Filter documents for 2023 and 2024
filtered_documents = []
for doc in documents:
    filename = doc.metadata["filename"]
    year = filename.split("-")[1]  # Extract the year part (e.g., "15" for 2015)
    if year in ["23", "24"]:  # Filter for 2023 and 2024
        filtered_documents.append(doc)

# Print the number of filtered documents
print(f"Number of filtered documents: {len(filtered_documents)}")

Number of filtered documents: 2


In [34]:
filtered_documents[1]



Set Up the Base Index - Standalone Usage

In [36]:
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

Settings.text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

# per-index
index = VectorStoreIndex.from_documents(
    filtered_documents,
    transformations=[SentenceSplitter(chunk_size=1024, chunk_overlap=20)],
)
index = VectorStoreIndex.from_documents(documents)

In [40]:
index.index_struct.__dict__

{'index_id': '75103c9b-8799-4b4d-abd2-630db92c7b74',
 'summary': None,
 'nodes_dict': {'5bd6ed70-c061-4879-9221-1d08c03d3a78': '5bd6ed70-c061-4879-9221-1d08c03d3a78',
  '8d4e2f19-ce30-4299-a674-2cd54ca06878': '8d4e2f19-ce30-4299-a674-2cd54ca06878',
  '93c24a7c-6a31-4290-95ac-950b7ee6fe96': '93c24a7c-6a31-4290-95ac-950b7ee6fe96',
  'c52e56e1-b537-4be7-9bfe-f0ea5d140868': 'c52e56e1-b537-4be7-9bfe-f0ea5d140868',
  'bd02868d-875e-4298-8e55-e81d831aa984': 'bd02868d-875e-4298-8e55-e81d831aa984',
  'a1563557-c07f-40bd-9688-f5bcf31d3b17': 'a1563557-c07f-40bd-9688-f5bcf31d3b17',
  '1d0b65d9-010a-4ad8-8485-6dc894ac023d': '1d0b65d9-010a-4ad8-8485-6dc894ac023d',
  '6f14e4ad-56fa-41fa-8e9a-a7dc082ebcdc': '6f14e4ad-56fa-41fa-8e9a-a7dc082ebcdc',
  '50d667bb-1a48-405d-b9a6-762561b3b8e2': '50d667bb-1a48-405d-b9a6-762561b3b8e2',
  'e124b77d-aafe-4613-95e3-7bf55d47a680': 'e124b77d-aafe-4613-95e3-7bf55d47a680',
  '6cdc7e98-b064-4379-ab6e-2a9e872fd21a': '6cdc7e98-b064-4379-ab6e-2a9e872fd21a',
  '545a5725-e

In [45]:
len(index.index_struct.nodes_dict)

2002

In [44]:
# Retrieve a dict mapping of ingested documents and their nodes+metadata.
index.ref_doc_info.keys()

dict_keys(['335fef4f-4361-412b-888f-ebc8daffdef2', '7c509c53-0295-4c83-82a6-406e6272b9f0', '054ca498-fd1a-4c50-9e64-022fa4c001a4', 'f9cfd3d9-2ec8-4a35-b98b-df78ddcfa0b0', '623ca537-54bd-464c-b161-ac8f6e5deb31', 'f9cedc7b-7d53-4b1d-987a-76476270e7d9', 'dd33f144-6be9-4958-b6b5-9e7a0fa4e288', 'c7e67796-aff3-4f72-bbca-18ffa14a56f4', '4bab9ddb-c3f7-4d78-b872-38cbacddb9c1', '8fcfaddd-e281-4f2a-a2c5-4d79dcef56da'])

In [75]:
base_query_engine = index.as_query_engine(similarity_top_k=3)

In [78]:
base_index_retriever = index.as_retriever(retriever_mode='embedding', similarity_top_k=3)


In [190]:
query1 = " What was the total revenue (Net Operating Revenues) of Coca-Cola in 2023? "

In [96]:
nodes = base_index_retriever.retrieve(query1)

In [97]:
len(nodes)

3

In [98]:
for node in nodes:
  print("Node Id:",node.id_)
  print("Metadata:",node.metadata)
  print("Score:",node.get_score())
  print("------------------------------------")

Node Id: 0d6a62c3-9e7f-432e-8cab-80cb487c1582
Metadata: {'filename': '0000021344-15-000005-2.pdf'}
Score: 0.8754568135975062
------------------------------------
Node Id: ecdd4c1f-8746-43ad-aa5f-66c8067d18fc
Metadata: {'filename': '0000021344-18-000008.pdf'}
Score: 0.8713790825829186
------------------------------------
Node Id: 8212785c-4e34-461a-93d1-0a952356e411
Metadata: {'filename': '0000021344-18-000008.pdf'}
Score: 0.8583082464315249
------------------------------------


In [99]:
response = base_query_engine.query(query1)

In [100]:
print(response)

The total revenue (Net Operating Revenues) of Coca-Cola in 2023 was $35,410 million.


Sentence Window Retreiver 

In [101]:
from llama_index.llms.openai import OpenAI
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [102]:
from llama_index.embeddings.openai import OpenAIEmbedding
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

In [103]:
from llama_index.core.node_parser import SentenceWindowNodeParser
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [104]:
nodes = node_parser.get_nodes_from_documents(filtered_documents)

In [105]:
len(nodes)

6319

In [106]:
sentence_index = VectorStoreIndex(nodes, embed_model=embed_model, show_progress=True)

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/175 [00:00<?, ?it/s]

In [113]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
sentence_query_engine = sentence_index.as_query_engine(
    similarity_top_k=3,
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)

In [114]:
sentence_index_retriever = sentence_index.as_retriever(retriever_mode='embedding', similarity_top_k=3)

In [115]:
sentence_window_response = sentence_query_engine.query(query1)

In [116]:
nodes_retriver = sentence_index_retriever.retrieve(query1)

In [121]:
len(nodes_retriver)


3

In [119]:
for node in nodes_retriver:
  print("Node Id:",node.id_)
  print("Metadata:",node.metadata)
  print("Score:",node.get_score())
  print("------------------------------------")

Node Id: c184af8f-7009-43bf-bead-2a10b779ea9a
Metadata: {'filename': '0000021344-24-000009.pdf', 'window': 'Distribution System\nWe make our branded beverage products available to consumers in more than 200 countries and territories through our network \nof independent bottling partners, distributors, wholesalers and retailers as well as our consolidated bottling and distribution \noperations.  Consumers enjoy finished beverage products bearing trademarks owned by or licensed to the Company at a rate of \n4\n2.2 billion servings each day.  Our strong and stable bottling and distribution system helps us capture growth by manufacturing, \ndistributing and selling existing, enhanced and new innovative products to consumers throughout the world.\n The Coca-Cola system sold 33.3 billion and 32.7 billion unit cases of our products in 2023 and 2022, respectively.  Sparkling \nsoft drinks represented 69% of our worldwide unit case volume in both 2023 and 2022.  Trademark Coca-Cola accounted fo

In [120]:
print(sentence_window_response)

The total revenue (Net Operating Revenues) of Coca-Cola in 2023 was $45,754 million.


## Parse Chunk Hierarchy from Text, Load into Storage

Automerging Retriever

In [136]:
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes, get_root_nodes

# Define chunk sizes explicitly to enforce hierarchy
node_parser_am = HierarchicalNodeParser.from_defaults(
    chunk_sizes=[2048, 1024, 512]  # Larger chunks first, then progressively smaller
)

# Parse hierarchical nodes
nodes_am = node_parser_am.get_nodes_from_documents(filtered_documents)

# Extract leaf and root nodes
leaf_nodes = get_leaf_nodes(nodes_am)
root_nodes = get_root_nodes(nodes_am)

# Print counts
print(f"Total Nodes: {len(nodes_am)}")
print(f"Leaf Nodes: {len(leaf_nodes)}")
print(f"Root Nodes: {len(root_nodes)}")


Total Nodes: 1369
Leaf Nodes: 844
Root Nodes: 143


In [137]:
# define storage context
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage import StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

docstore = SimpleDocumentStore()

# insert nodes into docstore
docstore.add_documents(nodes_am)

# define storage context (will include vector store by default too)
storage_context = StorageContext.from_defaults(docstore=docstore)

In [138]:
## Load index into vector index
from llama_index.core import VectorStoreIndex

base_index_AM = VectorStoreIndex(
    leaf_nodes,
    embed_model = OpenAIEmbedding(model='text-embedding-3-small'),
    storage_context=storage_context,
)

Automerging Retriever 

In [139]:
from llama_index.core.retrievers.auto_merging_retriever import AutoMergingRetriever
base_retriever_AM = base_index_AM.as_retriever(similarity_top_k=3)
retriever_AM= AutoMergingRetriever(base_retriever_AM, storage_context, verbose=True)

In [140]:

base_nodes_AMR = base_retriever_AM.retrieve(query1)

nodes_AMR = retriever_AM.retrieve(query1)

In [141]:
len(nodes_AMR)

6

In [142]:
len(base_nodes_AMR)

6

In [144]:
from llama_index.core.response.notebook_utils import display_source_node

for node in nodes_AMR:
    display_source_node(node, source_length=10000)

**Node ID:** b8d28144-b26e-4aae-8559-a421e2c883e5<br>**Similarity:** 0.6753678467755753<br>**Text:** 59
ITEM 8.  FINANCIAL STATEMENTS AND SUPPLEMENTARY DATA
Table of Contents
Page 
Consolidated Statements of Income
61
Consolidated Statements of Comprehensive Income
62
Consolidated Balance Sheets
63
Consolidated Statements of Cash Flows
64
Consolidated Statements of Shareowners’ Equity
65
Notes to Consolidated Financial Statements
66
Note 1
Business and Summary of Significant Accounting Policies
66
Note 2
Acquisitions and Divestitures
72
Note 3
Net Operating Revenues
74
Note 4
Investments
76
Note 5
Hedging Transactions and Derivative Financial Instruments
78
Note 6
Equity Method Investments
84
Note 7
Intangible Assets
85
Note 8
Accounts Payable and Accrued Expenses
87
Note 9
Supply Chain Finance Program
87
Note 10 Leases
87
Note 11 Debt and Borrowing Arrangements
88
Note 12 Commitments and Contingencies
89
Note 13 Stock-Based Compensation Plans
92
Note 14 Pension and Other Postretirement Benefit Plans
95
Note 15 Income Taxes
103
Note 16 Other Comprehensive Income
106
Note 17 Fair Value Measurements
109
Note 18 Significant Operating and Nonoperating Items
115
Note 19 Restructuring
116
Note 20 Operating Segments
118
Note 21 Net Change in Operating Assets and Liabilities
123
124
126
Report of Management
Report of Independent Registered Public Accounting Firm (PCAOB ID: 42)
Report of Independent Registered Public Accounting Firm on Internal Control Over Financial Reporting
128
60
THE COCA-COLA COMPANY AND SUBSIDIARIES
CONSOLIDATED STATEMENTS OF INCOME
(In millions except per share data)
Year Ended December 31,
2023
2022
2021
Net Operating Revenues
$ 
45,754 $ 
43,004 $ 
38,655 
Cost of goods sold
18,520 
18,000 
15,357 
Gross Profit
27,234 
25,004 
23,298 
Selling, general and administrative expenses
13,972 
12,880 
12,144 
Other operating charges
1,951 
1,<br>

**Node ID:** 24107b08-699d-41c2-962f-5878e27ccdbc<br>**Similarity:** 0.6593718671632317<br>**Text:** 59
ITEM 8.  FINANCIAL STATEMENTS AND SUPPLEMENTARY DATA
Table of Contents
Page 
Consolidated Statements of Income
61
Consolidated Statements of Comprehensive Income
62
Consolidated Balance Sheets
63
Consolidated Statements of Cash Flows
64
Consolidated Statements of Shareowners’ Equity
65
Notes to Consolidated Financial Statements
66
Note 1
Business and Summary of Significant Accounting Policies
66
Note 2
Acquisitions and Divestitures
72
Note 3
Net Operating Revenues
74
Note 4
Investments
76
Note 5
Hedging Transactions and Derivative Financial Instruments
78
Note 6
Equity Method Investments
84
Note 7
Intangible Assets
86
Note 8
Accounts Payable and Accrued Expenses
87
Note 9
Leases
87
Note 10
Debt and Borrowing Arrangements
88
Note 11
Commitments and Contingencies
90
Note 12
Stock-Based Compensation Plans
92
Note 13
Pension and Other Postretirement Benefit Plans
96
Note 14
Income Taxes
102
Note 15
Other Comprehensive Income
106
Note 16
Fair Value Measurements
109
Note 17
Significant Operating and Nonoperating Items
115
Note 18
Restructuring
116
Note 19
Operating Segments
118
Note 20
Net Change in Operating Assets and Liabilities
123
Report of Management
124
Report of Independent Registered Public Accounting Firm (PCAOB ID: 42)
126
Report of Independent Registered Public Accounting Firm on Internal Control Over Financial Reporting
128
60
THE COCA-COLA COMPANY AND SUBSIDIARIES
CONSOLIDATED STATEMENTS OF INCOME
(In millions except per share data)
Year Ended December 31,
2022
2021
2020
Net Operating Revenues
$
43,004  $
38,655  $
33,014 
Cost of goods sold
18,000 
15,357 
13,433 
Gross Profit
25,004 
23,298 
19,581 
Selling, general and administrative expenses
12,880 
12,144 
9,731 
Other operating charges
1,<br>

**Node ID:** b1f4b048-86e0-481a-be2d-86cf9b30815c<br>**Similarity:** 0.6254581018148931<br>**Text:** Divestitures
During 2023, proceeds from disposals of businesses, equity method investments and nonmarketable securities totaled 
$430 million, which primarily related to sales of our ownership interests in our equity method investees in Indonesia and 
Pakistan, for which we received cash proceeds of $402 million and a note receivable of $200 million. We recognized a net gain 
of $94 million as a result of these transactions.
During 2022, proceeds from disposals of businesses, equity method investments and nonmarketable securities totaled 
$458 million, which primarily related to the refranchising of our bottling operations in Cambodia. We received net cash 
proceeds of $228 million and recognized a net gain of $153 million as a result of the refranchising. Also included was the sale 
of our ownership interest in one of our equity method investees, for which we received cash proceeds of $123 million and 
recognized a net gain of $13 million.
During 2021, proceeds from disposals of businesses, equity method investments and nonmarketable securities totaled 
$2,180 million, which primarily related to the sale of our ownership interest in Coca-Cola Amatil Limited (“CCA”), an equity 
method investee, to Coca-Cola Europacific Partners plc (“CCEP”), also an equity method investee. We received cash proceeds 
of $1,738 million and recognized a net gain of $695 million as a result of the sale and the related reversal of cumulative 
translation adjustments. Also included were the sale of our ownership interest in an equity method investee and the sale of a 
portion of our ownership interest in another equity method investee. We received cash proceeds of $293 million and recognized 
a net gain of $114 million as a result of these sales. 
All of the gains and losses discussed above were recorded in the line item other income (loss) — net in our consolidated 
statements of income.
Assets and Liabilities Held for Sale
As of December 31, 2023, the Company’s bottling operations in the Philippines and Bangladesh and certain bottling operations 
in India met the criteria to be classified as held for sale and are expected to be refranchised during the first quarter of 2024.<br>

**Node ID:** 20c984c6-3c7b-48e2-82e6-654b8213025b<br>**Similarity:** 0.6187719532947155<br>**Text:** • Operations Review — an analysis of our consolidated results of operations for 2023 and 2022 and year-to-year 
comparisons between 2023 and 2022. An analysis of our consolidated results of operations for 2022 and 2021 and year-
34
to-year comparisons between 2022 and 2021 can be found in MD&A in Part II, Item 7 of the Company’s Form 10-K for 
the year ended December 31, 2022.
• Liquidity, Capital Resources and Financial Position — an analysis of cash flows, contractual obligations, foreign 
exchange, and the impact of inflation and changing prices.
OUR BUSINESS
General
The Coca-Cola Company is a total beverage company, and beverage products bearing our trademarks, sold in the United States 
since 1886, are now sold in more than 200 countries and territories. We own or license and market numerous beverage brands, 
which we group into the following categories: Trademark Coca-Cola; sparkling flavors; water, sports, coffee and tea; juice, 
value-added dairy and plant-based beverages; and emerging beverages. We own and market several of the world’s largest 
nonalcoholic sparkling soft drink brands, including Coca-Cola, Sprite, Fanta, Coca-Cola Zero Sugar and Diet Coke/Coca-Cola 
Light.
We make our branded beverage products available to consumers throughout the world through our network of independent 
bottling partners, distributors, wholesalers and retailers as well as the Company’s consolidated bottling and distribution 
operations. Beverages bearing trademarks owned by or licensed to us account for 2.2 billion of the estimated 64 billion servings 
of all beverages consumed worldwide every day.
We believe our success depends on our ability to connect with consumers by providing them with a wide variety of beverage 
options to meet their desires, needs and lifestyles. Our success further depends on the ability of our people to execute 
effectively, every day.
Our Company operates in two lines of business: concentrate operations and finished product operations.
Our concentrate operations typically generate net operating revenues by selling beverage concentrates, sometimes referred to as 
“beverage bases,” syrups, including fountain syrups, and certain finished beverages to authorized bottling operations (to which 
we typically refer as our “bottlers” or our “bottling partners”).<br>

**Node ID:** d8b99221-40b2-4a5d-a531-e712bf1c22f5<br>**Similarity:** 0.6175314027032073<br>**Text:** The 
increase in net sales to equity method investees in 2023 was primarily due to volume growth and favorable pricing initiatives. 
In addition, purchases of beverage products from equity method investees were $579 million, $505 million and $496 million in 
2023, 2022 and 2021, respectively. 
The following table presents the difference between calculated fair value, based on quoted closing prices of publicly traded 
shares, and our Company’s carrying value in investments in publicly traded companies accounted for under the equity method 
(in millions):
December 31, 2023
Fair Value
Carrying Value
Difference
Monster Beverage Corporation
$ 
11,766 $ 
4,837 $ 
6,929 
Coca-Cola Europacific Partners plc
 
5,870  
3,858  
2,012 
Coca-Cola FEMSA, S.A.B. de C.V.
 
5,549  
2,092  
3,457 
Coca-Cola HBC AG
 
2,296  
1,252  
1,044 
Coca-Cola Consolidated, Inc.
 
2,304  
473  
1,831 
Coca-Cola Bottlers Japan Holdings Inc.
 
484  
367  
117 
Coca-Cola İçecek A.Ş.
 
911  
228  
683 
Embotelladora Andina S.A.
 
137  
90  
47 
Total
$ 
29,317 $ 
13,197 $ 
16,120 
Net Receivables and Dividends from Equity Method Investees
Total net receivables due from equity method investees were $1,527 million and $1,191 million as of December 31, 2023 and 
2022, respectively. The total amount of dividends received from equity method investees was $672 million, $634 million and 
$823 million for the years ended December 31, 2023, 2022 and 2021, respectively. The amount of consolidated reinvested 
earnings that represents undistributed earnings of investments accounted for under the equity method as of December 31, 2023 
was $8,005 million.<br>

**Node ID:** 3c566ba1-30ff-419c-bcef-d1a92d919972<br>**Similarity:** 0.6146191490032119<br>**Text:** 43
Information about our volume growth worldwide and by operating segment is as follows:
Percent Change 2023 versus 2022
Unit Cases
1,2
Concentrate 
Sales
Worldwide
 2% 
 2% 
Europe, Middle East & Africa
 (2) 
 — 
Latin America
 5 
 6 
North America
 (1) 
 (1) 
Asia Pacific
 3 
 — 
Global Ventures
 4 
 5 
Bottling Investments
 (1) 
3
         N/A
1 Bottling Investments operating segment data reflects unit case volume growth for consolidated bottlers only.
2 Geographic and Global Ventures operating segment data reflect unit case volume growth for all bottlers, both consolidated and 
unconsolidated, and distributors in the applicable geographic areas. Global Ventures operating segment data also reflects unit case volume 
growth for Costa retail stores.
3 After considering the impact of structural changes, unit case volume for Bottling Investments grew 6%.
Unit Case Volume
The Coca-Cola system sold 33.3 billion and 32.7 billion unit cases of our products in 2023 and 2022, respectively.
Unit case volume in Europe, Middle East and Africa decreased 2%, which included a 3% decline in sparkling flavors, a 14% 
decline in juice, value-added dairy and plant-based beverages, a 1% decline in Trademark Coca-Cola, and a 2% decline in 
water, sports, coffee and tea. The operating segment reported declines in unit case volume of 6% in the Europe operating unit 
and 1% in the Eurasia and Middle East operating unit, partially offset by growth in unit case volume of 3% in the Africa 
operating unit. The decline in unit case volume in Europe, Middle East and Africa was primarily due to the suspension of the 
Company’s business in Russia in March 2022. 
In Latin America, unit case volume increased 5%, which included 5% growth in Trademark Coca-Cola, 9% growth in water, 
sports, coffee and tea, 2% growth in sparkling flavors and 3% growth in juice, value-added dairy and plant-based beverages. 
The operating segment’s volume performance included 5% growth in both Mexico and Brazil.<br>

In [150]:
from llama_index.core.query_engine import RetrieverQueryEngine
query_engine_AMR = RetrieverQueryEngine.from_args(retriever_AM)
base_query_engine_AM = RetrieverQueryEngine.from_args(base_retriever_AM)

In [146]:
response_AMR = query_engine_AMR.query(query1)

In [147]:
print(str(response_AMR))

The total revenue (Net Operating Revenues) of Coca-Cola in 2023 was $45,754 million.


In [151]:
response_base = base_query_engine_AM.query(query1)

In [152]:
print(str(response_base))

$45,754 million


In [171]:
for doc in filtered_documents[:5]:  # Check the first 5 documents
    print(doc.metadata)  # Ensure 'company', 'year', and 'subcategory' are in metadata


{'filename': '0000021344-23-000011.pdf'}
{'filename': '0000021344-24-000009.pdf'}


AutoRetriver

In [155]:
import chromadb
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("quickstart")

In [156]:
from llama_index.core import VectorStoreIndex
from llama_index.core.retrievers import VectorIndexAutoRetriever
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.storage import StorageContext

# Step 1: Set up the Chroma vector store
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)


In [181]:
# Example of adding metadata fields manually based on the filename
for doc in filtered_documents:
    # Check if metadata exists, and if not, initialize it as an empty dictionary
    if not hasattr(doc, "metadata"):
        doc.metadata = {}

    # If the document contains a 'filename' attribute, we will use that
    filename = doc.metadata.get("filename", "")

    # If the filename is available, extract year and other metadata
    if filename:
        # Extract year from filename (assuming it's part of the filename)
        year = filename.split("-")[1][:2]  # Extract the '23' or '24' part
        year = "20" + year  # Convert to '2023' or '2024'
        
        # Assign company and subcategory based on the context
        doc.metadata["company"] = "Coca-Cola"
        doc.metadata["year"] = year
        doc.metadata["subcategory"] = "10-K"

    # Print the updated metadata for verification
    print(doc.metadata)  # Ensure metadata contains company, year, and subcategory




{'filename': '0000021344-23-000011.pdf', 'company': 'Coca-Cola', 'year': '2023', 'subcategory': '10-K'}
{'filename': '0000021344-24-000009.pdf', 'company': 'Coca-Cola', 'year': '2024', 'subcategory': '10-K'}


In [182]:
index = VectorStoreIndex.from_documents(
    filtered_documents, 
    storage_context=storage_context,
    embed_model=OpenAIEmbedding(),
    metadata_fields=["company", "subcategory", "year"]  # Ensure these fields are included
)


In [196]:
auto_retriever = VectorIndexAutoRetriever(
    index,
    vector_store_info=vector_store,
    metadata_filters={"company": ["Coca-Cola"], "year": ["2023", "2024"], "subcategory": ["10-K"]},
    verbose=True
)


In [197]:
query1 = " What was the total revenue (Net Operating Revenues) of Coca-Cola in 2023? "

In [198]:

retrieved_docs = auto_retriever.retrieve(query1)

# Display retrieved documents
for i, doc in enumerate(retrieved_docs[:5]):  # Show first 5 results
    print(f"\nDocument {i+1} (Year: {doc.metadata.get('year', 'Unknown')}):")
    print(doc.text[:500])  # Print first 500 characters for preview


Using query str: total revenue of Coca-Cola in 2023
Using filters: [('company', '==', 'Coca-Cola'), ('year', '==', '2023')]

Document 1 (Year: 2023):
This
91
impact would include taxes and interest accrued through December 31, 2022 for the 2007 through 2009 litigated tax years and for subsequent tax years from 2010 through
2022. The calculations incorporated the estimated impact of correlative adjustments to the previously accrued transition tax payable under the 2017 Tax Cuts and Jobs Act
(“Tax Reform Act”). The Company estimates that the potential aggregate incremental tax and interest liability could be approximately $14 billion as of Dece

Document 2 (Year: 2023):
260 
Equity income (loss) — net
43 
7 
(1)
9 
— 
1,184 
230 
— 
1,472 
Income (loss) before income taxes
3,952 
2,879 
3,768 
2,320 
196 
1,743 
(3,172)
— 
11,686 
Identifiable operating assets
7,088 
2,067 
25,760 
2,368 
7,325 
10,232 
19,158 
— 
73,998 
Investments
410 
629 
15 
219 
— 
12,892 
4,600 
— 
18,765 
Capita

In [199]:
from llama_index. core import get_response_synthesizer
response_synthesizer = get_response_synthesizer (response_mode="compact")

In [200]:
# Step 3: Generate the Final Response
final_response = response_synthesizer.synthesize(query1, retrieved_docs)
# Print or use the final response
print(final_response)

The total revenue (Net Operating Revenues) of Coca-Cola in 2023 was $38,655 million.


In [211]:
# Step 4: Run a test query for comparison
query = "What were Coca-Cola's key financial highlights in 2023?"
retrievers = {
    "BaseReteriver":base_retriever_AM,
    "AutoRetriever": auto_retriever,
    "SentenceWindowRetriever": sentence_index_retriever,
    "AutoMergingRetriever": retriever_AM,
}

results = {}
for name, retriever in retrievers.items():
    print(f"\nUsing {name}:")
    retrieved_docs = retriever.retrieve(query)
    results[name] = retrieved_docs
    for i, doc in enumerate(retrieved_docs[:5]):  # Show first 5 results
        print(f"\nDocument {i+1} (Year: {doc.metadata.get('year', 'Unknown')}):")
        print(doc.text[:500])  # Print first 500 characters for preview



Using BaseReteriver:
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

Document 1 (Year: Unknown):
59
ITEM 8.  FINANCIAL STATEMENTS AND SUPPLEMENTARY DATA
Table of Contents
Page 
Consolidated Statements of Income
61
Consolidated Statements of Comprehensive Income
62
Consolidated Balance Sheets
63
Consolidated Statements of Cash Flows
64
Consolidated Statements of Shareowners’ Equity
65
Notes to Consolidated Financial Statements
66
Note 1
Business and Summary of Significant Accounting Policies
66
Note 2
Acquisitions and Divestitures
72
Note 3
Net Operating Revenues
74
Note 4
Investments
76
Not

Document 2 (Year: Unknown):
59
ITEM 8.  FINANCIAL STATEMENTS AND SUPPLEMENTARY DATA
Table of Contents
Page 
Consolidated Statements of Income
61
Consolidated Statements of Comprehensive Income
62
Consolidated Balance Sheets
63
Consolidated Statements of Cash Flows
64
Consolidated Statements of Shareowners’ Equity
65
Notes to Consolidated Financial Statements
66
Note 1
Bu

## Key Observations:

- AutoRetriever provided the most relevant results for 2023, including specific financial figures such as equity income and income before taxes.
- BaseReteriver and AutoMergingRetriever returned similar results, with some mention of 2023 data in Document 3 (e.g., net sales to equity method investees).
- SentenceWindowRetriever referenced the 2023 Annual Report but did not provide specific financial highlights for 2023.

## Hybrid Fusion Retriever 

In [215]:
filtered_documents



In [217]:
from llama_index.core import VectorStoreIndex

# Ensure docs is a list, not a single document
docs = [filtered_documents[0]]

# Create the index with the corrected list
index = VectorStoreIndex.from_documents(docs, chunk_size=256)


HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [218]:
index.index_struct.__dict__

{'index_id': '7946378c-abe2-4eac-a430-94ef10212048',
 'summary': None,
 'nodes_dict': {'5bccba43-cf1e-4d83-9967-257407dccfc2': '5bccba43-cf1e-4d83-9967-257407dccfc2',
  '4cd0af67-6a7c-4fbd-a058-a70d3d67b4de': '4cd0af67-6a7c-4fbd-a058-a70d3d67b4de',
  '90214c24-0b40-4391-8837-243a40b99ed6': '90214c24-0b40-4391-8837-243a40b99ed6',
  '891ab850-ceed-4300-af2c-9d223e6d6388': '891ab850-ceed-4300-af2c-9d223e6d6388',
  '3370c38c-3121-4d5b-a005-0d9763e9d212': '3370c38c-3121-4d5b-a005-0d9763e9d212',
  '382aad67-d0b6-4176-9faa-c795ab9bd759': '382aad67-d0b6-4176-9faa-c795ab9bd759',
  '4de73f75-abab-4691-8149-2b8fc9be6c3f': '4de73f75-abab-4691-8149-2b8fc9be6c3f',
  '32d4cdd9-6e99-48d5-97f1-e6b4cf67c993': '32d4cdd9-6e99-48d5-97f1-e6b4cf67c993',
  '14f8040f-07bd-4094-ae9f-50439d4ff8f7': '14f8040f-07bd-4094-ae9f-50439d4ff8f7',
  '5f9e114c-cbe9-4c46-afdf-313ec510eb3d': '5f9e114c-cbe9-4c46-afdf-313ec510eb3d',
  '02d2e34a-5b74-4ceb-af01-4ff649f9b3a1': '02d2e34a-5b74-4ceb-af01-4ff649f9b3a1',
  '0a61bf5a-e

In [219]:
len(index.index_struct.nodes_dict)

185

## Hybrid Fusion Retriever 

In [220]:
%%capture
%pip install llama-index-retrievers-bm25

In [221]:
from llama_index.retrievers.bm25 import BM25Retriever

vector_retriever = index.as_retriever(similarity_top_k=2)

bm25_retriever = BM25Retriever.from_defaults(
    docstore=index.docstore, similarity_top_k=2
)

In [222]:
from llama_index.core.retrievers import QueryFusionRetriever

retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    similarity_top_k=2,
    num_queries=4,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    #query_gen_prompt="...",  # we could override the query generation prompt here
)

In [223]:
nodes_with_scores = retriever.retrieve(
    "What are the company’s main revenue streams, and how have they performed?"
)

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Generated queries:
1. What are the top revenue-generating products or services for the company?
2. How has the company's advertising and marketing strategies impacted their revenue streams?
3. Are there any new revenue streams that the company has recently introduced, and how successful have they been?
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [224]:
for node in nodes_with_scores:
    print(f"Score: {node.score:.2f} - {node.text} - {node.metadata}...\n-----\n")

Score: 0.05 - Refer to Note 13 of Notes to Consolidated Financial Statements for additional information about our pension plans and related actuarial assumptions.
Revenue Recognition
Revenue is recognized when performance obligations under the terms of the contracts with our customers are satisfied. Our performance obligation generally consists of the
promise to sell concentrates, syrups or finished products to our bottling partners, wholesalers, distributors or retailers. Control of the concentrates, syrups or finished products is
transferred upon shipment to, or receipt at, our customers’ locations, as determined by the specific terms of the contract. Upon transfer of control to the customer, which
completes our performance obligation, revenue is recognized. Our sales terms generally do not allow for a right of return except for matters related to any manufacturing
defects on our part. After completion of our performance obligation, we have an unconditional right to consideration as 

In [225]:
from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine.from_args(retriever)

In [226]:
response = query_engine.query("What are the company’s main revenue streams, and how have they performed?")

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Generated queries:
1. What are the top revenue-generating products/services of the company?
2. How has the company's advertising and marketing revenue evolved over time?
3. Are there any new revenue streams that the company has recently introduced, and how successful have they been?
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [227]:
from llama_index.core.response.notebook_utils import display_response

display_response(response)

**`Final Response:`** The company's main revenue streams come from selling concentrates, syrups, or finished products to bottling partners, wholesalers, distributors, or retailers. Revenue is recognized when control of the products is transferred to the customers. The company also generates revenue from various programs such as cash discounts, promotional activities, and volume-based incentive programs. These revenue streams have performed well, with consideration given to variable factors like customer sales volumes, selling prices, and discount rates to ensure accurate revenue recognition.

## Query Engine

In [None]:
embed_model = OpenAIEmbedding(model='text-embedding-3-small')

In [230]:
from llama_index.core import SummaryIndex


In [231]:
# Summary Index for summarization questions
summary_index = SummaryIndex(nodes, embed_model=embed_model)

# Vector Index for answering specific context questions
vector_index = VectorStoreIndex(nodes, embed_model=embed_model)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.

  def __call__(self, frame, event, arg):


HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.

In [232]:
# Summary Index Query Engine
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)

# Vector Index Query Engine
vector_query_engine = vector_index.as_query_engine()

In [234]:
from llama_index.core.tools.query_engine import QueryEngineTool

# Summary Index tool
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description="Useful for summarization questions related to 10-K filings of the Coca-Cola Company in year 2023 or For the fiscal year ended December 31, 2022",
)

# Vector Index tool
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for retrieving specific context 10-K filings of the Coca-Cola Company in year 2023 or For the fiscal year ended December 31, 2022 ",
)

In [235]:
from llama_index.core import SimpleKeywordTableIndex

keyword_index = SimpleKeywordTableIndex(nodes)

keyword_query_engine = keyword_index.as_query_engine()

keyword_tool = QueryEngineTool.from_defaults(
    query_engine=keyword_query_engine,
    description="Useful for retrieving specific context using keywords from 10-K filings of the Coca-Cola Company in year 2023 or For the fiscal year ended December 31, 2022 ",
)

In [236]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors.llm_selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core.selectors.pydantic_selectors import PydanticMultiSelector, PydanticSingleSelector

In [237]:
query_engine = RouterQueryEngine(
    selector=PydanticMultiSelector.from_defaults(),
    query_engine_tools=[
        vector_tool,
        keyword_tool,
        summary_tool
    ],
)

In [239]:
# This query could use either a keyword or vector query engine, so it will combine responses from both
response = query_engine.query(
    "How does Coca-Cola manage supply chain disruptions and increased input costs?"
)

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Selecting query engine 1: This choice is relevant as it allows for retrieving specific context using keywords from 10-K filings, which can provide detailed information on how Coca-Cola manages supply chain disruptions and increased input costs..
> Starting query: How does Coca-Cola manage supply chain disruptions and increased input costs?
query keywords: ['supply', 'costs', 'coca', 'chain', 'manage', 'disruptions', 'cola', 'increased', 'input']
> Extracted keywords: ['supply', 'costs', 'coca', 'chain', 'manage', 'disruptions', 'cola', 'increased', 'input']
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [241]:
from IPython.core.display import HTML, display

display(HTML(f'<p style="font-size:14px">{response.response}</p>'))


  from IPython.core.display import HTML, display


In [254]:
# This query could use either a keyword or vector query engine, so it will combine responses from both
response = query_engine.query(
    "What are the company’s main revenue streams, and how have they performed?"
)

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Selecting query engine 1: This choice is useful for retrieving specific context using keywords from 10-K filings, which can provide detailed information on the company's revenue streams and performance..
> Starting query: What are the company’s main revenue streams, and how have they performed?
query keywords: ['company', 'main', 'performed', 'streams', 'revenue']
> Extracted keywords: ['company', 'main', 'performed', 'revenue']
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [255]:
from IPython.core.display import HTML, display

display(HTML(f'<p style="font-size:14px">{response.response}</p>'))

  from IPython.core.display import HTML, display


## Sub Question Query Engine

In [243]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core import set_global_service_context
from llama_index.core.response.pprint_utils import pprint_response
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

In [244]:
Annual_10k_23_docs = SimpleDirectoryReader(input_files=["/Users/spoorthiramireddygari/Downloads/RAG systems using LlamaIndex/Module 6 - Advanced approaches for powerful RAG system/coca_cola_10k_pdfs/0000021344-23-000011.pdf"]).load_data()
Annual_10k_24_docs = SimpleDirectoryReader(input_files=["/Users/spoorthiramireddygari/Downloads/RAG systems using LlamaIndex/Module 6 - Advanced approaches for powerful RAG system/coca_cola_10k_pdfs/0000021344-24-000009.pdf"]).load_data()

In [245]:
print(f'Loaded cocacola 2023 10-K with {len(Annual_10k_23_docs)} pages')
print(f'Loaded cococola 2024 10-K with {len(Annual_10k_24_docs)} pages')

Loaded cocacola 2023 10-K with 183 pages
Loaded cococola 2024 10-K with 146 pages


In [246]:
A23_index = VectorStoreIndex.from_documents(Annual_10k_23_docs)
A24_index = VectorStoreIndex.from_documents(Annual_10k_24_docs)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [247]:
A23_engine = A23_index.as_query_engine(similarity_top_k=3)
A24_engine = A24_index.as_query_engine(similarity_top_k=3)

In [248]:
query_engine_tools = [
    QueryEngineTool(
        query_engine=A23_engine,
        metadata=ToolMetadata(name='Annual_10k_23', description='Provides information about context from 10-K filings of the Coca-Cola Company in year 2023 or For the fiscal year ended December 31, 2022 "')
    ),
    QueryEngineTool(
        query_engine=A24_engine,
        metadata=ToolMetadata(name='Annual_10k_24', description='Provides information about  context from 10-K filings of the Coca-Cola Company in year 2024 or For the fiscal year ended December 31, 2023')
    ),
]

s_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

In [249]:
response = await s_engine.aquery('What were the key financial highlights for Coca-Cola in 2023 and 2024?')

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Generated 2 sub questions.
[1;3;38;2;237;90;200m[Annual_10k_23] Q: What were the key financial highlights for Coca-Cola in 2023?
[0m[1;3;38;2;90;149;237m[Annual_10k_24] Q: What were the key financial highlights for Coca-Cola in 2024?
[0mHTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;237;90;200m[Annual_10k_23] A: The key financial highlights for Coca-Cola in 2023 included net operating revenues of $43,004 million, gross profit of $25,004 million, operating income of $10,909 million, and consolidated net income of $9,571 million. Additionally, the basic net income per share was $2.20, and the diluted net income per share was $2.19.
[0mHTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[

In [250]:
print(response)

The key financial highlights for Coca-Cola in 2023 included net operating revenues of $43,004 million, gross profit of $25,004 million, operating income of $10,909 million, and consolidated net income of $9,571 million. The basic net income per share was $2.20, and the diluted net income per share was $2.19. In 2024, Coca-Cola's concentrate operations accounted for 58% of total net operating revenues, while finished product operations contributed 42%. Additionally, concentrate operations represented 83% of total worldwide unit case volume, with finished product operations making up the remaining 17%. The company focused on strategic priorities like brand portfolio enhancement, innovation, consumer-centric marketing, revenue growth management, and sustainability integration to drive future growth and value creation.


In [257]:
response = await s_engine.aquery(' what is difference in amount of net operating activities from 2023 to 2024')

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Generated 2 sub questions.
[1;3;38;2;237;90;200m[Annual_10k_23] Q: What is the net operating activities amount in 2023 from the Annual_10k_23 tool?
[0m[1;3;38;2;90;149;237m[Annual_10k_24] Q: What is the net operating activities amount in 2024 from the Annual_10k_24 tool?
[0mHTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;237;90;200m[Annual_10k_23] A: The net operating activities amount in 2023 from the Annual_10k_23 tool is not provided in the context information.
[0m[1;3;38;2;90;149;237m[Annual_10k_24] A: The net operating activities amount in 2024 from the Annual_10k_24 tool is not provided in the context information.
[0mHTTP Reques

In [258]:
print(response)

The difference in the amount of net operating activities from 2023 to 2024 is not provided in the context information.
