In [20]:
from dotenv import load_dotenv
import os
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex

In [21]:
load_dotenv()

True

In [22]:
api_key = os.environ["OPENAI_API_KEY"]

In [23]:
Settings.llm = OpenAI(model = "gpt-4o-mini")

In [24]:
def load_documents(directory):
    documents = SimpleDirectoryReader(directory).load_data()
    return documents

In [46]:
def create_index(db, documents, collection_name):
    chroma_collection = db.get_or_create_collection(collection_name)
    
    vector_store = ChromaVectorStore(chroma_collection = chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store = vector_store)
    
    print(documents)
    index = VectorStoreIndex.from_documents(
        documents, storage_context = storage_context, store_nodes_override=True
    )
    return index



In [47]:
docs = load_documents("./data")
print(docs)
db = chromadb.PersistentClient(path = "./db")
index = create_index(db, docs, "custom_files")


[Document(id_='d5a8cec8-b8ff-4fe1-b0cf-8e2a32587a2a', embedding=None, metadata={'page_label': '1', 'file_name': '2024q3-alphabet-earnings-release.pdf', 'file_path': '/Users/khalidrajan/Documents/Document Q & A App/data/2024q3-alphabet-earnings-release.pdf', 'file_type': 'application/pdf', 'file_size': 127666, 'creation_date': '2025-01-24', 'last_modified_date': '2024-12-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Alphabet Announces Third Quarter 2024 Results\nMOUNTAIN VIEW, Calif. – October 29, 2024 – Alphabet Inc. (NASDAQ: GOOG, GOOGL) today announced financial \nresults for the quarter ended September 30, 2024.\n• Consolidated 

In [48]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x133816420>

In [49]:
query_engine = index.as_query_engine()
response = query_engine.query("What does this document contain?")
print(response)

This document contains the consolidated balance sheets of Alphabet Inc. as of December 31, 2023, and September 30, 2024. It details the company's assets, liabilities, and stockholders' equity, including current and non-current assets, current and long-term liabilities, and components of stockholders' equity such as preferred stock, common stock, accumulated other comprehensive income, and retained earnings.


In [50]:
query_engine = index.as_query_engine()
response = query_engine.query("What was Alphabet's revenue in Q3 2024?")
print(response)


Alphabet's revenue in Q3 2024 was $88.3 billion.


In [51]:
response = query_engine.query("How much did Alphabet's revenue increase yoy in Q3 2024?")
print(response)

Alphabet's revenue increased by 15% year over year in Q3 2024.


In [52]:
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.core.evaluation import RelevancyEvaluator

In [53]:
llm = OpenAI(model="gpt-4o-mini", temperature=0.0)

In [54]:
import nest_asyncio
nest_asyncio.apply()

faithfulness_evaluator = FaithfulnessEvaluator(llm = llm)
eval_result = faithfulness_evaluator.evaluate_response(response=response)
print(eval_result.score)
print(eval_result.passing)

nest_asyncio.apply()
relevancy_evaluator = RelevancyEvaluator(llm=llm)
eval_result = relevancy_evaluator.evaluate_response(query = "How much did Alphabet's revenue increase yoy in Q3 2024?", response=response)
print(eval_result.score)
print(eval_result.passing)



1.0
True
1.0
True


In [55]:
def evaluate_faithfulness(query, response) -> tuple[float, bool]:
    faithfulness_evaluator = FaithfulnessEvaluator(llm = llm)
    eval_result = faithfulness_evaluator.evaluate_response(query = query, response=response)
    return (eval_result.score, eval_result.passing)

evaluate_faithfulness("How much did Alphabet's revenue increase yoy in Q3 2024?", response)

(1.0, True)

In [56]:
def evaluate_relevancy(query, response) -> tuple[float, bool]:
    relevancy_evaluator = RelevancyEvaluator(llm=llm)
    eval_result = relevancy_evaluator.evaluate_response(query = query, response=response)
    return (eval_result.score, eval_result.passing)

evaluate_relevancy("How much did Alphabet's revenue increase yoy in Q3 2024?", response)

(1.0, True)

## Create a Hybrid Fusion Retriever

In [57]:
docs = load_documents("./data")
print(docs)
db = chromadb.PersistentClient(path = "./db")
index = create_index(db, docs, "custom_files")

[Document(id_='61f05534-80d1-4fe6-afac-7f07a4b35477', embedding=None, metadata={'page_label': '1', 'file_name': '2024q3-alphabet-earnings-release.pdf', 'file_path': '/Users/khalidrajan/Documents/Document Q & A App/data/2024q3-alphabet-earnings-release.pdf', 'file_type': 'application/pdf', 'file_size': 127666, 'creation_date': '2025-01-24', 'last_modified_date': '2024-12-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Alphabet Announces Third Quarter 2024 Results\nMOUNTAIN VIEW, Calif. – October 29, 2024 – Alphabet Inc. (NASDAQ: GOOG, GOOGL) today announced financial \nresults for the quarter ended September 30, 2024.\n• Consolidated 

In [59]:
print(index.docstore.docs)

{'70c37d54-4b42-4a90-bf68-21eae68d2d34': TextNode(id_='70c37d54-4b42-4a90-bf68-21eae68d2d34', embedding=None, metadata={'page_label': '1', 'file_name': '2024q3-alphabet-earnings-release.pdf', 'file_path': '/Users/khalidrajan/Documents/Document Q & A App/data/2024q3-alphabet-earnings-release.pdf', 'file_type': 'application/pdf', 'file_size': 127666, 'creation_date': '2025-01-24', 'last_modified_date': '2024-12-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='61f05534-80d1-4fe6-afac-7f07a4b35477', node_type='4', metadata={'page_label': '1', 'file_name': '2024q3-alphabet-earnings-release.pdf', 'file_path': '/Users/khalidrajan/Documents/Document Q & A App/data/2024q3-alphabet-earnings-release.pdf', 'file_t

In [60]:
from llama_index.retrievers.bm25 import BM25Retriever

vector_retriever = index.as_retriever(similarity_top_k=2)
bm25_retriever = BM25Retriever.from_defaults(docstore = index.docstore, similarity_top_k=2)

In [61]:
from llama_index.core.retrievers import QueryFusionRetriever

retriever = QueryFusionRetriever(
    retrievers = [vector_retriever, bm25_retriever],
    similarity_top_k = 2,
    num_queries = 4,
    mode = "reciprocal_rerank",
    use_async = True,
    verbose = True
)

In [62]:
# apply nested async to run in a notebook
import nest_asyncio

nest_asyncio.apply()

In [63]:
nodes_with_scores = retriever.retrieve(
    "How much did Google Cloud contribute as a percentage to Alphabet's revenue in Q3 2024?  Did this represent an increase from the previous year?"
)

Generated queries:
- Google Cloud percentage of Alphabet revenue Q3 2024 compared to Q3 2023
- Alphabet revenue breakdown Q3 2024 Google Cloud contribution
- Year-over-year growth of Google Cloud revenue in Alphabet Q3 2024


In [64]:
for node in nodes_with_scores:
    print(node.node.text)
    print(node.score)
    print("-"*100)

Alphabet Announces Third Quarter 2024 Results
MOUNTAIN VIEW, Calif. – October 29, 2024 – Alphabet Inc. (NASDAQ: GOOG, GOOGL) today announced financial 
results for the quarter ended September 30, 2024.
• Consolidated Alphabet revenues in Q3 2024 increased 15%, or 16% in constant currency, year over year to 
$88.3 billion reflecting strong momentum across the business.
• Google Services revenues increased 13% to $76.5 billion, led by strength across Google Search & other, 
Google subscriptions, platforms, and devices, and YouTube ads.
• Google Cloud revenues increased 35% to $11.4 billion led by accelerated growth in Google Cloud Platform 
(GCP) across AI Infrastructure, Generative AI Solutions, and core GCP products.
• Total operating income increased 34% and operating margin percent expanded by 4.5 percentage points to 
32%.
• Net income increased 34% and EPS increased 37% to $2.12.
Sundar Pichai, CEO, said: “The momentum across the company is extraordinary. Our commitment to innovati

In [65]:
from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine.from_args(retriever)

In [70]:
response = query_engine.query(
    "How much did Google Cloud contribute as a percentage to Alphabet's revenue in Q3 2024?  Did this represent an increase from the previous year?"
)
print(response)

Generated queries:
- Google Cloud percentage contribution to Alphabet revenue Q3 2024
- Alphabet revenue breakdown Q3 2024 Google Cloud comparison to Q3 2023
- Year-over-year growth Google Cloud revenue percentage Alphabet Q3 2024
Google Cloud contributed approximately 12.9% to Alphabet's revenue in Q3 2024, with revenues of $11.4 billion out of total revenues of $88.3 billion. This represented an increase from the previous year, where Google Cloud's revenue was $8.4 billion, accounting for about 11% of total revenues of $76.7 billion.


In [67]:
def evaluate_faithfulness(query, response) -> tuple[float, bool]:
    faithfulness_evaluator = FaithfulnessEvaluator(llm = llm)
    eval_result = faithfulness_evaluator.evaluate_response(query = query, response=response)
    return (eval_result.score, eval_result.passing)

evaluate_faithfulness("How much did Google Cloud contribute as a percentage to Alphabet's revenue in Q3 2024?  Did this represent an increase from the previous year?", response)

(1.0, True)

In [68]:
def evaluate_relevancy(query, response) -> tuple[float, bool]:
    relevancy_evaluator = RelevancyEvaluator(llm=llm)
    eval_result = relevancy_evaluator.evaluate_response(query = query, response=response)
    return (eval_result.score, eval_result.passing)

evaluate_relevancy("How much did Google Cloud contribute as a percentage to Alphabet's revenue in Q3 2024?  Did this represent an increase from the previous year?", response)

(1.0, True)