In [4]:
!pip install langchain langchain-openai openai faiss-cpu python-dotenv rank_bm25
!pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.5.2-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.3.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
import hashlib
import os
import getpass
from typing import List, Tuple
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from rank_bm25 import BM25Okapi
from langchain.schema import Document

In [6]:
from google.colab import userdata
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [24]:
class ContextualRetrieval:
  """
  Class implementing contextual retrieval
  """
  def __init__(self):
    self.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
    )
    self.embeddings = OpenAIEmbeddings()
    self.llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2
    )
  def process_document(self, document:str) -> Tuple[List[Document], List[Document]]:
    #processing a document by splitting it into chunks and generating context for each chunk
    chunks = self.text_splitter.create_documents([document])
    contextualized_chunks = self.generate_contextualized_chunks(document, chunks)
    return chunks, contextualized_chunks
  def generate_contextualized_chunks(self, document:str, chunks:List[Document]) -> List[Document]:
    #generating a contextualized version of the given chunks
    contextualized_chunks = []
    for chunk in chunks:
      context = self.generate_context(document, chunk.page_content)
      contextualized_content = f"{context}\n\n{chunk.page_content}"
      contextualized_chunks.append(Document(page_content=context, metadata=chunk.metadata))
    return contextualized_chunks

  def generate_context(self, document:str, chunk:str)-> str:
    ##generate context for a specific chunk using the model
    prompt = ChatPromptTemplate.from_template(
        """
        You are an AI assistant specializing in financial analysis, particularly for Apple, Inc. Your task is to provide brief, relevant context for a chunk of text from Apple's Q3 2023 financial report.
Here is the financial report:
<document>
{document}
</document>

Here is the chunk we want to situate within the whole document:
<chunk>
{chunk}
</chunk>

Provide a concise context (2-3 sentences) for this chunk, considering the following guidelines:
1. Identify the main financial topic or metric discussed (e.g., revenue, profitability, segment performance, market position).
2. Mention any relevant time periods or comparisons (e.g., Q3 2023, year-over-year changes).
3. If applicable, note how this information relates to Apple's overall financial health, strategy, or market position.
4. Include any key figures or percentages that provide important context.
5. Do not use phrases like "This chunk discusses" or "This section provides". Instead, directly state the context.

Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.

Context:


        """
    )
    messages = prompt.format_messages(document=document, chunk=chunk)
    response = self.llm.invoke(messages)
    return response.content

  def create_vectorstores(self, chunks:List[Document]) -> FAISS:
    return FAISS.from_documents(chunks, self.embeddings)
  def create_bm25_index(self, chunks:List[Document]) -> BM25Okapi:
    tokenized_chunks = [chunk.page_content.split() for chunk in chunks]
    return BM25Okapi(tokenized_chunks)

  def generate_answer(self, query: str, relevant_chunks: List[str]) -> str:
        prompt = ChatPromptTemplate.from_template("""
        Based on the following information, please provide a concise and accurate answer to the question.
        If the information is not sufficient to answer the question, say so.

        Question: {query}

        Relevant information:
        {chunks}

        Answer:
        """)
        messages = prompt.format_messages(query=query, chunks="\n\n".join(relevant_chunks))
        response = self.llm.invoke(messages)
        return response.content




In [25]:
document ="""
Apple Inc. FY24 Q3 Consolidated Financial Statements
Executive Summary:
Apple Inc. reports strong results for Q3 FY24, with continued growth in net sales and operational efficiency. The following sections provide insights into the financial performance, market position, and strategic outlook of the company.

1. Financial Performance Overview:
Net Sales:

Q3 2024: $85.78 billion, a 4.9% increase compared to $81.80 billion in Q3 2023.
Products: $61.56 billion (up from $60.58 billion in Q3 2023).
Services: $24.21 billion (up from $21.21 billion in Q3 2023).
Gross Margin:

$39.68 billion in Q3 2024, up from $36.41 billion in Q3 2023.
Operating Income:

$25.35 billion, compared to $22.99 billion in the previous year.
Net Income:

Q3 2024: $21.45 billion, compared to $19.88 billion in Q3 2023.
Earnings Per Share (EPS):

Basic: $1.40 (Q3 2024), up from $1.27 (Q3 2023).
Diluted: $1.40 (Q3 2024), up from $1.26 (Q3 2023).
2. Segment Analysis:
Net Sales by Reportable Segment:

Americas: $37.68 billion (up from $35.38 billion).
Europe: $21.88 billion (up from $20.21 billion).
Greater China: $14.73 billion (down from $15.76 billion).
Japan: $5.10 billion (up from $4.82 billion).
Rest of Asia Pacific: $6.39 billion (up from $5.63 billion).
Net Sales by Product Category:

iPhone: $39.30 billion, slightly down from $39.67 billion.
Mac: $7.01 billion, up from $6.84 billion.
iPad: $7.16 billion, up from $5.79 billion.
Wearables, Home, and Accessories: $8.10 billion, down from $8.28 billion.
Services: $24.21 billion, up from $21.21 billion.
3. Operating Expenses:
Research and Development (R&D):

$8.01 billion, compared to $7.44 billion in Q3 2023.
Selling, General, and Administrative (SG&A):

$6.32 billion, up from $5.97 billion in Q3 2023.
4. Balance Sheet Highlights:
Assets:

Current Assets: $125.44 billion, down from $143.57 billion in September 2023.
Non-current Assets: $206.18 billion, slightly down from $209.02 billion.
Liabilities:

Current Liabilities: $131.62 billion, down from $145.31 billion.
Non-current Liabilities: $133.28 billion, down from $145.13 billion.
Total Liabilities and Shareholders' Equity:

Total liabilities: $264.90 billion, compared to $290.44 billion.
Shareholders' equity: $66.71 billion, up from $62.15 billion.
5. Cash Flow Summary:
Operating Activities:

Net cash generated: $91.44 billion (up from $88.95 billion).
Investing Activities:

Net cash generated: $1.49 billion, slightly up from $1.31 billion in Q3 2023.
Financing Activities:

Cash used: $97.04 billion (up from $85.34 billion).
6. Debt and Capital Structure:
Total Debt: $98.31 billion (down from $105.10 billion in September 2023).
Debt-to-Equity Ratio: Decreased slightly as debt reduced and shareholders' equity increased.
7. Key Metrics:
Net Income Growth: Up by 7.9% compared to Q3 2023.
Service Revenue Growth: Up by 14.1% YoY, indicating strong demand for Apple's service ecosystem.
EPS Growth: Strong performance reflected in both basic and diluted EPS improvements.
Conclusion:
Apple’s Q3 2024 results show continued resilience and growth, driven by its diverse product portfolio and expanding services sector. However, challenges like declining sales in Greater China could pose risks. Nonetheless, Apple's strong financial health, efficient cost management, and shareholder returns solidify its market leadership.





"""



In [26]:
context_retrieval = ContextualRetrieval()

In [27]:
context_retrieval

<__main__.ContextualRetrieval at 0x78c87b0210c0>

In [28]:
og_chunks, contextualized_chunks = context_retrieval.process_document(document)

In [29]:
print(og_chunks[0])

page_content='Apple Inc. FY24 Q3 Consolidated Financial Statements
Executive Summary:
Apple Inc. reports strong results for Q3 FY24, with continued growth in net sales and operational efficiency. The following sections provide insights into the financial performance, market position, and strategic outlook of the company.

1. Financial Performance Overview:
Net Sales:

Q3 2024: $85.78 billion, a 4.9% increase compared to $81.80 billion in Q3 2023.
Products: $61.56 billion (up from $60.58 billion in Q3 2023).
Services: $24.21 billion (up from $21.21 billion in Q3 2023).
Gross Margin:

$39.68 billion in Q3 2024, up from $36.41 billion in Q3 2023.
Operating Income:

$25.35 billion, compared to $22.99 billion in the previous year.
Net Income:'


In [30]:
print(contextualized_chunks[0])

page_content='Context:
The chunk focuses on Apple's financial performance in Q3 FY24, highlighting a 4.9% year-over-year increase in net sales to $85.78 billion, with products and services contributing $61.56 billion and $24.21 billion, respectively. It also notes improvements in gross margin ($39.68 billion) and operating income ($25.35 billion), reflecting Apple's strong financial health and operational efficiency.'


In [31]:

original_vectorstore = context_retrieval.create_vectorstores(og_chunks)
contextualized_vectorstore = context_retrieval.create_vectorstores(contextualized_chunks)

In [32]:
original_bm25_index = context_retrieval.create_bm25_index(og_chunks)
contextualized_bm25_index = context_retrieval.create_bm25_index(contextualized_chunks)

In [41]:
query = "What was Apple's total revenue in Q3 2024 what was the gross profit and cash position?"
print(f"Query: {query}")
original_vector_results = original_vectorstore.similarity_search(query, k=3)
contextualized_vector_results = contextualized_vectorstore.similarity_search(query, k=3)


##so now we have query split for a normal keyword search
og_tokenized_query = query.split()

#querying the bm25 results using the og method w/o contextualized results
og_bm25_results = original_bm25_index.get_top_n(og_tokenized_query, og_chunks,n=3)

#splitting for the normal keyword search using contextualized query
contextualized_tokenized_query = query.split()
##querying contextualized query
contextualized_bm25_results = original_bm25_index.get_top_n(contextualized_tokenized_query, contextualized_chunks,n=3)





Query: What was Apple's total revenue in Q3 2024 what was the gross profit and cash position?


In [43]:
og_bm25_results

[Document(metadata={}, page_content="EPS Growth: Strong performance reflected in both basic and diluted EPS improvements.\nConclusion:\nApple’s Q3 2024 results show continued resilience and growth, driven by its diverse product portfolio and expanding services sector. However, challenges like declining sales in Greater China could pose risks. Nonetheless, Apple's strong financial health, efficient cost management, and shareholder returns solidify its market leadership."),
 Document(metadata={}, page_content="Current Liabilities: $131.62 billion, down from $145.31 billion.\nNon-current Liabilities: $133.28 billion, down from $145.13 billion.\nTotal Liabilities and Shareholders' Equity:\n\nTotal liabilities: $264.90 billion, compared to $290.44 billion.\nShareholders' equity: $66.71 billion, up from $62.15 billion.\n5. Cash Flow Summary:\nOperating Activities:\n\nNet cash generated: $91.44 billion (up from $88.95 billion).\nInvesting Activities:\n\nNet cash generated: $1.49 billion, slig

In [44]:
contextualized_bm25_results

[Document(metadata={}, page_content="EPS growth highlights Apple's strong financial performance in Q3 2024, with basic and diluted EPS increasing to $1.40 from $1.27 and $1.26 respectively in Q3 2023. Despite challenges like declining sales in Greater China, Apple's diverse product portfolio, expanding services sector, and efficient cost management underscore its market leadership and financial resilience."),
 Document(metadata={}, page_content="Current and non-current liabilities for Q3 2024 decreased to $131.62 billion and $133.28 billion, respectively, from $145.31 billion and $145.13 billion in Q3 2023, reflecting improved financial stability. Total liabilities dropped to $264.90 billion, while shareholders' equity rose to $66.71 billion, indicating a stronger balance sheet. This is part of Apple's broader financial health, supported by robust cash flow from operating activities, which increased to $91.44 billion."),
 Document(metadata={}, page_content="Context:\nThe chunk focuses 

In [45]:
og_bm25_answer = context_retrieval.generate_answer(query, [doc.page_content for doc in og_bm25_results])
contextualized_bm25_answer = context_retrieval.generate_answer(query, [doc.page_content for doc in contextualized_bm25_results])

In [46]:
og_bm25_answer

"Apple's total revenue in Q3 2024 was $85.78 billion. The gross profit was $39.68 billion. The cash position, based on net cash generated from operating activities, was $91.44 billion."

In [48]:
contextualized_bm25_answer

"The information provided does not include Apple's total revenue, gross profit, or cash position for Q3 2024. Therefore, it is not sufficient to answer the question."