In [None]:
import os
from dotenv import load_dotenv

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# MODEL = "gpt-5-nano"
MODEL = "mistral:latest"
# MODEL = "gpt-oss:20b"


In [2]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.embeddings import OpenAIEmbeddings

if MODEL.startswith("gpt"):
    model = ChatOpenAI(api_key=OPENAI_API_KEY, model=MODEL)
    embeddings = OpenAIEmbeddings()
else:
    model = Ollama(model = MODEL)
    embeddings = OllamaEmbeddings(model="nomic-embed-text")


model.invoke("tell me something the richest man of the world")

  embeddings = OpenAIEmbeddings()


AIMessage(content="The title “richest person in the world” isn’t fixed—it's about who has the largest net worth at any given moment, and it shifts with stock prices and company fortunes. In recent years the top spot has alternated mainly between:\n\n- Elon Musk (Tesla, SpaceX)\n- Bernard Arnault (LVMH)\n- Jeff Bezos (Amazon)\n\nQuick facts:\n- Elon Musk’s wealth is largely tied to Tesla stock and SpaceX.\n- Bernard Arnault’s wealth comes from LVMH and its luxury brands (Louis Vuitton, Dior, Sephora, etc.).\n- Jeff Bezos built most of his wealth from Amazon stock and also runs other ventures like Blue Origin.\n\nWould you like me to share the very latest ranking (If you want, I can summarize the current publicly reported leader and give a quick bio)?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 1647, 'prompt_tokens': 15, 'total_tokens': 1662, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tok

In [3]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()
chain = model | parser
chain.invoke("who is the richest woman in the world")

'Francoise Bettencourt Meyers, the heiress to L’Oréal, is widely considered the richest woman in the world. Her net worth is typically reported in the high tens of billions to around $80–90 billion, though the exact figure and ranking can vary with stock prices and the source.\n\nOther near-top contenders (and their ranks can shift) include Julia Koch, Yang Huiyan, and MacKenzie Scott.\n\nIf you want the exact current ranking, I can point you to the latest Forbes real-time list or Bloomberg Billionaires Index.'

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, 
    chunk_overlap=200
)

loader = PyPDFLoader("ADNOC_AnnualReport.pdf")
pages = loader.load_and_split(text_splitter)
pages

[Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.1 (Macintosh)', 'creationdate': '2025-04-03T12:06:44+04:00', 'moddate': '2025-04-03T12:08:16+04:00', 'trapped': '/False', 'source': 'ADNOC_AnnualReport.pdf', 'total_pages': 141, 'page': 0, 'page_label': '1'}, page_content='DRIVING GROWTH, \nPOWERED BY \nINNOV ATION\n@adnocdistribution\nAnnual Report 2024'),
 Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.1 (Macintosh)', 'creationdate': '2025-04-03T12:06:44+04:00', 'moddate': '2025-04-03T12:08:16+04:00', 'trapped': '/False', 'source': 'ADNOC_AnnualReport.pdf', 'total_pages': 141, 'page': 1, 'page_label': '2'}, page_content='As part of this transformation, we are expanding beyond \ntraditional fuel retail  by doubling down on non-fuel \nbusiness, investing in EV charging infrastructure, and \nstrengthening our international operations. We are \nprioritizing innovation and enhancing customer experience \nin line 

In [5]:
from langchain.prompts import PromptTemplate

template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know"

Context = {context}

Question = {question}

"""

prompt = PromptTemplate.from_template(template)
prompt.format(context="here is the context", question="here is some question")


'\nAnswer the question based on the context below. If you can\'t\nanswer the question, reply "I don\'t know"\n\nContext = here is the context\n\nQuestion = here is some question\n\n'

In [6]:
chain = prompt | model | parser

In [7]:
chain.input_schema.model_json_schema()

{'properties': {'context': {'title': 'Context', 'type': 'string'},
  'question': {'title': 'Question', 'type': 'string'}},
 'required': ['context', 'question'],
 'title': 'PromptInput',
 'type': 'object'}

In [8]:
chain.invoke(
    {
        "context": """Our system gathers and analyzes data from multiple sources, including various sensors,
such as perimeter protection, video surveillance cameras and video analytics, license plate
readers (LPRs) and access control; databases; and open data sources, including the web
and social networks.
The collected data is then fed into our advanced analytics and state-of-the-art command
and control center to analyze and automatically identify abnormal events or behaviors as
defined and configured by system users. It creates a comprehensive unified situational
awareness picture (USAP) on geographic information system (GIS) maps, provides real-time
actionable intelligence, triggers alerts, recommends actions and guides the organization
through managing the event from proactive detection to closure.
The bottom line: you are able to better detect, prepare for and manage security breaches
and risks; ensure proper handling of liquefied natural gas and hazardous materials; and
transport cargo safely across long-distance, cross-country routes.""",
        "question": "give me the short summary"
    
    }
)

'An integrated system that collects data from sensors (perimeter, cameras, LPRs, access control), databases, and open sources, analyzes it to automatically detect abnormal events, and presents a unified situational awareness picture on GIS maps. It provides real-time actionable intelligence, triggers alerts, recommends actions, and guides incident management from detection to closure, with a focus on security, hazardous materials handling, and safe long-distance cargo transport.'

In [9]:
from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore = DocArrayInMemorySearch.from_documents(
    pages,
    embedding = embeddings
)



In [10]:
retriever= vectorstore.as_retriever()

retriever.invoke("ADNOC Profit")

[Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.1 (Macintosh)', 'creationdate': '2025-04-03T12:06:44+04:00', 'moddate': '2025-04-03T12:08:16+04:00', 'trapped': '/False', 'source': 'ADNOC_AnnualReport.pdf', 'total_pages': 141, 'page': 50, 'page_label': '51'}, page_content='Revenue – non-fuel 1,574 1,403 12.2%\nGross Profit 4,704 4,499 4.6%\n  Gross profit – fuel 3,844 3,735 3.0%\n  Gross profit – non-fuel 860 764 12.5%\nEBITDA 2,728 2,646 3.1%\nOperating profit 2,046 2,033 0.5%\nCapital expenditure 849 864 -1.8%\n51ADNOC Distribution Annual Report 2024'),
 Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.1 (Macintosh)', 'creationdate': '2025-04-03T12:06:44+04:00', 'moddate': '2025-04-03T12:08:16+04:00', 'trapped': '/False', 'source': 'ADNOC_AnnualReport.pdf', 'total_pages': 141, 'page': 135, 'page_label': '136'}, page_content='Impairment losses and other operating expenses (25,910) (17,764) (38,469) (82,143)\n

In [11]:
from operator import itemgetter 


chain =( 
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question")
    }
    
    | prompt 
    | model 
    | parser 
)

chain.invoke({"question":"what are key findings of the document"})



'- Key audit matter: Impairment of goodwill and other intangible assets, driven by the use of complex and subjective management estimates based on judgement of key variables and market conditions (refer to Note 3 and Note 6 for policy/disclosures).\n- How the audit addressed this matter: Evaluated management’s impairment process and controls, tested management’s future cash flow forecasts and the mathematical accuracy of value-in-use calculations, involved valuations specialists to assess methodology and reasonableness of key assumptions, and discussed these with those charged with governance.\n- Key finding: Intangible assets including goodwill were not impaired as of 31 December 2024.'

In [None]:
questions = [
    "What is the purpose of this document?",
    "How many subsidiaries the group company has?",
    "What is the profit ADNOC got in 2024?",
    "Which are the most profitable segments of the ADNOC?",
    "What is distribution plan of ADNOC for 2025?",
    ]

for question in questions:
    print(f"Question: {question}")
    print(f"Answer: {chain.invoke({'question':question})}")

Question: What is the purpose of this document?
Answer: To explain and establish ADNOC’s corporate governance framework and policies, ensuring accountability, equity, transparency and disclosure, and timely, reliable public reporting in compliance with UAE SCA rules.
Question: How many subsidiaries the group company has?
Answer: 2
Question: What is the profit ADNOC got in 2024?
Answer: AED 3.86 billion (EBITDA) in 2024.
Question: Which are the most profitable segments of the ADNOC?
Answer: I don't know
Question: What is distributionplan of ADNOC for 2025?
Answer: An attractive 2024-28 dividend policy supported by a visible cashflow profile.


In [13]:
for s in chain.stream({"question": "Which are the most profitable segments of the ADNOC?"}):
    print(s,end="",flush=True)

I don't know

In [14]:
chain.batch([{"question":q} for q in questions])

['To report on ADNOC’s governance, internal controls, accounting policies, and regulatory compliance (i.e., an annual report that provides transparency to shareholders and ensures compliance with UAE regulations, including the SCA Corporate Governance Rules).',
 'Two subsidiaries (ADNOC Distribution Global Company LLC and Total Energies Marketing Egypt LLC).',
 'AED 3.86 billion (EBITDA) in 2024.',
 "I don't know",
 "I don't know"]