In [1]:
from Gemini import generatePromptsDictionary
import io
from Fullcontext_main import retrieveCompanyYearReports
from google import genai
client = genai.Client()

companyYearReports = retrieveCompanyYearReports("Airlines", "QuantasAirways", "2024")

for doc in companyYearReports:
    print(f"Now prompting document: {doc.company_name} {doc.period} {doc.topic} {doc.mimetype} {doc.counter} ")

    doc_io = io.BytesIO(doc.file_value)
    uploaded_doc =  client.files.upload(
        file=doc_io,
        config=dict(
            mime_type=doc.mimetype)
    )
    print(f"Uploaded doc: {uploaded_doc.name} with MIME type: {uploaded_doc.mime_type}")

    prompts = generatePromptsDictionary(doc)


Download 100 of 2024.pdf application/pdf
Download 100 of 2024.pdf application/pdf
Now prompting document: QuantasAirways 2024 Topic.ESG application/pdf 1 
Uploaded doc: files/kj51bhabvcw6 with MIME type: application/pdf
Now prompting document: QuantasAirways 2024 Topic.FINANCIAL application/pdf 1 
Uploaded doc: files/a2ewqgthk9i4 with MIME type: application/pdf


In [4]:
from Gemini import IndicatorExtraction

inline_requests = []

for indicatorID in prompts:
    inline_request = {
        "contents": [{
            "parts": [
                {"text": prompts[indicatorID]},
                {"file_data": {"file_uri": uploaded_doc.uri, "mime_type": uploaded_doc.mime_type}}
            ]
        }],
        "config": {
            "thinking_config": {
                  "include_thoughts": True,
            },
            "response_mime_type": "application/json",
            "response_schema": IndicatorExtraction
        }
    }
    inline_requests.append(inline_request)

print(inline_requests)





[{'contents': [{'parts': [{'text': '"Extract the following metric from the provided document:\n      -Money (OpEx, CapEx) used for environmental purposes (EU Taxonomy aligned CapEx and OpEx starting in 2024) of the reporting company QuantasAirways for the year 2024.\n      \n      Metric-specific instructions:\n      OpEx, CapEx used for environmental purposes (Taxonomy aligned). Specifically NOT money invested in carbon credits and offsets, those are already covered in the carbon_credits_offsets indicator. This value may also be specified as total expenses for environmental purposes.\n      From the year 2024 on, you can expect report to include figures of invested CapEx and OpEx that are aligned with climate friendly target according to the EU taxonomy. Prior to this year, general figures for investment in environment causes are sufficient. You should look for tables describing the Operational Expenditure and the Capital Expanditure in the requested period. Find the taxonomy-eligeble

In [5]:
inline_batch_job = client.batches.create(
    model="models/gemini-2.5-flash",
    src=inline_requests,
    config={
        'display_name': "quantas-airlines-inline-batch-test1"
    },
)

In [6]:
# Use the name of the job you want to check
# e.g., inline_batch_job.name from the previous step
job_name0 = "batches/68ohlbuc7numz7ketcvkwizg4dh8povs8eoa"  # (e.g. 'batches/your-batch-id')
job_name = "batches/zi9hlczk1uwzljrevytef2qwqw41djv4wfnq"  # (e.g. 'batches/your-batch-id')
batch_job_from_file = client.batches.get(name=job_name)

import time

completed_states = set([
    'JOB_STATE_SUCCEEDED',
    'JOB_STATE_FAILED',
    'JOB_STATE_CANCELLED',
    'JOB_STATE_EXPIRED',
])

while batch_job_from_file.state.name not in completed_states:
  print(f"Current state: {batch_job_from_file.state.name}")
  time.sleep(30) # Wait for 30 seconds before polling again

print(f"Job finished with state: {batch_job_from_file.state.name}")
if batch_job_from_file.state.name == 'JOB_STATE_FAILED':
    print(f"Error: {batch_job_from_file.error}")

Job finished with state: JOB_STATE_SUCCEEDED


In [8]:
completed_states = set([
    'JOB_STATE_SUCCEEDED',
    'JOB_STATE_FAILED',
    'JOB_STATE_CANCELLED',
    'JOB_STATE_EXPIRED',
])

print("Listing recent batch jobs:\n")

# Note: The list API currently doesn't return inlined_responses.
# As a workaround,you can make a `get` call for inline jobs to see their results.
batches = client.batches.list(config={'page_size': 10})

for b in batches.page:
    print(f"Job Name: {b.name}")
    print(f"  - Display Name: {b.display_name}")
    print(f"  - State: {b.state.name}")
    print(f"  - Create Time: {b.create_time.strftime('%Y-%m-%d %H:%M:%S')}")
    if b.state.name in completed_states:
        print(f"  - End Time: {b.end_time.strftime('%Y-%m-%d %H:%M:%S')}")




    # Check if it was an inline job (no destination file)
    if b.dest is not None:
      if not b.dest.file_name:
        full_job = client.batches.get(name=b.name)
        if full_job.inlined_responses:
            print("  - Type: Inline ({} responses)".format(len(full_job.inlined_responses)))
      else:
          print(f"  - Type: File-based (Output: {b.dest.file_name})")

    print("-" * 20)

Listing recent batch jobs:

Job Name: batches/5dfzbzpknv9dkwjdy3sx1w9g21u409g92aoj
  - Display Name: my-batch-job-with-file-and-thoughts-thinking_config
  - State: JOB_STATE_SUCCEEDED
  - Create Time: 2025-09-12 16:59:15
  - End Time: 2025-09-12 17:00:40
  - Type: File-based (Output: files/batch-5dfzbzpknv9dkwjdy3sx1w9g21u409g92aoj)
--------------------
Job Name: batches/nm1ycsfts46b6vh2090bh4xyo9plyvs0dr06
  - Display Name: quantas-airlines-inline-batch-test1
  - State: JOB_STATE_SUCCEEDED
  - Create Time: 2025-09-12 16:51:34
  - End Time: 2025-09-12 17:00:44
--------------------
Job Name: batches/ff0deax8tnaay5rr0gqkn6g22pl3pd4orcrk
  - Display Name: extraction_attempt_3
  - State: JOB_STATE_SUCCEEDED
  - Create Time: 2025-09-11 22:12:59
  - End Time: 2025-09-12 10:09:13
  - Type: File-based (Output: files/batch-ff0deax8tnaay5rr0gqkn6g22pl3pd4orcrk)
--------------------
Job Name: batches/vnb8p71ws0ug2mgdkb6vmo11xlxspy40okyd
  - Display Name: my-batch-job-with-file-and-thoughts-thinki

In [11]:
batch_job_inline = client.batches.get(name="batches/nm1ycsfts46b6vh2090bh4xyo9plyvs0dr06")

# print the response
for i, inline_response in enumerate(batch_job_inline.dest.inlined_responses, start=1):
    print(f"\n--- Response {i} ---")

    # Check for a successful response
    if inline_response.response:
        # The .text property is a shortcut to the generated text.
        #print(inline_response.response.text)
        print(inline_response.response)


--- Response 1 ---
sdk_http_response=None candidates=[Candidate(
  citation_metadata=CitationMetadata(
    citations=[
      Citation(
        end_index=2994,
        license='',
        start_index=2625,
        uri='https://www.listcorp.com/asx/qan/qantas-airways-limited/news/qantas-group-fy22-appendix-4e-and-preliminary-final-report-2752657.html'
      ),
    ]
  ),
  content=Content(
    parts=[
      Part(
        text="""**Assessing Environmental Expenditure in the Qantas Annual Report for 2024**

Okay, I'm tasked with pinpointing the specific amount of Operational Expenditure (OpEx) and Capital Expenditure (CapEx) Qantas allocated to environmental purposes in 2024, focusing on EU Taxonomy alignment. This is a bit tricky, and I need to be precise.  The goal is clear: identify *only* money spent on environmental initiatives that align with the EU Taxonomy, explicitly excluding any investment in carbon credits and offsets.

My initial search focused on terms like "environmental pu