In [1]:
from google import genai
import json

client = genai.Client()

In [6]:
json_file_path = 'batch_input_output_files/big_dataset_1.json'

print(f"Uploading JSONL file: {json_file_path}")
batch_input_file = client.files.upload(
    file=json_file_path
    )
print(f"Uploaded JSONL file: {batch_input_file.name}")

print("\nCreating batch job...")
batch_job_from_file = client.batches.create(
    model="gemini-2.5-flash",
    src=batch_input_file.name,
    config={
        'display_name': 'big_dataset_1.json',
    }
)
print(f"Created batch job from file: {batch_job_from_file.name}")
print("You can now monitor the job status using its name.")

Uploading JSONL file: batch_input_output_files/big_dataset_1.json
Uploaded JSONL file: files/2peg4v3ssu75

Creating batch job...
Created batch job from file: batches/sy6qfpgtt4a9myxdal4pzscjwd4srs155156
You can now monitor the job status using its name.


In [5]:
completed_states = set([
    'JOB_STATE_SUCCEEDED',
    'JOB_STATE_FAILED',
    'JOB_STATE_CANCELLED',
    'JOB_STATE_EXPIRED',
])

print("Listing recent batch jobs:\n")

# Note: The list API currently doesn't return inlined_responses.
# As a workaround,you can make a `get` call for inline jobs to see their results.
batches = client.batches.list(config={'page_size': 10})

for b in batches.page:
    print(f"Job Name: {b.name}")
    print(f"  - Display Name: {b.display_name}")
    print(f"  - State: {b.state.name}")
    print(f"  - Create Time: {b.create_time.strftime('%Y-%m-%d %H:%M:%S')}")
    if b.state.name in completed_states:
        print(f"  - End Time: {b.end_time.strftime('%Y-%m-%d %H:%M:%S')}")


    # Check if it was an inline job (no destination file)
    if b.dest is not None:
      if not b.dest.file_name:
        full_job = client.batches.get(name=b.name)
        if full_job.inlined_responses:
            print("  - Type: Inline ({} responses)".format(len(full_job.inlined_responses)))
      else:
          print(f"  - Type: File-based (Output: {b.dest.file_name})")

    print("-" * 20)

Listing recent batch jobs:

Job Name: batches/ogretrlgrqbvhrtl8gtwry5vuahi7nqey9m8
  - Display Name: big_dataset_3.json
  - State: JOB_STATE_SUCCEEDED
  - Create Time: 2025-10-20 23:14:26
  - End Time: 2025-10-20 23:31:30
  - Type: File-based (Output: files/batch-ogretrlgrqbvhrtl8gtwry5vuahi7nqey9m8)
--------------------
Job Name: batches/5tlblj8ilphbwiew7h9dngcsd103go74uwmj
  - Display Name: big_dataset_2.json
  - State: JOB_STATE_SUCCEEDED
  - Create Time: 2025-10-20 20:21:49
  - End Time: 2025-10-20 20:44:11
  - Type: File-based (Output: files/batch-5tlblj8ilphbwiew7h9dngcsd103go74uwmj)
--------------------


In [3]:
batch_job = client.batches.get(name="batches/5tlblj8ilphbwiew7h9dngcsd103go74uwmj")

if batch_job.state.name == 'JOB_STATE_SUCCEEDED':
    # The output is in another file.
    #result_file_name = "files/batch-qv5ik4aou0ah06h3nx09obewd5y6udcf2fb6"
    result_file_name = batch_job.dest.file_name

    print(f"Results are in file: {result_file_name}")

    print("\nDownloading and parsing result file content...")
    file_content_bytes = client.files.download(file=result_file_name)
    file_content = file_content_bytes.decode('utf-8')

    pretty_file_content = ""

    #The result file is also a JSONL file. Parse and print each line.
    for line in file_content.splitlines():
      if line:
        parsed_response = json.loads(line)
        response_text = parsed_response['response']['candidates'][0]['content']['parts'][0]['text']
        print(response_text)
        print(json.dumps(response_text, indent=4))


    #   pretty_file_content = pretty_file_content + json.dumps(parsed_response, indent=2) + "\n"
        #Pretty-print the JSON for readability
    #    print(json.dumps(parsed_response, indent=2))
    #    print("-" * 20)

    #json_file_path = 'big_dataset_1_results.json'
    #print(f"\nCreating JSONL file: {json_file_path}")
    #with open(json_file_path, 'w') as f:
    #  f.write(file_content + '\n')

else:
    print(f"Job did not succeed. Final state: {batch_job.state.name}")

Results are in file: files/batch-5tlblj8ilphbwiew7h9dngcsd103go74uwmj

Downloading and parsing result file content...
**My Assessment of AnhuiConchCement's Supplier Screening Process in 2022**

Okay, so the user wants to know if AnhuiConchCement screens its suppliers using environmental criteria for 2022.  I need to find this specific information in the annual report. I started by looking for key phrases like "supplier screening," "environmental criteria," "sustainable practices," and "supplier standards." I figured the "Supply Chain Management," "Environmental Management," and "ESG Governance" sections would be the most relevant.

I skimmed the report, and on page 3, I saw that section 5.4, "Supply Chain Management," seemed like a promising area to focus on.  I also looked at the Chairman's statement and the Stakeholder Engagement section, but those didn't directly address the specific question about *screening* with *environmental criteria*. They mentioned quality control and supplie

KeyboardInterrupt: 

In [5]:
client.batches.cancel(name="batches/7k4p0kdc4vpuuw842zi3c7rp79wy7orzb71l")


In [7]:
file_content_bytes = client.files.download(file="files/qw9nltlrozph")
file_content = file_content_bytes.decode('utf-8')

pretty_file_content = ""

file_path = 'somefking.pdf'
print(f"\nCreating pdf file: {file_path}")
with open(file_path, 'w') as f:
  f.write(file_content + '\n')

ClientError: 400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': 'Only GENERATED files can be downloaded.', 'status': 'INVALID_ARGUMENT'}}