In [1]:
from pyspark.sql import SparkSession
import pyspark
import os   
import sys

os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
os.environ['JAVA_HOME'] = "/usr/lib/jvm/java-17-openjdk-amd64"

print("Python executable being used:", sys.executable)
print("java home:", os.environ.get('JAVA_HOME'))

packages=",".join(["org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0",
"org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1",
"com.amazonaws:aws-java-sdk-bundle:1.12.262",
"org.apache.hadoop:hadoop-aws:3.3.4"])


spark = (SparkSession.builder
    .appName("KafkaSparkLocal")
    .master("local[4]")
    .config("spark.jars.packages", packages) 
    
    # S3A/MinIO connection settings
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio-api.192.168.49.2.nip.io")
    .config("spark.hadoop.fs.s3a.access.key", "minio")
    .config("spark.hadoop.fs.s3a.secret.key", "minio123")
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", 
            "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
    
    # Multipart upload settings (simple for small files)
    .config("spark.hadoop.fs.s3a.fast.upload", "true")
    .config("spark.hadoop.fs.s3a.multipart.size", "52428800")  # 50MB (won't be reached)
    .config("spark.hadoop.fs.s3a.multipart.threshold", "52428800")  # Start multipart at 50MB
    
    # Iceberg catalog configuration
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.my_catalog", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.my_catalog.type", "hadoop")
    .config("spark.sql.catalog.my_catalog.warehouse", "s3a://test2/mywarehouse")
    .config("spark.sql.iceberg.vectorization.enabled", "false")
    
    .getOrCreate()
)

Python executable being used: /home/kumararpita/alpaca_stream_ingestion/.venv/bin/python
java home: /usr/lib/jvm/java-17-openjdk-amd64


25/11/13 11:47:21 WARN Utils: Your hostname, kumararpita-OMEN-Laptop-15-en0xxx resolves to a loopback address: 127.0.1.1; using 192.168.1.22 instead (on interface wlo1)
25/11/13 11:47:21 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/home/kumararpita/alpaca_stream_ingestion/.venv/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/kumararpita/.ivy2/cache
The jars for the packages stored in: /home/kumararpita/.ivy2/jars
org.apache.iceberg#iceberg-spark-runtime-3.5_2.12 added as a dependency
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
com.amazonaws#aws-java-sdk-bundle added as a dependency
org.apache.hadoop#hadoop-aws added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-4b84de9e-388c-45dc-b259-cd4ecd273acc;1.0
	confs: [default]
	found org.apache.iceberg#iceberg-spark-runtime-3.5_2.12;1.9.0 in central
	found org.apache.spark#spark-sql-kafka-0-10_2.12;3.5.1 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.5.1 in central
	found org.apache.kafka#kafka-clients;3.4.1 in central
	found org.lz4#lz4-java;1.8.0 in central
	found org.xerial.snappy#snappy-java;1.1.10.3 in central
	found org.slf4j#slf4j-api;2.0.7 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.4 in central
	found org.apache.hadoop#hado

In [3]:
spark.sql("show tables in my_catalog.iex_db").show(truncate=False)

+---------+------------------------------+-----------+
|namespace|tableName                     |isTemporary|
+---------+------------------------------+-----------+
|iex_db   |raw_stream_iex_simulate_1     |false      |
|iex_db   |raw_stream_iex_simulate       |false      |
|iex_db   |raw_stream_iex_2              |false      |
|iex_db   |raw_iex_bars_iceberg_history  |false      |
|iex_db   |raw_iex_bars_iceberg_history_1|false      |
+---------+------------------------------+-----------+



In [2]:
spark.sql("select count(*) from my_catalog.iex_db.raw_stream_iex_simulate_1").show()

25/11/13 11:47:27 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties


+--------+
|count(1)|
+--------+
|   35213|
+--------+



In [8]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.history order by made_current_at desc").show(truncate=False)

+-----------------------+-------------------+-------------------+-------------------+
|made_current_at        |snapshot_id        |parent_id          |is_current_ancestor|
+-----------------------+-------------------+-------------------+-------------------+
|2025-11-12 21:20:01.435|8036801604445774392|3226599583107567081|true               |
|2025-11-12 21:19:01.387|3226599583107567081|5857842333919397334|true               |
|2025-11-12 21:18:01.346|5857842333919397334|1421835160532225960|true               |
|2025-11-12 21:17:01.231|1421835160532225960|3742565245462726011|true               |
|2025-11-12 21:16:01.231|3742565245462726011|270633736931421162 |true               |
|2025-11-12 21:15:01.253|270633736931421162 |184092711543636498 |true               |
|2025-11-12 21:14:01.244|184092711543636498 |1420931409252013295|true               |
|2025-11-12 21:13:01.255|1420931409252013295|5582774671696242896|true               |
|2025-11-12 21:12:01.286|5582774671696242896|284068578

In [7]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.history order by made_current_at desc").show(100,truncate=False)

+-----------------------+-------------------+-------------------+-------------------+
|made_current_at        |snapshot_id        |parent_id          |is_current_ancestor|
+-----------------------+-------------------+-------------------+-------------------+
|2025-11-12 21:07:01.434|1305777011589596320|6984047973173646351|true               |
|2025-11-12 21:06:01.48 |6984047973173646351|3896755042063846122|true               |
|2025-11-12 21:05:01.413|3896755042063846122|3667594001235061581|true               |
|2025-11-12 21:04:01.311|3667594001235061581|8897264040870990808|true               |
|2025-11-12 21:03:01.357|8897264040870990808|6472659118310701612|true               |
|2025-11-12 21:02:01.364|6472659118310701612|9070257501486615611|true               |
|2025-11-12 21:01:01.392|9070257501486615611|7954944005028566293|true               |
|2025-11-12 21:00:19.853|7954944005028566293|7189241901420270994|true               |
|2025-11-12 11:43:44.915|7189241901420270994|688340860

In [12]:
df = spark.sql("CALL my_catalog.system.expire_snapshots('my_catalog.iex_db.raw_stream_iex_simulate_1', TIMESTAMP '2025-11-12 11:43:44.915')")

                                                                                

In [14]:
df.show(100,truncate=False)

+------------------------+-----------------------------------+-----------------------------------+----------------------------+----------------------------+------------------------------+
|deleted_data_files_count|deleted_position_delete_files_count|deleted_equality_delete_files_count|deleted_manifest_files_count|deleted_manifest_lists_count|deleted_statistics_files_count|
+------------------------+-----------------------------------+-----------------------------------+----------------------------+----------------------------+------------------------------+
|138                     |0                                  |0                                  |46                          |47                          |0                             |
+------------------------+-----------------------------------+-----------------------------------+----------------------------+----------------------------+------------------------------+



In [6]:
##catalog.table.metadata_log_entries
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.metadata_log_entries order by timestamp desc").show(100,truncate=False)

+-----------------------+-----------------------------------------------------------------------------------+-------------------+----------------+----------------------+
|timestamp              |file                                                                               |latest_snapshot_id |latest_schema_id|latest_sequence_number|
+-----------------------+-----------------------------------------------------------------------------------+-------------------+----------------+----------------------+
|2025-11-12 21:06:01.48 |s3a://test2/mywarehouse/iex_db/raw_stream_iex_simulate_1/metadata/v56.metadata.json|6984047973173646351|0               |55                    |
|2025-11-12 21:05:01.413|s3a://test2/mywarehouse/iex_db/raw_stream_iex_simulate_1/metadata/v55.metadata.json|3896755042063846122|0               |54                    |
|2025-11-12 21:04:01.311|s3a://test2/mywarehouse/iex_db/raw_stream_iex_simulate_1/metadata/v54.metadata.json|3667594001235061581|0               |53  

In [5]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.snapshots").show(100,truncate=False)

+-----------------------+-------------------+-------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|committed_at           |snapshot_id        |parent_id          |operation|manifest_list                                                                                      

In [3]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.files order by file_size_in_bytes desc").show(100,truncate=False)

                                                                                

+-------+--------------------------------------------------------------------------------------------------------------------------------+-----------+-------+------------+------------------+--------------------------------------------------------------------------+-----------------------------------------------------------------------------+--------------------------------------------------------+----------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+-------------+------------+-------------+------------+--------------------+---------

In [5]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.files order by file_size_in_bytes desc").show(100,truncate=False)

+-------+--------------------------------------------------------------------------------------------------------------------------+-----------+-------+------------+------------------+---------------------------------------------------------------------------------+------------------------------------------------------------------------------------+--------------------------------------------------------+----------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+-------------+------------+-------------+------------+--------------------+----

In [None]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.files order by file_size_in_bytes desc").show(100,truncate=False)

+-------+--------------------------------------------------------------------------------------------------------------------------+-----------+-------+------------+------------------+------------------------------------------------------------------------------+------------------------------------------------------------------------------------+--------------------------------------------------------+----------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+-------------+------------+-------------+------------+--------------------+-------

In [18]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_simulate_1.manifests").show(100,truncate=False)

+-------+---------------------------------------------------------------------------------------------------------------+------+-----------------+-------------------+----------------------+-------------------------+------------------------+------------------------+---------------------------+--------------------------+-------------------+
|content|path                                                                                                           |length|partition_spec_id|added_snapshot_id  |added_data_files_count|existing_data_files_count|deleted_data_files_count|added_delete_files_count|existing_delete_files_count|deleted_delete_files_count|partition_summaries|
+-------+---------------------------------------------------------------------------------------------------------------+------+-----------------+-------------------+----------------------+-------------------------+------------------------+------------------------+---------------------------+-------------------------

In [14]:
spark.sql("CALL my_catalog.system.rewrite_data_files('my_catalog.iex_db.raw_stream_iex_simulate_1')")

25/11/12 11:43:44 WARN S3ABlockOutputStream: Application invoked the Syncable API against stream writing to mywarehouse/iex_db/raw_stream_iex_simulate_1/data/00000-20-a928fb12-3ed4-4fa3-ac7d-cb3f78502306-0-00001.parquet. This is unsupported
                                                                                

DataFrame[rewritten_data_files_count: int, added_data_files_count: int, rewritten_bytes_count: bigint, failed_data_files_count: int]

In [None]:
spark.sql("select * from my_catalog.iex_db.raw_stream_iex_2 limit 2").show()

In [7]:
spark.sql("select offset from my_catalog.iex_db.raw_stream_iex_2 limit order by 1 desc").show(100,truncate=False)

+------+
|offset|
+------+
|718   |
|717   |
|716   |
|715   |
|714   |
|713   |
|712   |
|711   |
|705   |
|704   |
|703   |
|702   |
|701   |
|700   |
|699   |
|698   |
|697   |
|696   |
|695   |
|694   |
|693   |
|692   |
|689   |
|688   |
|687   |
|686   |
|685   |
|684   |
|683   |
|682   |
+------+



In [2]:

packages=",".join(["org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0",
"org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1",
"com.amazonaws:aws-java-sdk-bundle:1.12.262",
"org.apache.hadoop:hadoop-aws:3.3.4"])

In [3]:
packages="""
org.apache.iceberg:iceberg-spark-runtime-3.5.1_2.12:1.3.0,
org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1,
com.amazonaws:aws-java-sdk-bundle:1.12.262,
org.apache.hadoop:hadoop-aws:3.3.4
"""

In [7]:
spark = (SparkSession.builder
    .appName("KafkaSparkLocal")
    .master("local[4]")
    .config("spark.jars.packages", packages) 
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio-api.192.168.49.2.nip.io")
    .config("spark.hadoop.fs.s3a.access.key", "minio")
    .config("spark.hadoop.fs.s3a.secret.key", "minio123")
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", 
            "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.my_catalog", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.my_catalog.type", "hadoop")
    .config("spark.sql.catalog.my_catalog.warehouse", "s3a://test2/mywarehouse")
    .getOrCreate())


25/10/30 11:53:45 WARN Utils: Your hostname, kumararpita-OMEN-Laptop-15-en0xxx resolves to a loopback address: 127.0.1.1; using 192.168.1.22 instead (on interface wlo1)
25/10/30 11:53:45 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/home/kumararpita/alpaca_stream_ingestion/.venv/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/kumararpita/.ivy2/cache
The jars for the packages stored in: /home/kumararpita/.ivy2/jars

org.apache.iceberg#iceberg-spark-runtime-3.5.1_2.12 added as a dependency

org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency

com.amazonaws#aws-java-sdk-bundle added as a dependency

org.apache.hadoop#hadoop-aws added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-bb6942b1-26d4-49e8-a60f-fdd65d8b3e42;1.0
	confs: [default]
:: resolution report :: resolve 11062ms :: artifacts dl 0ms
	:: modules in use:
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |   4   |   0   |   0   |   0   ||   0   |   0   |
	-------------------------------------------------------------------

PySparkRuntimeError: [JAVA_GATEWAY_EXITED] Java gateway process exited before sending its port number.

In [6]:
spark.sql("select * from my_catalog.nyc.taxis").show()

NameError: name 'spark' is not defined

In [9]:
df.count()

25/08/16 11:37:08 WARN AdminClientConfig: These configurations '[key.deserializer, value.deserializer, enable.auto.commit, max.poll.records, auto.offset.reset]' were supplied but are not used yet.
                                                                                

751

In [7]:
df.count()

25/07/23 17:59:54 WARN AdminClientConfig: These configurations '[key.deserializer, value.deserializer, enable.auto.commit, max.poll.records, auto.offset.reset]' were supplied but are not used yet.
                                                                                

11

In [10]:
df.show()

25/08/16 11:37:21 WARN AdminClientConfig: These configurations '[key.deserializer, value.deserializer, enable.auto.commit, max.poll.records, auto.offset.reset]' were supplied but are not used yet.
                                                                                

+----+--------------------+-----------+---------+------+--------------------+-------------+
| key|               value|      topic|partition|offset|           timestamp|timestampType|
+----+--------------------+-----------+---------+------+--------------------+-------------+
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     0|2025-08-12 19:18:...|            0|
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     1|2025-08-12 19:19:...|            0|
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     2|2025-08-12 19:19:...|            0|
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     3|2025-08-12 19:19:...|            0|
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     4|2025-08-12 19:20:...|            0|
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     5|2025-08-12 19:21:...|            0|
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     6|2025-08-12 19:21:...|            0|
|NULL|[5B 7B 22 54 22 3...|iex-topic-1|        0|     7|2025-08-12 19:21:...|   

In [12]:
df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)").show(truncate=False)

25/08/16 11:37:55 WARN AdminClientConfig: These configurations '[key.deserializer, value.deserializer, enable.auto.commit, max.poll.records, auto.offset.reset]' were supplied but are not used yet.


+----+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [7]:
df = spark.read \
    .format("kafka") \
    .option("kafka.bootstrap.servers", "192.168.49.2:32100") \
    .option("subscribe", "iex-topic-1-flattened") \
    .load()

In [8]:
df.count()

25/10/30 11:37:44 WARN AdminClientConfig: These configurations '[key.deserializer, value.deserializer, enable.auto.commit, max.poll.records, auto.offset.reset]' were supplied but are not used yet.


0

In [9]:
df = spark.read.format("json").load("/nvmewd/data/iex/bars.ndjson")

                                                                                

In [10]:
df.count()

                                                                                

1727829

In [11]:
df.printSchema()

root
 |-- c: double (nullable = true)
 |-- h: double (nullable = true)
 |-- l: double (nullable = true)
 |-- n: long (nullable = true)
 |-- o: double (nullable = true)
 |-- symbol: string (nullable = true)
 |-- t: string (nullable = true)
 |-- v: long (nullable = true)
 |-- vw: double (nullable = true)



In [12]:
df.show(truncate=False)

+-------+-------+-------+---+-------+------+--------------------+-----+----------+
|c      |h      |l      |n  |o      |symbol|t                   |v    |vw        |
+-------+-------+-------+---+-------+------+--------------------+-----+----------+
|146.48 |146.94 |146.39 |212|146.435|AAPL  |2021-08-02T13:30:00Z|15435|146.603929|
|146.6  |146.8  |146.45 |84 |146.57 |AAPL  |2021-08-02T13:31:00Z|10174|146.649776|
|146.27 |146.59 |146.2  |55 |146.59 |AAPL  |2021-08-02T13:32:00Z|5746 |146.363981|
|146.36 |146.36 |146.12 |62 |146.325|AAPL  |2021-08-02T13:33:00Z|5657 |146.262453|
|146.44 |146.45 |146.2  |58 |146.32 |AAPL  |2021-08-02T13:34:00Z|6241 |146.336665|
|146.09 |146.37 |146.04 |61 |146.37 |AAPL  |2021-08-02T13:35:00Z|7171 |146.112933|
|145.685|145.825|145.55 |57 |145.815|AAPL  |2021-08-02T13:36:00Z|6276 |145.681446|
|145.575|145.775|145.575|54 |145.62 |AAPL  |2021-08-02T13:37:00Z|5984 |145.638786|
|145.67 |145.78 |145.565|49 |145.58 |AAPL  |2021-08-02T13:38:00Z|5758 |145.678175|
|145

In [13]:
df.createOrReplaceTempView("iex_bars")

In [15]:
df = spark.range(10)
df.write.mode("overwrite").parquet("s3a://test2/sample_test_table")

                                                                                

In [17]:
spark.sql("SHOW NAMESPACES IN my_catalog").show()

+---------+
|namespace|
+---------+
|      nyc|
+---------+



In [14]:
spark.sql("""
create table my_catalog.iex_db.raw_iex_bars_iceberg_history
using iceberg
          as select * from iex_bars
""")

25/10/30 11:43:20 WARN S3ABlockOutputStream: Application invoked the Syncable API against stream writing to mywarehouse/iex_db/raw_iex_bars_iceberg_history/data/00001-20-47c02eed-64f4-4528-b991-fbd92c1c9365-0-00001.parquet. This is unsupported
25/10/30 11:45:13 WARN S3AInstrumentation: Closing output stream statistics while data is still marked as pending upload in OutputStreamStatistics{counters=((stream_write_exceptions_completing_upload=0) (op_abort=0) (object_multipart_aborted=0) (op_hsync=0) (action_executor_acquired=0) (stream_write_queue_duration=0) (stream_write_bytes=8071941) (op_abort.failures=0) (stream_write_exceptions=0) (object_multipart_aborted.failures=0) (stream_write_total_data=0) (multipart_upload_completed=0) (stream_write_block_uploads=1) (op_hflush=1) (multipart_upload_completed.failures=0) (stream_write_total_time=0) (action_executor_acquired.failures=0));
gauges=((stream_write_block_uploads_data_pending=8071941) (stream_write_block_uploads_pending=1));
minimums=

Py4JJavaError: An error occurred while calling o52.sql.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 12.0 failed 1 times, most recent failure: Lost task 2.0 in stage 12.0 (TID 21) (192.168.1.22 executor driver): java.io.UncheckedIOException: Failed to close current writer
	at org.apache.iceberg.io.RollingFileWriter.closeCurrentWriter(RollingFileWriter.java:124)
	at org.apache.iceberg.io.RollingFileWriter.close(RollingFileWriter.java:147)
	at org.apache.iceberg.io.RollingDataWriter.close(RollingDataWriter.java:32)
	at org.apache.iceberg.spark.source.SparkWrite$UnpartitionedDataWriter.close(SparkWrite.java:747)
	at org.apache.iceberg.spark.source.SparkWrite$UnpartitionedDataWriter.commit(SparkWrite.java:729)
	at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.$anonfun$run$1(WriteToDataSourceV2Exec.scala:470)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397)
	at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run(WriteToDataSourceV2Exec.scala:486)
	at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run$(WriteToDataSourceV2Exec.scala:425)
	at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:491)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:388)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
	at org.apache.spark.scheduler.Task.run(Task.scala:141)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: org.apache.hadoop.fs.s3a.AWSS3IOException: Writing Object on mywarehouse/iex_db/raw_iex_bars_iceberg_history/data/00002-21-47c02eed-64f4-4528-b991-fbd92c1c9365-0-00001.parquet: com.amazonaws.services.s3.model.AmazonS3Exception: Request Entity Too Large (Service: Amazon S3; Status Code: 413; Error Code: 413 Request Entity Too Large; Request ID: null; S3 Extended Request ID: null; Proxy: null), S3 Extended Request ID: null:413 Request Entity Too Large: Request Entity Too Large (Service: Amazon S3; Status Code: 413; Error Code: 413 Request Entity Too Large; Request ID: null; S3 Extended Request ID: null; Proxy: null)
	at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:320)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:119)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:322)
	at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:414)
	at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:318)
	at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:293)
	at org.apache.hadoop.fs.s3a.WriteOperationHelper.retry(WriteOperationHelper.java:208)
	at org.apache.hadoop.fs.s3a.WriteOperationHelper.putObject(WriteOperationHelper.java:563)
	at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.lambda$putObject$0(S3ABlockOutputStream.java:562)
	at org.apache.hadoop.thirdparty.com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:125)
	at org.apache.hadoop.thirdparty.com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:69)
	at org.apache.hadoop.thirdparty.com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:78)
	at org.apache.hadoop.util.SemaphoredDelegatingExecutor$RunnableWithPermitRelease.run(SemaphoredDelegatingExecutor.java:196)
	at org.apache.hadoop.util.SemaphoredDelegatingExecutor$RunnableWithPermitRelease.run(SemaphoredDelegatingExecutor.java:196)
	... 3 more
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: Request Entity Too Large (Service: Amazon S3; Status Code: 413; Error Code: 413 Request Entity Too Large; Request ID: null; S3 Extended Request ID: null; Proxy: null), S3 Extended Request ID: null
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1879)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1418)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1387)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1157)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:814)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:781)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:755)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:715)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:697)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:561)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:541)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5456)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5403)
	at com.amazonaws.services.s3.AmazonS3Client.access$300(AmazonS3Client.java:421)
	at com.amazonaws.services.s3.AmazonS3Client$PutObjectStrategy.invokeServiceCall(AmazonS3Client.java:6531)
	at com.amazonaws.services.s3.AmazonS3Client.uploadObject(AmazonS3Client.java:1861)
	at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1821)
	at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$putObjectDirect$17(S3AFileSystem.java:2877)
	at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfSupplier(IOStatisticsBinding.java:604)
	at org.apache.hadoop.fs.s3a.S3AFileSystem.putObjectDirect(S3AFileSystem.java:2874)
	at org.apache.hadoop.fs.s3a.WriteOperationHelper.lambda$putObject$7(WriteOperationHelper.java:566)
	at org.apache.hadoop.fs.store.audit.AuditingFunctions.lambda$withinAuditSpan$0(AuditingFunctions.java:62)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:117)
	... 15 more

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2856)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2792)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2791)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2791)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1247)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1247)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1247)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3060)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2994)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2983)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:989)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2398)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:385)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:359)
	at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:225)
	at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:337)
	at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:336)
	at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:225)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142)
	at org.apache.spark.sql.execution.datasources.v2.V2CreateTableAsSelectBaseExec.$anonfun$writeToTable$1(WriteToDataSourceV2Exec.scala:577)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397)
	at org.apache.spark.sql.execution.datasources.v2.V2CreateTableAsSelectBaseExec.writeToTable(WriteToDataSourceV2Exec.scala:573)
	at org.apache.spark.sql.execution.datasources.v2.V2CreateTableAsSelectBaseExec.writeToTable$(WriteToDataSourceV2Exec.scala:567)
	at org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.writeToTable(WriteToDataSourceV2Exec.scala:100)
	at org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.run(WriteToDataSourceV2Exec.scala:121)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
	at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
	at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:569)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: java.io.UncheckedIOException: Failed to close current writer
	at org.apache.iceberg.io.RollingFileWriter.closeCurrentWriter(RollingFileWriter.java:124)
	at org.apache.iceberg.io.RollingFileWriter.close(RollingFileWriter.java:147)
	at org.apache.iceberg.io.RollingDataWriter.close(RollingDataWriter.java:32)
	at org.apache.iceberg.spark.source.SparkWrite$UnpartitionedDataWriter.close(SparkWrite.java:747)
	at org.apache.iceberg.spark.source.SparkWrite$UnpartitionedDataWriter.commit(SparkWrite.java:729)
	at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.$anonfun$run$1(WriteToDataSourceV2Exec.scala:470)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397)
	at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run(WriteToDataSourceV2Exec.scala:486)
	at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run$(WriteToDataSourceV2Exec.scala:425)
	at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:491)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:388)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
	at org.apache.spark.scheduler.Task.run(Task.scala:141)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
	at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	... 1 more
Caused by: org.apache.hadoop.fs.s3a.AWSS3IOException: Writing Object on mywarehouse/iex_db/raw_iex_bars_iceberg_history/data/00002-21-47c02eed-64f4-4528-b991-fbd92c1c9365-0-00001.parquet: com.amazonaws.services.s3.model.AmazonS3Exception: Request Entity Too Large (Service: Amazon S3; Status Code: 413; Error Code: 413 Request Entity Too Large; Request ID: null; S3 Extended Request ID: null; Proxy: null), S3 Extended Request ID: null:413 Request Entity Too Large: Request Entity Too Large (Service: Amazon S3; Status Code: 413; Error Code: 413 Request Entity Too Large; Request ID: null; S3 Extended Request ID: null; Proxy: null)
	at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:320)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:119)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:322)
	at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:414)
	at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:318)
	at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:293)
	at org.apache.hadoop.fs.s3a.WriteOperationHelper.retry(WriteOperationHelper.java:208)
	at org.apache.hadoop.fs.s3a.WriteOperationHelper.putObject(WriteOperationHelper.java:563)
	at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.lambda$putObject$0(S3ABlockOutputStream.java:562)
	at org.apache.hadoop.thirdparty.com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:125)
	at org.apache.hadoop.thirdparty.com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:69)
	at org.apache.hadoop.thirdparty.com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:78)
	at org.apache.hadoop.util.SemaphoredDelegatingExecutor$RunnableWithPermitRelease.run(SemaphoredDelegatingExecutor.java:196)
	at org.apache.hadoop.util.SemaphoredDelegatingExecutor$RunnableWithPermitRelease.run(SemaphoredDelegatingExecutor.java:196)
	... 3 more
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: Request Entity Too Large (Service: Amazon S3; Status Code: 413; Error Code: 413 Request Entity Too Large; Request ID: null; S3 Extended Request ID: null; Proxy: null), S3 Extended Request ID: null
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1879)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1418)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1387)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1157)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:814)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:781)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:755)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:715)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:697)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:561)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:541)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5456)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5403)
	at com.amazonaws.services.s3.AmazonS3Client.access$300(AmazonS3Client.java:421)
	at com.amazonaws.services.s3.AmazonS3Client$PutObjectStrategy.invokeServiceCall(AmazonS3Client.java:6531)
	at com.amazonaws.services.s3.AmazonS3Client.uploadObject(AmazonS3Client.java:1861)
	at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1821)
	at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$putObjectDirect$17(S3AFileSystem.java:2877)
	at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfSupplier(IOStatisticsBinding.java:604)
	at org.apache.hadoop.fs.s3a.S3AFileSystem.putObjectDirect(S3AFileSystem.java:2874)
	at org.apache.hadoop.fs.s3a.WriteOperationHelper.lambda$putObject$7(WriteOperationHelper.java:566)
	at org.apache.hadoop.fs.store.audit.AuditingFunctions.lambda$withinAuditSpan$0(AuditingFunctions.java:62)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:117)
	... 15 more


25/10/30 11:45:46 WARN S3AInstrumentation: Closing output stream statistics while data is still marked as pending upload in OutputStreamStatistics{counters=((multipart_upload_completed.failures=0) (stream_write_queue_duration=0) (stream_write_exceptions_completing_upload=0) (action_executor_acquired=0) (stream_write_total_data=0) (stream_write_block_uploads=1) (stream_write_total_time=0) (stream_write_bytes=8213847) (multipart_upload_completed=0) (op_abort.failures=0) (op_abort=0) (op_hflush=1) (action_executor_acquired.failures=0) (object_multipart_aborted.failures=0) (stream_write_exceptions=0) (op_hsync=0) (object_multipart_aborted=0));
gauges=((stream_write_block_uploads_pending=1) (stream_write_block_uploads_data_pending=8213847));
minimums=((multipart_upload_completed.min=-1) (object_multipart_aborted.failures.min=-1) (object_multipart_aborted.min=-1) (op_abort.min=-1) (op_abort.failures.min=-1) (multipart_upload_completed.failures.min=-1) (action_executor_acquired.min=-1) (actio