In [None]:
import boto3
from owlna import Athena
# Athena entry point, built on a default boto3 session.
athena = Athena(session=boto3.Session())

# Database and table name shared by the cells below.
database = "unittest"
table_name = "pyathena_csv_unittest"

# Last expression of the cell: display the Athena object.
athena

In [None]:
# Create the CSV-backed external table used by the rest of the notebook and
# fetch its table handle.
#
# NOTE: the workgroup below is environment-specific — adjust it for the
# account this notebook runs against.
with athena.connect(query_options={
    "WorkGroup": "cdh_tradingsurveillanceproducer_42189",
    "QueryExecutionContext": {
        'Database': database,
        'Catalog': 'AwsDataCatalog'
    },
}).cursor() as cursor:
    # `IF NOT EXISTS` keeps this cell re-runnable (Restart Kernel -> Run All):
    # a plain CREATE fails once the table is already registered.
    # If the column definitions below change, drop the table first so the new
    # definition is actually applied:
    # cursor.execute(f"DROP TABLE {table_name}", wait=0.5)
    #
    # Every column except `char` / `varchar` is declared as string in the DDL.
    # `wait=0.5` is presumably the polling interval in seconds — TODO confirm
    # against the owlna cursor API.
    cursor.execute(f"""CREATE EXTERNAL TABLE IF NOT EXISTS `{table_name}`(
  `string` string COMMENT 'test',
  `date` string,
  `timestamp` string,
  `int` string,
  `tinyint` string,
  `smallint` string,
  `bigint` string,
  `double` string,
  `float` string,
  `char` char(10),
  `varchar` varchar(120)
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   'separatorChar' = ',',
   'quoteChar' = '"'
)
STORED AS TEXTFILE
LOCATION 's3://unittest/{table_name}'
TBLPROPERTIES (
    "skip.header.line.count"="1",
    'serialization.null.format'=''
)""", wait=0.5)

    # Look the table up through the cursor's connection.
    table = cursor.connection.table(catalog='AwsDataCatalog', database=database, name=table_name)

# Last expression of the cell: display the table handle.
table

In [None]:
import pyarrow

# Layout of the batch to insert, as (column name, Arrow type) pairs.
# NOTE(review): "pstring" and "pint" are not columns of the table DDL in the
# previous cell — confirm this is intentional (e.g. partition columns, or a
# deliberate exercise of the unsafe cast below).
batch_schema = pyarrow.schema(
    [
        ("pstring", pyarrow.string()),
        ("pint", pyarrow.int64()),
        ("string", pyarrow.string()),
        ("int", pyarrow.int32()),
        ("timestamp", pyarrow.string()),
        ("date", pyarrow.string()),
    ]
)

# Two rows per column, listed in the same order as `batch_schema`.
batch_columns = [
    pyarrow.array([None, "pstring"]),
    pyarrow.array([None, 1]),
    pyarrow.array([None, "test"]),
    pyarrow.array([None, 10]),
    pyarrow.array(["2022-10-10 12:12:00.123Z", "2022-10-10 12:12:00.123456789Z"]),
    pyarrow.array(["2022-10-10", "2022-10-12"]),
]

data = pyarrow.RecordBatch.from_arrays(batch_columns, schema=batch_schema)

with athena.connect() as connection:
    # Keep a reference to the connection's client (a boto3 client, per the
    # original note) so it stays available after the block.
    boto3_client = connection.client

    # Resolve the target table.
    table = connection.table(catalog='AwsDataCatalog', database=database, name=table_name)

    # Write the Arrow batch into the table.
    # See https://arrow.apache.org/docs/python/generated/pyarrow.dataset.write_dataset.html#pyarrow.dataset.write_dataset
    #
    # existing_data_behavior:
    #   "overwrite_or_ignore" (the default) appends to what is already there
    #   "delete_matching" overwrites the directory / partition directory
    # base_dir: per the original note, pass {"key": "value"} to append to the
    #   table.location path.
    # safe: safe casting to table.schema_arrow; defaults to
    #   owlna.config.DEFAULT_SAFE_MODE.
    table.insert_arrow_batch(
        data,
        base_dir=None,
        existing_data_behavior="delete_matching",
        safe=False,
    )