In [29]:
import sys
import os
import threading
import boto3
from boto3.s3.transfer import TransferConfig
import urllib3
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# S3 credentials read below are available via os.getenv.
load_dotenv()

# The client below is created with verify=False, which makes urllib3 emit an
# InsecureRequestWarning on every request; silence that specific warning.
# NOTE(review): presumably the lab endpoint uses a self-signed certificate —
# passing a CA bundle path via `verify=` would be safer than disabling TLS checks.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

S3_BUCKET = "base"  # destination bucket
FILE_PATH = "data"  # local directory holding the files to upload
KEY_PATH = "1brc"  # key prefix inside the bucket

# Size of one multipart part, in bytes (25 MiB). The previous value,
# 1024 * 25, is only 25 KiB — below the 5 MiB minimum S3 accepts for a
# multipart part — so the configured number did not describe the part size
# actually used for the transfer.
MULTIPART_CHUNK_BYTES = 25 * 1024 * 1024

s3_client = boto3.client(
    "s3",
    endpoint_url="https://swfs-s3.lab.s-miras.com/",
    # os.getenv returns None when a variable is unset; boto3 then falls back
    # to its default credential lookup instead of failing here.
    aws_access_key_id=os.getenv("S3_AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("S3_AWS_SECRET_ACCESS_KEY"),
    verify=False,
)

transfer_config = TransferConfig(
    # Files larger than this are sent as multipart uploads.
    multipart_threshold=MULTIPART_CHUNK_BYTES,
    # Up to 10 parts in flight at once.
    max_concurrency=10,
    multipart_chunksize=MULTIPART_CHUNK_BYTES,
    use_threads=True,
)


class ProgressPercentage:
    """Callable that prints a single-line upload progress indicator.

    An instance is meant to be passed as the ``Callback`` argument of
    ``upload_file``; every call adds the reported byte count to a running
    total and rewrites the current stdout line with the new percentage.
    """

    def __init__(self, filename):
        """Record the file's name and on-disk size.

        Args:
            filename: Path of the local file being transferred.

        Raises:
            OSError: If ``filename`` does not exist or cannot be stat'ed.
        """
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # Guards the running total and the stdout write, since the callback
        # may be invoked from several transfer threads.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Add ``bytes_amount`` to the running total and redraw the progress line.

        Args:
            bytes_amount: Number of bytes transferred since the previous call.
        """
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # An empty file has nothing left to send; report it as done
                # instead of dividing by zero.
                percentage = 100.0
            # The leading "\r" returns to column 0 so each update overwrites
            # the previous one rather than printing a new line.
            sys.stdout.write(
                "\r%s  %s / %s  (%.2f%%)"
                % (self._filename, self._seen_so_far, self._size, percentage)
            )
            sys.stdout.flush()

In [30]:
!ls -lh data/ | grep -v "^Permissions" | awk '{print $2 "\t" $9}'

4.3G	measurements.parquet
16G	measurements.txt


In [31]:
# Upload the plain-text measurements file, printing progress as it goes.
s3_client.upload_file(
    Filename=f"{FILE_PATH}/measurements.txt",
    Bucket=S3_BUCKET,
    Key=f"{KEY_PATH}/measurements.txt",
    # The object is uploaded with a public-read ACL and a text content type.
    ExtraArgs={"ACL": "public-read", "ContentType": "text/plain"},
    Config=transfer_config,
    Callback=ProgressPercentage(f"{FILE_PATH}/measurements.txt"),
)

data/measurements.txt  15901107260 / 15901107260.0  (100.00%)

In [32]:
# Upload the Parquet version of the measurements, printing progress as it goes.
s3_client.upload_file(
    Filename=f"{FILE_PATH}/measurements.parquet",
    Bucket=S3_BUCKET,
    Key=f"{KEY_PATH}/measurements.parquet",
    # The object is uploaded with a public-read ACL and a generic binary content type.
    ExtraArgs={"ACL": "public-read", "ContentType": "application/octet-stream"},
    Config=transfer_config,
    Callback=ProgressPercentage(f"{FILE_PATH}/measurements.parquet"),
)

data/measurements.parquet  4346710127 / 4346710127.0  (100.00%)