# util-cos

This component provides COS utility functions (e.g. creating a bucket, listing contents of a bucket)

Open Issues:
- [] make sure the endpoint starts with https regardless of whether the input has no prefix or starts with s3 or s3a
- [] make sure there is a / symbol between bucket and path even when none is specified

In [None]:
!pip install aiobotocore botocore s3fs

In [None]:
import logging
import os
import re
import s3fs
import sys
import glob

In [None]:
# Component parameters. Each value is read from the environment variable of
# the same name; parameters without a default are None when the variable is unset.

# access key id
access_key_id = os.environ.get('access_key_id')

# secret access key
secret_access_key = os.environ.get('secret_access_key')

# cos/s3 endpoint
endpoint = os.environ.get('endpoint')

# cos bucket name
bucket_name = os.environ.get('bucket_name')

# path
path = os.environ.get('path', '')

# source in case of uploads
source = os.environ.get('source', '')

# target in case of downloads
target = os.environ.get('target', '')

# recursive
# Parsed from text: bool() on a string is True for every non-empty value,
# including 'False', so only explicit truthy words enable the flag.
recursive = os.environ.get('recursive', 'False').strip().lower() in (
    '1', 'true', 't', 'yes', 'y', 'on'
)

# operation (mkdir|ls|find|get|put|rm|sync_to_cos|sync_to_local|glob)
operation = os.environ.get('operation')

# log level
log_level = os.environ.get('log_level', 'INFO')

In [None]:
# Send all log records of the root logger to stdout at the configured level.
root = logging.getLogger()
root.setLevel(log_level)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(log_level)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)


# Turn every `name=value` command-line argument into the assignment statement
# `name="value"`. re.match only anchors at the start of the argument, so the
# pattern effectively requires a name made of [A-Za-z0-9_] followed by '='.
parameters = [
    argument.replace('=', '="') + '"'
    for argument in sys.argv
    if '=' in argument
    and re.match(r'[A-Za-z0-9_]*=[.\/A-Za-z0-9]*', argument)
]

logging.info('Logging parameters: ' + ''.join(parameters))
for parameter in parameters:
    logging.info('Parameter: ' + parameter)
    # NOTE(security): exec() runs text taken from the command line. A value
    # containing a double quote, a backslash or a second '=' can break the
    # statement or execute arbitrary code. Only use with trusted arguments.
    exec(parameter)

# A command-line override arrives as a string. bool('False') would be True,
# so strings are parsed by content; any other type keeps plain truthiness.
if isinstance(recursive, str):
    recursive = recursive.strip().lower() in ('1', 'true', 't', 'yes', 'y', 'on')
else:
    recursive = bool(recursive)

In [None]:
def print_list(l):
    """Print every element of the iterable ``l`` on its own line."""
    for text in map(str, l):
        print(text)

In [None]:
def _normalize_endpoint(url):
    """Return ``url`` as an http(s) endpoint URL.

    An ``s3://`` or ``s3a://`` prefix is replaced by ``https://`` and a value
    without any scheme gets ``https://`` prepended. An explicit ``http://`` or
    ``https://`` value and an empty/None value are returned unchanged.
    """
    if not url:
        return url
    for prefix in ('s3a://', 's3://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
            break
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    return url


def _cos_path(bucket, key):
    """Concatenate bucket and key, inserting a '/' only when it is missing.

    An empty key yields the bare bucket name.
    """
    if not key or key.startswith('/') or bucket.endswith('/'):
        return bucket + key
    return bucket + '/' + key


s3 = s3fs.S3FileSystem(
    anon=False,
    key=access_key_id,
    secret=secret_access_key,
    client_kwargs={'endpoint_url': _normalize_endpoint(endpoint)}
)

# Dispatch on the requested operation. Remote paths are built inside each
# branch so that an unknown operation is only logged.
if operation == 'mkdir':
    s3.mkdir(_cos_path(bucket_name, path))
elif operation == 'ls':
    print_list(s3.ls(_cos_path(bucket_name, path)))
elif operation == 'find':
    print_list(s3.find(_cos_path(bucket_name, path)))
elif operation == 'put':
    print(s3.put(source, _cos_path(bucket_name, path), recursive=recursive))
elif operation == 'sync_to_cos':
    # Upload every local file matched by `source` that is missing remotely
    # or whose remote size differs from the local size.
    for file in glob.glob(source, recursive=recursive):
        logging.info(f'processing {file}')
        if os.path.isdir(file):
            # Only files are compared and uploaded, as in sync_to_local.
            logging.debug(f'skipping directory {file}')
            continue
        remote_file = _cos_path(bucket_name, file)
        if s3.exists(remote_file):
            logging.info(f'exists {file}')
            remote_info = s3.info(remote_file)
            logging.debug(f's3.info {remote_info}')
            if remote_info['size'] != os.path.getsize(file):
                logging.info(f'uploading {file}')
                s3.put(file, remote_file)
            else:
                logging.info(f'skipping {file}')
        else:
            logging.info(f'uploading {file}')
            s3.put(file, remote_file)
elif operation == 'sync_to_local':
    # Download every remote object matched by `path` that is missing locally
    # or whose local size differs from the remote size.
    for full_path in s3.glob(_cos_path(bucket_name, path)):
        # Join with a path separator so target and bucket name do not fuse.
        local_full_path = os.path.join(target, full_path.lstrip('/'))
        logging.info(f'processing {full_path}')
        remote_info = s3.info(full_path)
        if remote_info['type'] == 'directory':
            logging.debug(f'skipping directory {full_path}')
            continue
        if os.path.exists(local_full_path):
            logging.info(f'exists {full_path}')
            logging.debug(f's3.info {remote_info}')
            if remote_info['size'] != os.path.getsize(local_full_path):
                logging.info(f'downloading {full_path} to {local_full_path}')
                s3.get(full_path, local_full_path)
            else:
                logging.info(f'skipping {full_path}')
        else:
            logging.info(f'downloading {full_path} to {local_full_path}')
            s3.get(full_path, local_full_path)
elif operation == 'get':
    s3.get(_cos_path(bucket_name, path), target, recursive=recursive)
elif operation == 'rm':
    s3.rm(_cos_path(bucket_name, path), recursive=recursive)
elif operation == 'glob':
    print_list(s3.glob(_cos_path(bucket_name, path)))
else:
    logging.error(f'operation unknown {operation}')