In [None]:
import boto3
import zipfile
import os, sys, glob
from pathlib import Path

# Make the directory three levels above the current working directory importable,
# so that the local package imports further down resolve.
package_root = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
if package_root not in sys.path:
    sys.path.append(package_root)
    
from wp4.constants import S3_ACCESS_KEY, S3_SECRET_KEY, S3_ENDPOINT, DATA_DIR_GFAS, DATA_DIR_ERA5, DATA_DIR_CAMS_AN, DATA_DIR_CAMS_RE, DATA_DIR_LC

### Initiate S3 client & check buckets

In [None]:
# Credentials and endpoint used to build the S3 client
s3_connection = {
    'aws_access_key_id': S3_ACCESS_KEY,
    'aws_secret_access_key': S3_SECRET_KEY,
    'endpoint_url': S3_ENDPOINT,
}
s3_client = boto3.client('s3', **s3_connection)

# Print the names of the buckets returned by the endpoint
bucket_listing = s3_client.list_buckets()
existing_bucket_names = [entry['Name'] for entry in bucket_listing['Buckets']]
print(f'Current buckets: {", ".join(existing_bucket_names)}')

In [None]:
# Name of the bucket that the cells below create (if missing) and upload to
TARGET_BUCKET = 'cams-analysis'

### Checking bucket names and creating the target bucket if it does not exist

In [None]:
# Refresh the bucket listing, then create the target bucket only when it is missing
existing_bucket_names = [entry['Name'] for entry in s3_client.list_buckets()['Buckets']]

if TARGET_BUCKET not in existing_bucket_names:
    print(f'Creating S3 bucket: {TARGET_BUCKET}')
    # Any error raised by create_bucket propagates unchanged and stops the cell
    s3_client.create_bucket(Bucket=TARGET_BUCKET)
    print(f'Bucket {TARGET_BUCKET} created')
    # List again so the printed overview includes the newly created bucket
    existing_bucket_names = [entry['Name'] for entry in s3_client.list_buckets()['Buckets']]
    print(f'Current buckets: {", ".join(existing_bucket_names)}')

### Zip & upload single file

In [None]:
# Zip one file from DATA_DIR and upload the archive to TARGET_BUCKET.
# DATA_DIR is made absolute up front: the cell changes the working directory into it,
# so a relative path would otherwise no longer point at the archive when uploading.
DATA_DIR = Path('path').resolve()  # Directory where the file is located
FILE_NAME = '' # name of the file
FILE_TYPE = '.nc'  # file extension .nc .tif etc
OVERWRITE_FILE = False  # overwrite the file if already present in the bucket

os.chdir(DATA_DIR)

file_name = f'{FILE_NAME}{FILE_TYPE}'
out_zip_file = f'{FILE_NAME}.zip'  # archive name, also used as the object key in the bucket
out_zip_file_loc = DATA_DIR.joinpath(out_zip_file)

# Create the zip archive containing the file we want to compress.
# The context manager closes the archive, which writes its central directory,
# so the file on disk is complete before it is uploaded.
with zipfile.ZipFile(out_zip_file, mode='w') as _zip:
    _zip.write(file_name, compress_type=zipfile.ZIP_DEFLATED)

# List the current contents of the bucket. A single list_objects_v2 call returns
# at most 1000 keys, so page through the listing to see every key.
bucket_content_names = [
    obj['Key']
    for page in s3_client.get_paginator('list_objects_v2').paginate(Bucket=TARGET_BUCKET)
    for obj in page.get('Contents', [])
]

already_in_bucket = out_zip_file in bucket_content_names

if already_in_bucket and not OVERWRITE_FILE:
    # An object with the same key exists and we do not want to overwrite it
    print(f'{out_zip_file} already present in bucket')
else:
    if already_in_bucket:
        # An object with the same key exists and we do want to overwrite it
        print(f'{out_zip_file} already present in bucket. Will overwrite this file.')
    print(f'Starting upload of {out_zip_file} to bucket: {TARGET_BUCKET}')
    # Errors raised by upload_file propagate unchanged and stop the cell
    s3_client.upload_file(out_zip_file_loc.as_posix(), TARGET_BUCKET, out_zip_file)
    print(f'Upload of {out_zip_file} to bucket: {TARGET_BUCKET} completed')

### Zip & upload multiple files of single file type within folder

In [None]:
# Zip every file of one type found directly in DATA_DIR and upload the archive to TARGET_BUCKET.
# DATA_DIR is made absolute up front: the cell changes the working directory into it,
# so a relative path would otherwise match no files and point at the wrong archive location.
DATA_DIR = os.path.abspath(r"path_to_datadir")  # Directory where the file is located
FOLDER_NAME = 'folder_name.zip' # name of the zip file uploaded to the bucket
FILE_TYPE = '.nc'  # file extension of the files in the folder to zip 
OVERWRITE_FILE = False  # overwrite the file if already present in the bucket

os.chdir(DATA_DIR)

zip_path = Path(DATA_DIR).joinpath(FOLDER_NAME)

# Collect the files before creating the archive. The archive itself is skipped so that
# it is never added to its own contents (relevant when FILE_TYPE is '.zip').
# glob.escape keeps special characters in DATA_DIR from being read as wildcards.
files_to_zip = sorted(
    path
    for path in glob.glob(os.path.join(glob.escape(DATA_DIR), f'*{FILE_TYPE}'))
    if Path(path).name != FOLDER_NAME
)

if not files_to_zip:
    # Stop here instead of uploading (and possibly overwriting with) an empty archive
    raise FileNotFoundError(f'No *{FILE_TYPE} files found in {DATA_DIR}')

# Compress and store all files in the zip archive, each under its bare file name
with zipfile.ZipFile(zip_path.as_posix(), 'w') as _zip:
    for source_path in files_to_zip:
        _zip.write(source_path, arcname=Path(source_path).name, compress_type=zipfile.ZIP_DEFLATED)

# List the current contents of the bucket. A single list_objects_v2 call returns
# at most 1000 keys, so page through the listing to see every key.
bucket_content_names = [
    obj['Key']
    for page in s3_client.get_paginator('list_objects_v2').paginate(Bucket=TARGET_BUCKET)
    for obj in page.get('Contents', [])
]

already_in_bucket = FOLDER_NAME in bucket_content_names

# uploading
if already_in_bucket and not OVERWRITE_FILE:
    # An object with the same key exists and we do not want to overwrite it
    print(f'{FOLDER_NAME} already present in bucket')
else:
    if already_in_bucket:
        # An object with the same key exists and we do want to overwrite it
        print(f'File {FOLDER_NAME} already present in bucket. Will overwrite this file.')
    print(f'Starting upload of {FOLDER_NAME} to bucket: {TARGET_BUCKET}')
    # Errors raised by upload_file propagate unchanged and stop the cell
    s3_client.upload_file(zip_path.as_posix(), TARGET_BUCKET, FOLDER_NAME)
    print(f'Upload of {FOLDER_NAME} to bucket: {TARGET_BUCKET} completed')

### Check the contents of the target bucket

In [None]:
# Collect every key in the target bucket. A single list_objects_v2 call returns at most
# 1000 keys, so page through the listing; pages without a "Contents" entry contribute nothing.
bucket_content_names = [
    obj['Key']
    for page in s3_client.get_paginator('list_objects_v2').paginate(Bucket=TARGET_BUCKET)
    for obj in page.get('Contents', [])
]

# As before, nothing is printed when the bucket holds no objects
if bucket_content_names:
    print(f'Current contents of bucket {TARGET_BUCKET}: {", ".join(bucket_content_names)}.')

In [None]:
# Remove the zip archives that were created inside DATA_DIR.
# The pattern is built with os.path.join so a path separator sits between the directory
# and '*.zip'; without it the pattern never matches files inside DATA_DIR.
# str() accepts DATA_DIR as either a string or a Path, and glob.escape keeps special
# characters in the directory name from being read as wildcards.
# NOTE: the upload cells change the working directory into DATA_DIR, so DATA_DIR
# should be an absolute path for this pattern to resolve correctly.
for archive_path in glob.glob(os.path.join(glob.escape(str(DATA_DIR)), '*.zip')):
    os.remove(archive_path)