In [7]:
import os
import boto3
import gzip
import shutil
import re

In [8]:
def decompress_save(file_name:str, 
                    in_bucket:str, in_path:str,
                    out_bucket:str, out_path:str,
                    temp_folder:str,
                    deleting_file=True,
                    verbose=False):
    """
    Downloads a compressed .gz file from an S3 bucket, 
    and saves a decompressed version in another S3 bucket.

    file_name : Name of the compressed file (without .gz)
    in_bucket : Bucket where the compressed file is
    in_path : Path in the in_bucket to the compressed file
    out_bucket : Bucket to save the decompressed file to
    out_path : Path in the out_bucket to save the decompressed file
    temp_folder : Temporary folder to manipulate files (created if missing)
    deleting_file : If True, the local decompressed copy is removed 
                    from temp_folder once it has been uploaded
    verbose : If True, print a message before each step
    """
    s3 = boto3.client('s3')
    # Create the folder that is actually used below, not a hard-coded location.
    os.makedirs(temp_folder, exist_ok=True)

    compressed_path = f"{temp_folder}/{file_name}.gz"
    decompressed_path = f"{temp_folder}/{file_name}"

    # Download compressed images from S3
    if verbose: print(f"Downloading : https://s3.amazonaws.com/{in_bucket}/{in_path}/{file_name}.gz")
    s3.download_file(in_bucket, f"{in_path}/{file_name}.gz", compressed_path)
    try:
        # Decompress images
        if verbose: print(f"Decompressing : {compressed_path}")
        with gzip.open(compressed_path, 'rb') as image_gz, open(decompressed_path, 'wb') as image_fits:
            shutil.copyfileobj(image_gz, image_fits)
    finally:
        # Delete compressed images, even when decompression failed,
        # so a broken archive does not pile up in the temporary folder.
        if verbose: print(f"Removing : {compressed_path}")
        os.remove(compressed_path)
    #Save decompressed images to S3
    if verbose: print(f"Uploading : https://s3.amazonaws.com/{out_bucket}/{out_path}/{file_name}")
    s3.upload_file(decompressed_path, out_bucket, f"{out_path}/{file_name}")
    # Delete decompressed images
    if deleting_file:
        if verbose: print(f"Removing : {decompressed_path}")
        os.remove(decompressed_path)

In [9]:
def find_files(bucket:str, path:str, regex:str) -> list[str]:
    """
    List the objects stored under a path of an S3 bucket whose key
    matches a regular expression.

    bucket : Name of the bucket to search
    path : Path (key prefix, without trailing slash) to search under
    regex : Regular expression, searched against the full object key
            (i.e. including the path)

    Returns : List of matching keys with the leading "path/" removed
    """
    prefix = f"{path}/"
    listing = boto3.resource('s3').Bucket(bucket).objects.filter(Prefix=prefix)
    # The match is done on the whole key; only the returned name is stripped.
    return [entry.key[len(prefix):] for entry in listing if re.search(regex, entry.key)]

In [11]:
# Raw string so that `\d` reaches the regex engine as-is instead of being an
# invalid string escape (which warns on recent Python versions).
# NOTE: the "." in "v7.2" is unescaped, so it matches any character.
files = find_files('grizli-v2', 'JwstMosaics/v7', r".+gds-grizli.+v7.2.+((f\d+w-.*clear_drc)|ir).+(sci|wht).+")
files

['gds-grizli-v7.2-ir_drc_sci.fits.gz', 'gds-grizli-v7.2-ir_drc_wht.fits.gz']

In [12]:
# Decompress each listed archive and upload the result to the output bucket,
# printing a "current/total" counter before every file.
total = len(files)
for count, file_name in enumerate(files, start=1):
    print(f"{count}/{total}")
    decompress_save(
        file_name[:-3],  # strip the last three characters (the ".gz" of the listed archives)
        in_bucket='grizli-v2',
        in_path='JwstMosaics/v7',
        out_bucket='aurelien-sepp',
        out_path='image/GDS',
        temp_folder='/FlashStorage/tmp',
        deleting_file=False,
        verbose=True,
    )

1/2
Downloading : https://s3.amazonaws.com/grizli-v2/JwstMosaics/v7/gds-grizli-v7.2-ir_drc_sci.fits.gz
Decompressing : /FlashStorage/tmp/gds-grizli-v7.2-ir_drc_sci.fits.gz
Removing : /FlashStorage/tmp/gds-grizli-v7.2-ir_drc_sci.fits.gz
Uploading : https://s3.amazonaws.com/aurelien-sepp/image/GDS/gds-grizli-v7.2-ir_drc_sci.fits
2/2
Downloading : https://s3.amazonaws.com/grizli-v2/JwstMosaics/v7/gds-grizli-v7.2-ir_drc_wht.fits.gz
Decompressing : /FlashStorage/tmp/gds-grizli-v7.2-ir_drc_wht.fits.gz
Removing : /FlashStorage/tmp/gds-grizli-v7.2-ir_drc_wht.fits.gz
Uploading : https://s3.amazonaws.com/aurelien-sepp/image/GDS/gds-grizli-v7.2-ir_drc_wht.fits


## Download files

In [4]:
import os
import boto3
# Module-level S3 client used by the download cells of this section.
s3 = boto3.client("s3")

In [2]:
# Selection used by the download cells: the field name is interpolated into
# the S3 keys and local paths. NOTE: `filter` shadows the Python builtin.
field, filter = "GDS", "f200w"

In [None]:
# Download full image
# For every band, fetch the `sci` and `wht` images from the 'aurelien-sepp'
# bucket into the local image folder of the selected field.
image_dir = f"/FlashStorage/image/{field}"
os.makedirs(f"{image_dir}/", exist_ok=True)
# The loop variable is named `band` (not `filter`) so that it neither
# overwrites the `filter` value chosen in the configuration cell nor
# shadows the Python builtin.
for band in ['f115w', 'f200w', 'f277w', 'f444w']:
    print(band)
    for kind in ('sci', 'wht'):
        image_name = f"gds-grizli-v7.2-{band}-clear_drc_{kind}.fits"
        s3.download_file('aurelien-sepp', f"image/{field}/{image_name}", f"{image_dir}/{image_name}")

In [5]:
# Download star catalog
# The catalog is copied from the 'aurelien-sepp' bucket into the local
# catalog folder of the selected field (created on demand).
catalog_dir = f"/FlashStorage/catalog/{field}/"
os.makedirs(catalog_dir, exist_ok=True)
star_catalog = "GDS_drc_cat_star.fits"
s3.download_file(
    'aurelien-sepp',
    f"catalog/{field}/{star_catalog}",
    f"{catalog_dir}{star_catalog}",
)

In [6]:
# Download DJA catalog
# The file is fetched from the 'grizli-v2' bucket and stored in the `dja`
# sub-folder of the field's image directory (created on demand).
dja_dir = f"/home/ec2-user/DAWN/DJA-SEpp/image/{field}/dja"
os.makedirs(dja_dir, exist_ok=True)
dja_catalog = 'gds-grizli-v7.2-fix_phot_apcorr.fits'
s3.download_file(
    'grizli-v2',
    f"JwstMosaics/v7/{dja_catalog}",
    f"{dja_dir}/{dja_catalog}",
)