# Image download from AWS S3

SourceXtractor++ (like SExtractor and PSFEx) needs decompressed images to run. Therefore, before running any of the following notebooks, you must download and decompress the DJA images you will use. This notebook automates this process.

In [1]:
import dja_sepp

## Batch download and decompress

To select the files you want to download and decompress, you have to use a [regular expression](https://regexr.com/). This lets you create a filter that matches precisely, and only, the images you want to work with.

In [2]:
# List the science (sci) and weight (wht) mosaics of the GDS field stored under
# 'JwstMosaics/v7' in the 'grizli-v2' bucket: one per fXXXw/fXXXm "clear"
# filter, plus the "ir" image.
# The pattern is a raw string so that `\d` reaches the regex engine as-is
# instead of being an invalid string escape (a warning on recent Pythons).
files = dja_sepp.s3.find_files(bucket='grizli-v2',
                               path='JwstMosaics/v7',
                               regex=r".+gds-grizli.+v7.2.+((f\d+(w|m)-.*clear_drc)|ir).+(sci|wht).+")
# Display the matched file names as the cell output.
files

['gds-grizli-v7.2-f090w-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f090w-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f115w-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f115w-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f150w-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f150w-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f182m-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f182m-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f200w-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f200w-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f210m-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f210m-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f250m-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f250m-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f277w-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f277w-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f300m-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f300m-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f335m-clear_drc_sci.fits.gz',
 'gds-grizli-v7.2-f335m-clear_drc_wht.fits.gz',
 'gds-grizli-v7.2-f356w-clear_drc_sci.fi

We can then download and decompress all the selected images. The function `decompress_save_to_S3` can be used to automatically save the decompressed images to an S3 bucket for later use.

In [3]:
import os

home = "/FlashStorage/DAWN/"
field = 'GDS'
# Build the local working folder once. `home` already ends with "/", so
# os.path.join is used to avoid the doubled slash that f"{home}/fields/..."
# would produce.
temp_folder = os.path.join(home, "fields", field, "image")
# Process every file returned by the search above, printing a 1-based
# progress counter ("k/total") before each one.
for i, file_name in enumerate(files, start=1):
    print(f"{i}/{len(files)}")
    # Read from the 'grizli-v2' bucket and write the result under
    # "<field>/image" in the 'aurelien-sepp' bucket.
    # NOTE(review): deleting_file=False presumably keeps the local copy in
    # temp_folder after the upload -- confirm against dja_sepp.s3.
    dja_sepp.s3.decompress_save_to_S3(file_name,
                                      in_bucket='grizli-v2', in_path='JwstMosaics/v7',
                                      out_bucket='aurelien-sepp', out_path=f"{field}/image",
                                      temp_folder=temp_folder,
                                      deleting_file=False, verbose=True)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (1954386726.py, line 7)

## Download files

If you have saved the decompressed images to an S3 bucket, you can then easily download them again with the following commands, adapting them to your needs and file naming.

In [2]:
import os
import boto3
# S3 client shared by the download cells below (they call s3.download_file).
s3 = boto3.client('s3')

In [6]:
# Local root folder; the cells below store data under f"{home}/fields/{field}/...".
home = "/home/aurelien/DAWN/DJA-SEpp"
# Field identifier, used both in S3 object keys and in local folder/file names.
field = 'primer-uds-north-grizli-v7.2'

In [4]:
# List the sci/wht mosaics stored under "<field>/image" in the
# 'aurelien-sepp' bucket: one per fXXXw/fXXXm "clear" filter, plus "ir".
# The pattern is a raw string so that `\d` reaches the regex engine as-is
# instead of being an invalid string escape (a warning on recent Pythons).
files = dja_sepp.s3.find_files(bucket='aurelien-sepp',
                               path=f'{field}/image',
                               regex=r".+((f\d+(w|m)-.*clear_drc)|ir).+(sci|wht).+")
# Display the matched file names as the cell output.
files

['ceers-full-grizli-v7.2-f115w-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f115w-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f150w-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f150w-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f182m-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f182m-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f200w-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f200w-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f210m-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f210m-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f277w-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f277w-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f356w-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f356w-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f410m-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f410m-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-f444w-clear_drc_sci.fits',
 'ceers-full-grizli-v7.2-f444w-clear_drc_wht.fits',
 'ceers-full-grizli-v7.2-ir_drc_sci.fits',
 'ceers-full-grizli-v

#### Download full image

In [5]:
# Download every image listed in `files` from the 'aurelien-sepp' bucket into
# the local image folder of the field, creating that folder if needed.
folder = f"{home}/fields/{field}/image"
os.makedirs(folder, exist_ok=True)
for image_name in files:
    print(image_name)
    remote_key = f"{field}/image/{image_name}"
    local_path = f"{folder}/{image_name}"
    s3.download_file('aurelien-sepp', remote_key, local_path)

ceers-full-grizli-v7.2-f115w-clear_drc_sci.fits
ceers-full-grizli-v7.2-f115w-clear_drc_wht.fits


KeyboardInterrupt: 

#### Download full image (specific filters only)

In [10]:
# Download only the images whose name contains one of the selected filters.
folder = f"{home}/fields/{field}/image"
filter_list = ['f090w', 'f115w', 'f150w', 'f200w', 'f277w', 'f356w', 'f444w', 'ir']
# Plain substring match: a file is kept as soon as any entry of filter_list
# occurs anywhere in its name (so 'ir' matches every name containing "ir").
# `band` is used as the inner variable so the builtin `filter` is not
# shadowed, and a generator lets any() stop at the first hit.
files_filter = [file for file in files if any(band in file for band in filter_list)]
os.makedirs(folder, exist_ok=True)
for file in files_filter:
    print(file)
    s3.download_file('aurelien-sepp', f"{field}/image/{file}", f"{folder}/{file}")

gds-grizli-v7.2-f444w-clear_drc_sci.fits
gds-grizli-v7.2-f444w-clear_drc_wht.fits


#### Download star catalog

In [6]:
# Download the star catalog of the field from the 'aurelien-sepp' bucket into
# the local catalog folder, creating that folder if needed.
folder = f"{home}/fields/{field}/catalog"
os.makedirs(folder, exist_ok=True)
star_cat_name = f"{field}_drc_cat_star.fits"
s3.download_file(
    'aurelien-sepp',
    f"{field}/catalog/{star_cat_name}",
    f"{folder}/{star_cat_name}",
)

#### Download DJA catalog

In [4]:
import re

# Download the "<field>-fix_phot_apcorr.fits" catalog from the 'grizli-v2'
# bucket into the local "catalog/dja" folder of the field.
folder = f"{home}/fields/{field}/catalog/dja"
os.makedirs(folder, exist_ok=True)
# `field` contains regex metacharacters (e.g. the "." in "v7.2"), so the
# file name is escaped to be matched literally.
matches = dja_sepp.s3.find_files(bucket='grizli-v2',
                                 path='JwstMosaics/v7',
                                 regex=re.escape(f"{field}-fix_phot_apcorr.fits"))
# Fail with an explicit message instead of an opaque IndexError when the
# catalog does not exist for this field.
if len(matches) == 0:
    raise FileNotFoundError(
        f"No '{field}-fix_phot_apcorr.fits' found in s3://grizli-v2/JwstMosaics/v7"
    )
cat_dja = matches[0]
print(cat_dja)
s3.download_file('grizli-v2', f"JwstMosaics/v7/{cat_dja}", f"{folder}/{cat_dja}")

ceers-full-grizli-v7.2-fix_phot_apcorr.fits


#### Download DJA photo-z catalog

In [5]:
import re
import tarfile

# Download the "<field>-fix.photoz.tar.gz" archive from the 'grizli-v2'
# bucket, extract its FITS members into the local "catalog/dja" folder of the
# field, then delete the downloaded archive.
folder = f"{home}/fields/{field}/catalog/dja"
os.makedirs(folder, exist_ok=True)
# `field` and the file suffix contain regex metacharacters ("."), so the
# file name is escaped to be matched literally.
matches = dja_sepp.s3.find_files(bucket='grizli-v2',
                                 path='JwstMosaics/v7',
                                 regex=re.escape(f"{field}-fix.photoz.tar.gz"))
# Fail with an explicit message instead of an opaque IndexError when the
# archive does not exist for this field.
if len(matches) == 0:
    raise FileNotFoundError(
        f"No '{field}-fix.photoz.tar.gz' found in s3://grizli-v2/JwstMosaics/v7"
    )
cat_dja = matches[0]
print(cat_dja)
archive_path = f"{folder}/{cat_dja}"
s3.download_file('grizli-v2', f"JwstMosaics/v7/{cat_dja}", archive_path)
with tarfile.open(archive_path) as archive:
    # Keep only the members whose name contains '.fits'.
    fits_members = [name for name in archive.getnames() if '.fits' in name]
    # NOTE(review): members are extracted using the paths stored in the
    # archive; if the archive source is not fully trusted, consider tarfile's
    # extraction filters (available on recent Python versions).
    for member in fits_members:
        archive.extract(member, folder)
# The archive is only needed for the extraction above.
os.remove(archive_path)

gds-grizli-v7.2-fix.photoz.tar.gz


#### Download PSF

In [7]:
# Download every ".psf" file stored under "<field>/psfex" in the
# 'aurelien-sepp' bucket into the local psfex folder of the field.
folder = f"{home}/fields/{field}/psfex"
os.makedirs(folder, exist_ok=True)
# Raw string so that `\.` reaches the regex engine as an escaped dot instead
# of being an invalid string escape (a warning on recent Pythons).
# Note: this reassigns `files`, replacing the image list from earlier cells.
files = dja_sepp.s3.find_files(bucket='aurelien-sepp',
                               path=f'{field}/psfex',
                               regex=r".+\.psf")
for file in files:
    print(file)
    s3.download_file('aurelien-sepp', f"{field}/psfex/{file}", f"{folder}/{file}")

primer-uds-north-grizli-v7.2-f090w-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f115w-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f140m-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f150w-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f200w-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f277w-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f356w-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f410m-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f430m-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f444w-clear_drc_cat_star_psf.psf
primer-uds-north-grizli-v7.2-f460m-clear_drc_cat_star_psf.psf
