### imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

from joblib import Parallel, delayed
import dask.dataframe as dd
from dask.diagnostics import ProgressBar

from astropy.coordinates import SkyCoord
import astropy.units as u
from astroquery.mast import Observations
from astropy.io import fits
from astropy.wcs import WCS

import requests
import os
import time
import glob
from tqdm import tqdm
tqdm.pandas()

In [2]:
# Root directory of the catalogue data; the parquet path read below is built from it.
folder = 'E:/GZ-DESI/data'

In [3]:
# Load the merged "interesting params" catalogue from the data folder.
df = pd.read_parquet(
    f'{folder}/2023-03-15-cats/definitive-merged-interesting-params.parquet',
    engine = 'pyarrow',
)

In [4]:
# Number of catalogue rows per value of the `category` column.
df.category.value_counts()

merger               185278
major_interaction     11594
minor_interaction       267
Name: category, dtype: int64

In [5]:
# Split the catalogue by `category`, keeping only the identifier and the
# sky-position columns for each subset.
df_merged = df.loc[df['category'] == 'merger', ['id_str', 'ra', 'dec']]

df_major = df.loc[df['category'] == 'major_interaction', ['id_str', 'ra', 'dec']]

df_minor = df.loc[df['category'] == 'minor_interaction', ['id_str', 'ra', 'dec']]

In [6]:
# df_merged_coords = (
#     df_merged
#     .assign(coords = df_merged.apply(lambda row: SkyCoord(ra = row.ra * u.deg, dec = row.dec * u.deg, frame = 'icrs'), axis = 1))
# )

# df_major_coords = (
#     df_major
#     .assign(coords = df_major.apply(lambda row: SkyCoord(ra = row.ra * u.deg, dec = row.dec * u.deg, frame = 'icrs'), axis = 1))
# )

# df_minor_coords = (
#     df_minor
#     .assign(coords = df_minor.apply(lambda row: SkyCoord(ra = row.ra * u.deg, dec = row.dec * u.deg, frame = 'icrs'), axis = 1))
# )

In [7]:
# Peek at the first merger row to check the id_str / ra / dec layout.
df_merged.iloc[0]

id_str    390393_113
ra        315.670729
dec        10.126369
Name: 0, dtype: object

In [8]:
# http://www.legacysurvey.org/viewer/fits-cutout?ra=315.670729&dec=10.126369&layer=ls-dr10&pixscale=0.262

In [9]:
def get_fits(row, category = 'mergers', image_root = 'E:/GZ-DESI/images', retries = 5, wait = 1, timeout = 60):
    """Download the Legacy Survey DR10 FITS cutout for one catalogue row.

    The cutout is stored as ``{image_root}/{category}/{id_str}-cutout.fits``.
    If that file already exists nothing is downloaded and its path is
    returned immediately.

    Parameters
    ----------
    row : object with ``ra``, ``dec`` and ``id_str`` attributes
        For example a row of a DataFrame holding those columns.
    category : str
        Sub-folder of ``image_root`` the cutout is written to.
    image_root : str
        Root folder for all downloaded cutouts.
    retries : int
        Maximum number of download attempts.
    wait : float
        Seconds to sleep between two attempts.
    timeout : float
        Timeout in seconds handed to ``requests.get``, so that a stalled
        connection cannot block the whole run.

    Returns
    -------
    str
        Path of the cutout on disk, or the literal string ``'Failed'`` when
        no attempt returned HTTP 200.
    """
    ra = row.ra
    dec = row.dec
    id_str = row.id_str

    save_dir = f'{image_root}/{category}/{id_str}-cutout.fits'
    if os.path.exists(save_dir):
        return save_dir

    url = f'http://www.legacysurvey.org/viewer/fits-cutout?ra={ra}&dec={dec}&layer=ls-dr10&pixscale=0.262'

    # Success is tracked through `content` rather than the loop counter, so a
    # download that only succeeds on the final attempt is still saved.
    content = None
    for attempt in range(retries):
        try:
            r = requests.get(url, timeout = timeout)
        except requests.RequestException:
            # Only network-level errors are retried; KeyboardInterrupt and
            # programming errors propagate instead of being swallowed.
            r = None

        if r is not None and r.status_code == 200:
            content = r.content
            break

        # No point in sleeping after the last attempt.
        if attempt < retries - 1:
            time.sleep(wait)

    if content is None:
        return 'Failed'

    os.makedirs(os.path.dirname(save_dir), exist_ok = True)

    # Write to a temporary name and rename afterwards: an interrupted write
    # must not leave a truncated file that the existence check above would
    # later mistake for a finished download.
    tmp_path = f'{save_dir}.part'
    with open(tmp_path, 'wb') as f:
        f.write(content)
    os.replace(tmp_path, save_dir)

    return save_dir

In [10]:
# Map each id_str to a dict of its remaining columns ({'ra': ..., 'dec': ...}).
df_merg_dict = df_merged.set_index('id_str').to_dict(orient = 'index')
# df_merg_dict

In [11]:
# Attach the path where each merger's cutout is expected to be stored locally.
df_tmp = df_merged.assign(
    local_paths = lambda frame: frame['id_str'].map(
        lambda x: f'E:/GZ-DESI/images/mergers/{x}-cutout.fits'
    )
)

In [12]:
# Flag, with a progress bar, which of the expected cutout files are already on disk.
df_exists = df_tmp.assign(
    existing = lambda frame: frame['local_paths'].progress_apply(os.path.exists)
)

100%|██████████| 185278/185278 [01:29<00:00, 2065.14it/s]


In [13]:
# Count how many expected cutout files are present (True) versus missing (False).
df_exists.existing.value_counts()

True     172254
False     13024
Name: existing, dtype: int64

In [14]:
# Keep only the rows whose cutout is still missing, and drop the helper columns.
df_red = (
    df_exists
    .loc[df_exists['existing'].eq(False)]
    .drop(columns = ['local_paths', 'existing'])
)

In [15]:
# df_merg_dict = df_red.set_index('id_str').to_dict(orient = 'index')

In [16]:
# paths_dict = {}
# for i in tqdm(list(df_merg_dict.keys())):
#     paths_dict[i] = get_fits([df_merg_dict[i]['ra'], df_merg_dict[i]['dec'], i])

In [17]:
# df_merged_paths = (
#     df_merged
#     .assign(fits_path = df_merged.progress_apply(lambda row: get_fits([row.ra, row.dec, row.id_str], 'mergers'), axis = 1))
# )

In [24]:
# Download progress: number of cutout files found in the mergers folder divided
# by the number of merger rows in the catalogue.
# NOTE(review): this cell has a later execution count (In [24]) than the download
# cells that follow it, so the recorded output was produced after the download;
# on a top-to-bottom run it reports the state before the download starts.
files = glob.glob('E:/GZ-DESI/images/mergers/*-cutout.fits')
progress_check = len(files) / len(df_merged)
print(f'Progress is {progress_check * 100}%.')

Progress is 100.0%.


In [19]:
# files

In [20]:
# results = Parallel(n_jobs=4)(delayed(get_fits)(i) for i in tqdm(zip(df_merged['ra'], df_merged['dec'],df_merged['id_str'])))

In [21]:
# Wrap the rows that still lack a local cutout in a dask DataFrame with 8 partitions.
ddf = dd.from_pandas(df_red, npartitions = 8)

In [22]:
# Apply get_fits to every row (with its default category, 'mergers') and attach
# the returned value as the im_paths column.
# NOTE(review): dask documents `meta` for a Series result as a (name, dtype)
# tuple or an empty Series; confirm that the bare string 'string' is intended.
dask_series = ddf.apply(get_fits, axis = 1, meta = 'string')
ddf['im_paths'] = dask_series



In [23]:
# Run the dask graph (this is where get_fits is actually executed) while showing
# a progress bar, then preview the resulting frame.
with ProgressBar():
    df_merged_paths = ddf.compute()
df_merged_paths.head()

[########################################] | 100% Completed |  1hr 20min 45.7s


Unnamed: 0,id_str,ra,dec,im_paths
68402,49620_2679,56.881606,-58.227757,E:/GZ-DESI/images/mergers/49620_2679-cutout.fits
68403,51151_791,57.405297,-57.835407,E:/GZ-DESI/images/mergers/51151_791-cutout.fits
68404,447498_2823,9.277042,20.781403,E:/GZ-DESI/images/mergers/447498_2823-cutout.fits
68405,447498_2757,9.291057,20.781243,E:/GZ-DESI/images/mergers/447498_2757-cutout.fits
68406,448846_1830,9.191563,20.970002,E:/GZ-DESI/images/mergers/448846_1830-cutout.fits


In [None]:
# df_merged_paths = (
#     df_merged
#     .assign(fits_path = df_merged.progress_apply(lambda row: get_fits(row.ra, row.dec, row.id_str, 'mergers'), axis = 1))
# )