In [1]:
import pandas as pd
import sys
import time
from tqdm import tqdm
tqdm.pandas()

from astroquery.irsa import Irsa
from astropy.coordinates import SkyCoord
import astropy.units as u

  import sys


In [2]:
# Root directory of the input data; interpolated into the parquet path when
# the merged catalogue is loaded.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
folder = 'E:/GZ-DESI/data'

In [3]:
# Load the merged "interesting parameters" catalogue from parquet using the
# pyarrow engine.
df_int = pd.read_parquet(f'{folder}/2023-03-15-cats/definitive-merged-interesting-params.parquet', engine = 'pyarrow')

Catalogue keyword = cosmos2015

### Finding a Valid Query

In [32]:
# Bounding box used to pre-filter positions before querying the catalogue:
# [ra_min, ra_max, dec_min, dec_max] in degrees, i.e. +/- 2 deg around
# (RA, Dec) = (150.11916667, 2.20583333).
# The upper Dec bound previously added the Dec value to itself instead of 2.
limits_cosmos = [150.11916667 - 2, 150.11916667 + 2, 2.20583333 - 2, 2.20583333 + 2]

I get everything from the Catalogue! The columns I want are:

    1. id - Running Object Number
    2. SSFR_BEST - The Best Fit log Specific SFR Using BC03 Templates. Taken at the minimum Chi Squared
    3. SSFR_MED_MAX68 - Upper limit on the 68% Confidence Interval
    4. SSFR_MED_MIN68 - Lower limit on the 68% confidence interval.
    5. SSFR_MED - log sSFR from BC03 best-fit template. median of the PDF
    6. SFR_BEST - log SFR from BC03 best-fit template. Taken at the minimum chi2
    7. SFR_MED_MAX68 -  upper limit, 68% confidence level
    8. SFR_MED_MIN68 -  lower limit, 68% confidence level
    9. SFR_MED - log SFR from BC03 best-fit template. median of the PDF
    10. MASS_BEST - 	 log Stellar mass from BC03 best-fit template
    11. MASS_MED_MAX68 - 	 upper limit, 68% confidence level
    12. MASS_MED_MIN68 -  lower limit, 68% confidence level
    13. MASS_MED - log Stellar mass from BC03 best-fit template. median of the PDF
    14. AGE -  BC03 age
    15. TYPE - 	 Type: 0 = galaxy, 1 = star, 2 = X-ray source
    16. ZPDF - 	 photo-z measured using the galaxy templates
    17. FLAG_DEEP - 1: Ultra-deep stripes, 0: deep stripes
    18. FLAG_SHALLOW - Shallow Flag
    19. l_r - 	 log(dust corr lum in erg/s/Hz) in r filter
    20. l_k - log(dust corr lum in erg/s/Hz) in K filter
    21. l_nu -  log(dust corr lum in erg/s/Hz) in NUV filter
    22. dist - I have assumed this is the separation between my coordinates and the catalogue ones.

In [34]:
def get_table(ra, dec, reg_limits):
    """Return the closest 'cosmos2015' catalogue match to a sky position.

    Parameters
    ----------
    ra, dec : float
        Position in degrees (multiplied by ``u.deg`` to build the coordinate).
    reg_limits : sequence of four numbers
        ``[ra_min, ra_max, dec_min, dec_max]`` in degrees. Positions outside
        this box are rejected without contacting the service.

    Returns
    -------
    dict or str
        A dict of the selected catalogue columns for the row with the smallest
        ``dist`` value, or one of the sentinel strings:

        * ``'outwith_cosmos'`` - the position lies outside ``reg_limits``;
        * ``'null'`` - the query returned no rows within 5 arcsec;
        * ``'Failed'`` - the query/conversion raised on every attempt.
    """
    # Use the limits passed in by the caller rather than a module-level global.
    ra_min, ra_max, dec_min, dec_max = reg_limits
    if ra < ra_min or ra > ra_max or dec < dec_min or dec > dec_max:
        return 'outwith_cosmos'

    coord = SkyCoord(ra = ra * u.deg, dec = dec * u.deg)

    wanted_columns = [
        'id',
        'ssfr_best',
        'ssfr_med_max68',
        'ssfr_med_min68',
        'ssfr_med',
        'sfr_best',
        'sfr_med_max68',
        'sfr_med_min68',
        'sfr_med',
        'mass_best',
        'mass_med_max68',
        'mass_med_min68',
        'mass_med',
        'age',
        'type',
        'zpdf',
        'flag_deep',
        'flag_shallow',
        'l_r',
        'l_k',
        'l_nu',
        'dist',
    ]

    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            # The remote query is the step that can fail transiently, so it is
            # retried together with the conversion to a DataFrame.
            table = Irsa.query_region(coord, catalog = 'cosmos2015', radius = 5 * u.arcsec)
            if len(table) == 0:
                return 'null'
            table_df = table.to_pandas().sort_values('dist', ascending = True)
            break
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            if attempt < max_attempts - 1:
                time.sleep(5)
    else:
        # Only reached when no attempt hit `break`, i.e. every attempt raised.
        # A success on the final attempt is therefore no longer reported as a
        # failure.
        return 'Failed'

    # Rows are sorted by 'dist', so the first row is the nearest match.
    table_dict = table_df[wanted_columns].iloc[0].to_dict()

    # Short pause between successive successful queries.
    time.sleep(0.01)

    return table_dict

In [25]:
# Reduced view of the catalogue: identifier, sky position and category only.
df_red = df_int[['id_str', 'ra', 'dec', 'category']]

In [26]:
# Lookup of id_str -> {'ra': ..., 'dec': ...}, built row by row with a
# progress bar over the positional row indices.
df_dict = {
    df_red.id_str.iloc[row_pos]: {
        'ra' : df_red.ra.iloc[row_pos],
        'dec' : df_red.dec.iloc[row_pos],
    }
    for row_pos in tqdm(range(len(df_red)))
}

100%|██████████| 197139/197139 [00:05<00:00, 33645.18it/s]


In [30]:
# Spot-check: right ascension stored for one example identifier.
df_dict['390393_113']['ra']

315.6707291645635

In [40]:
# Run the catalogue lookup for every galaxy, keyed by its id_str. Each value
# is whatever get_table returns for that position.
results_dict = {}
for key in tqdm(list(df_dict.keys())):
    position = df_dict[key]
    results_dict[key] = get_table(position['ra'], position['dec'], limits_cosmos)

 91%|█████████ | 179423/197139 [05:05<00:30, 587.04it/s] 
  0%|          | 0/197139 [05:27<?, ?it/s]
100%|██████████| 197139/197139 [07:51<00:00, 417.93it/s] 


In [44]:
# Two-column frame: 'id_str' and 'matched', where 'matched' holds the raw
# get_table result (a dict of catalogue columns or a sentinel string).
# Going through an object Series stores each result as a single cell no matter
# which kind of value comes first; DataFrame.from_dict(orient='index') switches
# to nested-dict expansion when the first value is a dict and then fails on the
# string sentinels.
results_df = (
    pd.Series(results_dict, name = 'matched', dtype = object)
    .rename_axis('id_str')
    .reset_index()
)

In [48]:
# Keep only rows whose lookup produced an actual catalogue match (a dict).
# This drops every sentinel string returned by get_table - 'outwith_cosmos',
# 'null' and also 'Failed', which the previous string comparison let through
# and which cannot be expanded into columns afterwards.
df_in_cosmos = results_df[results_df['matched'].map(lambda match: isinstance(match, dict))]

In [50]:
# Collapse the matched rows back into a plain {id_str: match} mapping.
dict_cosmos = df_in_cosmos.set_index('id_str')['matched'].to_dict()

In [53]:
# Expand each match dict into its own columns, one row per id_str, and expose
# the identifier as a regular 'id_str' column.
exp_df = (
    pd.DataFrame.from_dict(dict_cosmos, orient = 'index')
    .rename_axis('id_str')
    .reset_index()
)

In [54]:
# Display the expanded match table.
exp_df

Unnamed: 0,id_str,id,ssfr_best,ssfr_med_max68,ssfr_med_min68,ssfr_med,sfr_best,sfr_med_max68,sfr_med_min68,sfr_med,...,mass_med,age,type,zpdf,flag_deep,flag_shallow,l_r,l_k,l_nu,dist
0,341048_283,235022.0,-42.46,-12.532,-12.668,-12.6,-31.311,-1.432,-1.568,-1.5,...,11.15,8000000000.0,0.0,0.21,0.0,0.0,29.53,29.933,26.985,0.153464
1,341048_405,245922.0,-21.221,-13.032,-13.168,-13.1,-10.03,-1.832,-1.968,-1.9,...,11.15,9000000000.0,0.0,0.22,0.0,0.0,29.54,29.956,26.99,0.185488
2,341048_403,246058.0,-19.776,-12.532,-12.668,-12.6,-8.535,-1.332,-1.468,-1.4,...,11.25,8000000000.0,0.0,0.21,0.0,0.0,29.63,30.03,27.087,0.151886
3,342489_2385,442649.0,-12.053,-12.032,-12.168,-12.1,-0.685,-0.632,-0.768,-0.7,...,11.35,10000000000.0,2.0,0.302,1.0,0.0,29.853,30.15,27.895,0.601622
4,345365_4079,837900.0,0.0,,,,,,,,...,,,-9.0,,0.0,0.0,,,,0.030736
5,345366_3828,843301.0,0.0,,,,,,,,...,,,1.0,,0.0,1.0,,,,0.031042
6,343927_4621,678588.0,-13.286,-13.232,-13.368,-13.3,-2.488,-2.432,-2.568,-2.5,...,10.8,3500000000.0,0.0,0.33,1.0,0.0,29.49,29.835,27.227,0.057584
7,346805_3892,1018742.0,0.0,,,,,,,,...,,,0.0,,0.0,0.0,,,,0.07149
8,346805_3893,1026511.0,0.0,,,,,,,,...,,,0.0,,0.0,0.0,,,,0.062083
9,348245_485,1058803.0,-31.613,-16.175,-16.944,-16.826,-20.484,-5.075,-5.744,-5.626,...,11.141,5500000000.0,0.0,0.33,0.0,0.0,29.628,29.995,27.144,0.032961


In [64]:
# Attach the original catalogue columns to every matched row (left join on
# id_str, so all rows of exp_df are kept).
full_df = pd.merge(exp_df, df_int, how = 'left', on = 'id_str')

In [65]:
# Row count after the merge.
len(full_df)

21

In [59]:
# Drop rows that share the same catalogue 'id', keeping the first occurrence.
full_df_dedup = full_df.drop_duplicates(subset = ['id'], keep = 'first')

In [60]:
# Row count after de-duplication on 'id'.
len(full_df_dedup)

21

In [61]:
# Write the de-duplicated matches to CSV. No `index` argument is passed, so
# the DataFrame index is written as an extra leading column.
# NOTE(review): absolute, user-specific output path — adjust before running elsewhere.
full_df_dedup.to_csv('C:/Users/oryan/Documents/mergers_in_desi/data/desi-cosmos-matched-df.csv')