In [99]:
import pandas as pd
import sys
import time
from tqdm import tqdm
tqdm.pandas()

from astroquery.irsa import Irsa
from astropy.coordinates import SkyCoord
import astropy.units as u

In [2]:
# Directory that holds the input catalogue read by this notebook.
folder = "C:/Users/oryan/Documents/esac-project/data"

In [3]:
# Load the interacting-galaxy catalogue; the first CSV column becomes the row index.
catalogue_path = f'{folder}/interacting-catalogue.csv'
df = pd.read_csv(catalogue_path, index_col = 0)

Catalogue keyword = cosmos2015

### Test Calling IRSA

In [22]:
# Sky position of the first catalogue row (degrees, FK5 frame), used as a
# single trial target before querying the whole catalogue.
first_ra = df.RA.iloc[0]
first_dec = df.Dec.iloc[0]
test_coords = SkyCoord(ra = first_ra * u.deg, dec = first_dec * u.deg, frame = 'fk5')

In [24]:
# Trial cone search: everything in the 'cosmos2015' catalogue within 5 arcsec
# of the test position.
search_radius = 5 * u.arcsec
table = Irsa.query_region(test_coords, catalog = 'cosmos2015', radius = search_radius)



### Finding a Valid Query

In [36]:
# Search box as [ra_min, ra_max, dec_min, dec_max] in degrees: every edge sits
# 2 deg from the box centre (RA = 150.11916667, Dec = 2.20583333).
# The upper Dec edge previously added the centre value to itself instead of the
# 2 deg half-width, which made the box asymmetric.
cosmos_centre_ra, cosmos_centre_dec = 150.11916667, 2.20583333
cosmos_half_width = 2
limits_cosmos = [
    cosmos_centre_ra - cosmos_half_width,
    cosmos_centre_ra + cosmos_half_width,
    cosmos_centre_dec - cosmos_half_width,
    cosmos_centre_dec + cosmos_half_width,
]

I get everything from the Catalogue! The columns I want are:

    1. id - Running Object Number
    2. SSFR_BEST - The Best Fit log Specific SFR Using BC03 Templates. Taken at the minimum Chi Squared
    3. SSFR_MED_MAX68 - Upper limit on the 68% Confidence Interval
    4. SSFR_MED_MIN68 - Lower limit on the 68% confidence interval.
    5. SSFR_MED - log sSFR from BC03 best-fit template. median of the PDF
    6. SFR_BEST - log SFR from BC03 best-fit template. Taken at the minimum chi2
    7. SFR_MED_MAX68 -  upper limit, 68% confidence level
    8. SFR_MED_MIN68 -  lower limit, 68% confidence level
    9. SFR_MED - log SFR from BC03 best-fit template. median of the PDF
    10. MASS_BEST - 	 log Stellar mass from BC03 best-fit template
    11. MASS_MED_MAX68 - 	 upper limit, 68% confidence level
    12. MASS_MED_MIN68 -  lower limit, 68% confidence level
    13. MASS_MED - log Stellar mass from BC03 best-fit template. median of the PDF
    14. AGE -  BC03 age
    15. TYPE - 	 Type: 0 = galaxy, 1 = star, 2 = X-ray source
    16. ZPDF - 	 photo-z measured using the galaxy templates
    17. FLAG_DEEP - 1: Ultra-deep stripes, 0: deep stripes
    18. FLAG_SHALLOW - Shallow Flag
    19. l_r - 	 log(dust corr lum in erg/s/Hz) in r filter
    20. l_k - 	 log(dust corr lum in erg/s/Hz) in K filter
    21. l_nu -  log(dust corr lum in erg/s/Hz) in NUV filter
    22. dist - I have assumed this is the separation between my coordinates and the catalogue ones.

In [105]:
# Columns of the 'cosmos2015' catalogue that are kept for each matched source.
_COSMOS_COLUMNS = [
    'id',
    'ssfr_best',
    'ssfr_med_max68',
    'ssfr_med_min68',
    'ssfr_med',
    'sfr_best',
    'sfr_med_max68',
    'sfr_med_min68',
    'sfr_med',
    'mass_best',
    'mass_med_max68',
    'mass_med_min68',
    'mass_med',
    'age',
    'type',
    'zpdf',
    'flag_deep',
    'flag_shallow',
    'l_r',
    'l_k',
    'l_nu',
    'dist',
]


def get_table(id_str, ra, dec, reg_limits, radius_arcsec = 5, max_attempts = 5, retry_wait = 5):
    """Look up a sky position in the IRSA 'cosmos2015' catalogue.

    Parameters
    ----------
    id_str
        Source identifier. Accepted so callers can pass it along; not used here.
    ra, dec
        Position to search around, in degrees.
    reg_limits
        Sequence ``[ra_min, ra_max, dec_min, dec_max]`` in degrees. Positions
        outside this box are rejected without contacting IRSA.
    radius_arcsec
        Cone-search radius in arcseconds.
    max_attempts
        How many times the query and table conversion are tried before giving up.
    retry_wait
        Seconds to sleep between failed attempts.

    Returns
    -------
    dict or str
        A dict of the ``_COSMOS_COLUMNS`` values for the returned row with the
        smallest ``dist``, or one of the string sentinels ``'outwith_cosmos'``
        (position outside ``reg_limits``), ``'null'`` (query returned no rows)
        or ``'Failed'`` (every attempt raised). Callers must filter out the
        string sentinels before treating results as dicts.
    """
    # Use the limits that were passed in rather than a notebook-level global.
    ra_min, ra_max, dec_min, dec_max = reg_limits
    if ra < ra_min or ra > ra_max or dec < dec_min or dec > dec_max:
        return 'outwith_cosmos'

    coord = SkyCoord(ra = ra * u.deg, dec = dec * u.deg)

    # The remote query is the step that can fail transiently, so it sits inside
    # the retry loop together with the conversion to a DataFrame.
    for attempt in range(max_attempts):
        try:
            table = Irsa.query_region(coord, catalog = 'cosmos2015', radius = radius_arcsec * u.arcsec)
            if len(table) == 0:
                return 'null'
            table_df = table.to_pandas().sort_values('dist', ascending = True)
            break
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts a long run.
            # No point sleeping after the final attempt.
            if attempt < max_attempts - 1:
                time.sleep(retry_wait)
    else:
        # Reached only when no attempt hit `break`; a success on the last
        # attempt is therefore no longer misreported as a failure.
        return 'Failed'

    # After the ascending sort the first row has the smallest 'dist'.
    table_dict = table_df[_COSMOS_COLUMNS].iloc[0].to_dict()

    # Short pause between successive requests.
    time.sleep(0.01)

    return table_dict

In [106]:
# Only the identifier and the sky position are needed for the catalogue lookup.
lookup_columns = ['SourceID', 'RA', 'Dec']
df_red = df[lookup_columns]

In [107]:
# Run one catalogue lookup per source (with a progress bar) and attach the raw
# result of each lookup as a new 'dict_results' column.
def _lookup_row(row):
    return get_table(row.SourceID, row.RA, row.Dec, limits_cosmos)

lookup_results = df_red.progress_apply(_lookup_row, axis = 1)
df_results = df_red.assign(dict_results = lookup_results)

100%|██████████| 21926/21926 [2:08:08<00:00,  2.85it/s]


In [113]:
# Keep only the rows whose lookup produced a match dict. This drops every
# string sentinel get_table can return ('outwith_cosmos', 'null' and 'Failed');
# 'Failed' was previously let through and would break the dict expansion below.
has_match = df_results.dict_results.map(lambda result: isinstance(result, dict))
df_in_cosmos = df_results[has_match]

In [120]:
# Map each SourceID to the dict of catalogue values found for it.
results_by_source = df_in_cosmos.set_index('SourceID')['dict_results']
dict_cosmos = results_by_source.to_dict()

In [130]:
# Expand the per-source dicts into one row per SourceID and one column per
# catalogue field, then turn the SourceID index into an ordinary column.
exp_df = (
    pd.DataFrame.from_dict(dict_cosmos, orient = 'index')
    .rename_axis('SourceID')
    .reset_index()
)

In [131]:
# Display the expanded table of matched sources.
exp_df

Unnamed: 0,SourceID,id,ssfr_best,ssfr_med_max68,ssfr_med_min68,ssfr_med,sfr_best,sfr_med_max68,sfr_med_min68,sfr_med,...,mass_med,age,type,zpdf,flag_deep,flag_shallow,l_r,l_k,l_nu,dist
0,4000705532455,590539.0,-8.847,-8.463,-8.865,-8.712,0.508,0.835,0.477,0.655,...,9.329,7.187010e+08,0.0,0.536,0.0,1.0,28.886,28.866,28.634,0.760531
1,4000705532984,610283.0,-10.040,-9.922,-10.065,-9.990,0.536,0.486,0.335,0.411,...,10.500,3.000000e+09,0.0,0.230,0.0,1.0,29.517,29.731,28.687,0.685933
2,4000705533312,621053.0,-10.258,-10.232,-10.368,-10.300,-0.058,-0.032,-0.168,-0.100,...,10.150,3.500000e+09,0.0,0.310,0.0,1.0,29.057,29.312,28.098,0.585371
3,4000705533383,617834.0,-9.445,-9.322,-9.470,-9.398,0.132,0.179,0.034,0.107,...,9.508,4.500000e+09,0.0,0.621,0.0,1.0,28.740,28.785,28.273,0.415082
4,4000705539435,880099.0,-9.392,-9.337,-9.489,-9.419,0.369,0.370,0.228,0.297,...,9.710,1.608980e+09,0.0,0.260,1.0,0.0,29.013,29.102,28.507,0.803465
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3804,6000536185363,892445.0,-9.499,-9.427,-9.568,-9.498,0.037,0.173,0.032,0.102,...,9.551,3.000000e+09,0.0,0.350,1.0,0.0,28.752,28.803,28.203,1.228642
3805,6000536185496,898134.0,-9.248,-8.910,-9.279,-9.164,0.670,0.918,0.557,0.683,...,9.902,2.300000e+09,0.0,0.868,1.0,0.0,29.262,29.261,28.826,1.358730
3806,6000536185585,904074.0,-8.617,-8.532,-8.670,-8.601,1.241,1.368,1.230,1.299,...,9.903,5.708840e+08,0.0,0.636,1.0,0.0,29.534,29.390,29.368,1.480435
3807,6000536185603,906057.0,-9.673,-9.632,-9.768,-9.700,1.555,1.667,1.530,1.598,...,11.249,3.500000e+09,2.0,0.920,1.0,0.0,30.359,30.443,29.727,1.535701


In [134]:
# Attach the original catalogue columns to every matched source; the left join
# keeps exactly the rows of exp_df.
full_df = pd.merge(exp_df, df, how = 'left', on = 'SourceID')

In [139]:
# When several sources matched the same catalogue object ('id'), keep only the
# first such row.
repeated_match = full_df.duplicated(subset = 'id', keep = 'first')
full_df_dedup = full_df[~repeated_match]

In [140]:
# Write the matched, de-duplicated table into the data directory. The path is
# built from `folder` so the location is configured in one place only.
full_df_dedup.to_csv(f'{folder}/cosmos-matched-df.csv')