# Matching COSMOS 2020

Originally, I matched COSMOS with the 2015 catalogue. This had significantly worse measurements of redshift than using the 2020 catalogue with EAZY. Going to redo the matching based on this and sack using the 2015 catalgoue completely.

In [1]:
import pandas as pd
import sys
import time
import numpy as np
from tqdm import tqdm
tqdm.pandas()

from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u

from IPython.display import clear_output

In [2]:
with fits.open('E:/temporary-cosmos-big-data/COSMOS2020_CLASSIC_R1_v2.1_p3.fits.gz') as hdul:
    data = hdul[1].data
    header = hdul[1].header

In [3]:
folder = 'C:/Users/oryan/Documents/mergers-in-cosmos'
data_folder = f'{folder}/data'

In [4]:
df_tmp = pd.read_csv(f'{data_folder}/categorised_sources.csv', index_col = 0)

In [5]:
df_tmp

Unnamed: 0_level_0,SourceID,RA,Dec,category
col1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,4000704963129,187.983807,58.010524,stage4
1,4000704963271,187.980323,58.026481,stage1
2,4000704963422,187.938181,58.039949,stage1
3,4000705131622,217.216271,34.015278,stage2
4,4000705131630,217.207839,34.015576,stage4
...,...,...,...,...
21921,6000536185496,150.500839,2.662972,stage1
21922,6000536185585,150.487245,2.671166,stage3
21923,6000536185603,150.508543,2.672499,stage1
21924,6000536185717,150.486897,2.687377,stage4


### Finding the Ones in COSMOS

In [6]:
limits_cosmos = [150.11916667 - 2, 150.11916667 + 2, 2.20583333 - 2, 2.20583333 + 2.20583333]

In [7]:
def getting_cosmos_params(ra, dec, limits_cosmos, init_dict):
    export_dict = init_dict.copy()
    if ra < limits_cosmos[0] or ra > limits_cosmos[1] or dec < limits_cosmos[2] or dec > limits_cosmos[3]:
        return 'outwith_cosmos'
    
    record = data[(data['ALPHA_J2000'] < (ra + (30 / 3600))) & (data['ALPHA_J2000'] > (ra - (30 / 3500))) & (data['DELTA_J2000'] < (dec + (30 / 3600))) & (data['DELTA_J2000'] > (dec - (30 / 3600)))]
    
    table = Table(record)
    
    df = table.to_pandas()[['ID','ALPHA_J2000', 'DELTA_J2000', 'ez_z_phot', 'lp_type']]
    df = df.query('lp_type == 0').drop(columns = 'lp_type')
    
    if len(df) < 0.5:
        return 'null'
    
    df_sep = (
        df
        .assign(ang_sep = df.apply(lambda row: abs(SkyCoord(ra = row.ALPHA_J2000 * u.deg, dec = row.DELTA_J2000 * u.deg, frame = 'icrs').separation(SkyCoord(ra = ra * u.deg, dec = dec * u.deg, frame = 'icrs')).to(u.deg)) / u.deg, axis = 1))
    )
        
    prim_id = df_sep.sort_values(by = 'ang_sep', ascending = True)
    
    prim_gal_id = prim_id['ID'].iloc[0]
    z_prim = prim_id['ez_z_phot'].iloc[0]
    
    prim_galaxy_record = record[record['ID'] == prim_gal_id]
    
    sec_z = prim_id.assign(z_diff = prim_id.ez_z_phot.apply(lambda x: abs(x - z_prim)))
    
    sec_id_df = sec_z.sort_values(by = 'z_diff', ascending = True).query('z_diff != 0.0')
    
    sec_test = sec_id_df.query('z_diff < 0.05')
    if len(sec_test) < 0.5:
        for i in prim_galaxy_record.names:
            export_dict[f'{i}_1'] = prim_galaxy_record[i][0]
            export_dict[f'{i}_2'] = None
        return init_dict
    
    sec_gal_id = sec_test['ID'].iloc[0]
    
    sec_galaxy_record = record[record['ID'] == sec_gal_id]
    
    for i in prim_galaxy_record.names:
        export_dict[f'{i}_1'] = prim_galaxy_record[i][0]
        export_dict[f'{i}_2'] = sec_galaxy_record[i][0]
    
    return export_dict

In [8]:
init_dict = {}
for i in range(2):
    for j in data.names:
        init_dict[f'{j}_{i+1}'] = None

In [9]:
matched_dict = {}
for i in tqdm(range(len(df_tmp))):
    row = df_tmp.iloc[i]
    sourceid = row.SourceID
    ra = row.RA
    dec = row.Dec
    
    matched_dict[sourceid] = getting_cosmos_params(ra, dec, limits_cosmos, init_dict)

100%|██████████| 21518/21518 [1:35:25<00:00,  3.76it/s]  


In [10]:
df_found = pd.DataFrame.from_dict(matched_dict, orient = 'index').rename(columns = {0 : 'dict_result'})

In [11]:
df_red = df_found.query('dict_result != "outwith_cosmos" and dict_result != "null"')

In [12]:
dict_cosmos = df_red.to_dict()['dict_result']

In [13]:
exp_df = pd.DataFrame.from_dict(dict_cosmos, orient = 'index').reset_index().rename(columns = {'index' : 'SourceID'})

In [14]:
exp_df_dna = exp_df[~np.isnan(exp_df['ID_1'])]

In [16]:
exp_df_dna.to_csv('C:/Users/oryan/Documents/mergers-in-cosmos/data/catalogue-matched-cosmos-2020.csv')