In [1]:
import pandas as pd
import numpy as np

from tqdm import tqdm

import sys

from astropy.io import fits
from astropy.table import Table
from astropy.cosmology import FlatLambdaCDM
from astropy.coordinates import SkyCoord
import astropy.units as u

In [2]:
# Root of the local project checkout. The three project sub-folders are derived
# from it so that moving the project only requires editing this one line.
folder = 'C:/Users/oryan/Documents/mergers-in-cosmos'
data_folder = f'{folder}/back-to-basics-data'        # input CSVs and the exported secondary IDs
student_folder = f'{folder}/followup-data'
results_folder = f'{folder}/back-to-basics-results'
# Location of the COSMOS2020 FITS catalogue (separate drive).
cosmos_folder = 'E:/cosmos-data'

### Getting Cosmos Data

In [3]:
# Load the primary-galaxy table; the first CSV column becomes the index
# (it is later exposed as the `SourceID` column).
df = pd.read_csv(f'{data_folder}/cosmos-primary-data.csv', index_col = 0)

### Filtering Redshift

In [4]:
# Keep only the catalogue ID, the sky position and the two photometric
# redshift estimates (ez_z_phot and lp_zBEST) needed to pick a redshift.
df_z = df[[
    'ID',
    'ALPHA_J2000', 'DELTA_J2000',
    'ez_z_phot', 'lp_zBEST',
]]
df_z.head()

Unnamed: 0,ID,ALPHA_J2000,DELTA_J2000,ez_z_phot,lp_zBEST
4000705532455,816891,150.679846,2.196543,0.519525,0.5304
4000705532984,857121,150.673667,2.226291,0.219858,0.2235
4000705533312,873195,150.668102,2.242849,0.346519,0.3024
4000705533383,861738,150.645118,2.237538,0.619803,0.6224
4000705539529,1284864,149.686223,2.637412,0.705821,0.6984


In [5]:
def picking_z(ez_z, lp_z):
    """Pick a single redshift from two photometric estimates.

    Parameters
    ----------
    ez_z : float
        First redshift estimate (callers pass the ``ez_z_phot`` column); may be NaN.
    lp_z : float
        Second redshift estimate (callers pass the ``lp_zBEST`` column); may be NaN.

    Returns
    -------
    float
        The smaller of the two estimates when both are present, the only
        available estimate when exactly one is NaN, and NaN when both are NaN.
    """
    if np.isnan(ez_z):
        # Covers "only lp_z available" and "both missing": in the latter case
        # lp_z is itself NaN, so NaN is returned.
        return lp_z

    if np.isnan(lp_z):
        return ez_z

    # Both estimates are valid numbers: take the lower one (ties return ez_z,
    # which equals lp_z).
    return lp_z if lp_z < ez_z else ez_z

In [6]:
# Vectorised equivalent of applying picking_z row by row: np.fmin takes the
# element-wise minimum, returns the other value when exactly one input is NaN,
# and returns NaN only when both are NaN.
df_chosen_z = (
    df_z
    .assign(redshift = np.fmin(df_z.ez_z_phot.to_numpy(), df_z.lp_zBEST.to_numpy()))
)

In [7]:
# Build the list of primaries to process:
#   - drop rows with any missing value (including a missing chosen redshift),
#   - keep redshift <= 1.5,
#   - keep the first row per catalogue ID,
#   - move the original index into a regular column named SourceID.
df_z_filtered = (
    df_chosen_z[['ID', 'ALPHA_J2000', 'DELTA_J2000', 'redshift']]
    .dropna()
    .query('redshift <= 1.5')
    .drop_duplicates('ID')
    .reset_index()
    .rename(columns = {'index':'SourceID'})
)

In [8]:
# Show the filtered primaries (SourceID, ID, position and chosen redshift).
df_z_filtered

Unnamed: 0,SourceID,ID,ALPHA_J2000,DELTA_J2000,redshift
0,4000705532455,816891,150.679846,2.196543,0.519525
1,4000705532984,857121,150.673667,2.226291,0.219858
2,4000705533312,873195,150.668102,2.242849,0.302400
3,4000705533383,861738,150.645118,2.237538,0.619803
4,4000705539529,1284864,149.686223,2.637412,0.698400
...,...,...,...,...,...
3530,6000534269373,1193387,150.588619,2.546616,0.883769
3531,6000535064806,1041748,150.435644,2.404769,1.091646
3532,6000535781610,1231637,150.428931,2.588176,0.498800
3533,6000535864481,1042682,150.505365,2.407106,1.440600


### Importing Data

In [9]:
# Open the gzipped COSMOS2020 CLASSIC FITS catalogue and keep the table stored
# in HDU 1 as the module-level `data`, which getting_secondary searches.
with fits.open(f'{cosmos_folder}/COSMOS2020_CLASSIC_R1_v2.1_p3.fits.gz') as hdul:
    data = hdul[1].data

### Finding the Secondaries

In [10]:
# Flat Lambda-CDM cosmology (H0 = 70 km/s/Mpc, Om0 = 0.3, Tcmb0 = 2.725 K);
# getting_secondary uses it to turn redshifts into comoving distances.
cosmo = FlatLambdaCDM(H0=70 * u.km / u.s / u.Mpc, Tcmb0=2.725 * u.K, Om0=0.3)

In [11]:
# NOTE(review): a `global` statement at module (cell) level has no effect.
# `data` and `cosmo` are already module-level names, and functions can read
# them without any declaration.
global data, cosmo

In [27]:
# Maps each primary's SourceID to the catalogue ID of its selected secondary
# (NaN when none was found). Re-running this cell discards all results so far.
sec_dict = {}

In [28]:
def getting_secondary(ra, dec, z, prim_id, box_deg = 0.1, max_del_z = 1000, max_sep_kpc = 750):
    """Find the closest catalogue companion of a primary galaxy.

    Reads the module-level ``data`` (catalogue table) and ``cosmo`` (cosmology).

    Parameters
    ----------
    ra, dec : float
        Position of the primary in degrees (compared against the catalogue's
        ``ALPHA_J2000`` / ``DELTA_J2000`` columns).
    z : float
        Redshift adopted for the primary.
    prim_id : int
        Catalogue ``ID`` of the primary; that row is excluded from the candidates.
    box_deg : float, optional
        Half-width in degrees of the RA/Dec box used to pre-select candidates.
    max_del_z : float, optional
        Upper limit on ``|z - z_candidate| * 2.998e5``.
    max_sep_kpc : float, optional
        Upper limit on the 3D separation in kpc, computed from comoving
        distances.

    Returns
    -------
    int or float
        ``ID`` of the candidate with the smallest 3D separation that passes
        both cuts, or ``np.nan`` when no candidate qualifies.
    """
    # Coarse positional pre-selection on the full catalogue.
    in_box = (
        (data['ALPHA_J2000'] > ra - box_deg) & (data['ALPHA_J2000'] < ra + box_deg)
        & (data['DELTA_J2000'] > dec - box_deg) & (data['DELTA_J2000'] < dec + box_deg)
    )
    df = Table(data[in_box]).to_pandas()[['ID', 'ALPHA_J2000', 'DELTA_J2000', 'ez_z_phot', 'lp_zBEST']]

    # Same rule as picking_z (lower of the two estimates, ignoring a single
    # NaN), evaluated on whole columns. Unlike a row-wise apply, this also
    # works when the box is empty.
    df_z = (
        df
        .assign(redshift = np.fmin(df.ez_z_phot.to_numpy(), df.lp_zBEST.to_numpy()))
        .drop(columns = ['ez_z_phot', 'lp_zBEST'])
        .dropna()
    )
    df_z = df_z.loc[df_z.ID != prim_id]

    # Redshift difference scaled by the speed of light in km/s.
    # NOTE(review): no 1/(1+z) factor is applied here - confirm this is intended.
    df_diff = df_z.assign(del_z = (z - df_z.redshift).abs() * 2.998e5)
    df_red = df_diff.loc[df_diff.del_z <= max_del_z]

    if len(df_red) == 0:
        return np.nan

    # The primary's coordinate is built unconditionally. Previously it was only
    # defined when more than one candidate survived, so the single-candidate
    # path relied on an undefined (or stale global) `c1`.
    c1 = SkyCoord(ra = ra * u.deg, dec = dec * u.deg, frame = 'fk5', distance = cosmo.comoving_distance(z))
    c2 = SkyCoord(
        ra = df_red.ALPHA_J2000.to_numpy() * u.deg,
        dec = df_red.DELTA_J2000.to_numpy() * u.deg,
        frame = 'fk5',
        distance = cosmo.comoving_distance(df_red.redshift.to_numpy()),
    )

    df_sep = df_red.assign(separations = c1.separation_3d(c2).to_value(u.kpc))
    df_sep = (
        df_sep
        .loc[df_sep.separations <= max_sep_kpc]
        .sort_values('separations', ascending = True)
    )

    if len(df_sep) == 0:
        return np.nan

    # Closest surviving candidate.
    return df_sep.ID.iloc[0]

In [29]:
# Primaries already present in sec_dict are skipped, so an interrupted run can
# be resumed by re-running this cell. Reset sec_dict to {} to recompute everything.
done_ids = set(sec_dict.keys())
for row in tqdm(df_z_filtered.itertuples(index = False), total = len(df_z_filtered)):
    i = row.SourceID
    if i in done_ids:
        continue

    # itertuples hands over each row directly, avoiding a full-frame query per
    # primary just to look its own row up again.
    sec_dict[i] = getting_secondary(row.ALPHA_J2000, row.DELTA_J2000, row.redshift, row.ID)
    done_ids.add(i)

100%|██████████| 3535/3535 [2:48:04<00:00,  2.85s/it]  


In [31]:
# One row per primary: the index is the SourceID key of sec_dict and the single
# column (labelled 0) holds the secondary ID, or NaN when none was found.
df_secs = pd.DataFrame.from_dict(sec_dict, orient = 'index')

In [32]:
# Preview only the primaries that have a secondary; the result is not stored.
df_secs.dropna()

Unnamed: 0,0
4000705532984,846770.0
4000705539529,1298521.0
4000707045178,271755.0
4000707047768,260900.0
4000707048117,252331.0
...,...
6000435703307,863206.0
6000443709301,987251.0
6000513404870,826072.0
6000516624203,821410.0


In [33]:
# df_secs.to_csv(f'{data_folder}/secondary-ids-redone-2.csv')

In [34]:
# Export the primary -> secondary mapping:
#   - name the value column sec_id,
#   - drop primaries without a secondary,
#   - keep only the first primary for each distinct secondary,
#   - cast to 64-bit integers before writing the CSV.
(
    df_secs
    .rename(columns = {0:'sec_id'})
    .dropna()
    .drop_duplicates('sec_id')
    .astype(np.int64)
    .to_csv(f'{data_folder}/secondary-ids-redone-2.csv')
)

In [37]:
# Read the exported file back (first column as index) to check what was written.
tmp_df = pd.read_csv(f'{data_folder}/secondary-ids-redone-2.csv', index_col = 0)

In [38]:
# Show the re-loaded secondary IDs.
tmp_df

Unnamed: 0,sec_id
4000705532984,846770
4000705539529,1298521
4000707045178,271755
4000707047768,260900
4000707048117,252331
...,...
6000435703307,863206
6000443709301,987251
6000513404870,826072
6000516624203,821410
