In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import glob
import sys
from tqdm import tqdm
tqdm.pandas()
import cv2 as cv

from astropy.coordinates import SkyCoord
import astropy.units as u
from astropy.cosmology import FlatLambdaCDM
from astropy.io import fits
from astropy.table import Table

import numpy as np
from scipy import special
from scipy.stats import kstwobign, anderson
import scipy.stats.distributions as dist


plt.rcParams['font.size'] = 14

## Importing the Data

In [2]:
combine_fold = 'C:/Users/oryan/Documents/mergers-in-cosmos/cats-to-combine'
data_folder = 'C:/Users/oryan/Documents/mergers-in-cosmos/back-to-basics-data'
fig_folder = 'C:/Users/oryan/Documents/mergers-in-cosmos/paper-source/figures'
cos_dat_fold = 'E:/cosmos-data'

In [3]:
df_agn_sample = pd.read_csv(f'{combine_fold}/source-agn-cat.csv', index_col = 0)

In [4]:
with fits.open(f'{cos_dat_fold}/COSMOS2020_CLASSIC_R1_v2.1_p3.fits.gz') as hdul:
    data = hdul[1].data

In [5]:
agn_cat = pd.read_csv(f'{combine_fold}/source-agn-cat.csv', index_col = 0)

In [6]:
df_cat = pd.read_csv(f'{combine_fold}/dor-all-restaged-full.csv', index_col = 0)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
df_cat.head()

Unnamed: 0,ORyanID_23,ID,category,ALPHA_J2000,DELTA_J2000,X_IMAGE,Y_IMAGE,ERRX2_IMAGE,ERRY2_IMAGE,ERRXY_IMAGE,...,ez_ssfr_p025,ez_ssfr_p160,ez_ssfr_p500,ez_ssfr_p840,ez_ssfr_p975,ez_Av_p025,ez_Av_p160,ez_Av_p500,ez_Av_p840,ez_Av_p975
0,4000705532455,816891,stage4,150.679846,2.196543,9200.399414,21737.232422,5.2e-05,5.9e-05,-1e-05,...,-8.931825,-8.911355,-8.899668,-8.883307,-8.87299,0.529107,0.560142,0.599624,0.63092,0.65233
1,4000705533383,860686,stage3,150.644522,2.237226,10047.978516,22713.351562,5.1e-05,4.6e-05,2e-06,...,-9.314209,-9.106611,-8.941274,-8.713107,-8.541205,0.34127,0.545498,0.732327,0.906665,1.020098
2,4000705539941,1301188,stage2,149.702462,2.654895,32637.949219,32737.013672,6.4e-05,0.000104,-3e-06,...,-9.794128,-9.784252,-9.766013,-9.743796,-9.727879,0.018737,0.018737,0.018737,0.018737,0.018737
3,4000705540034,1304538,stage3,149.726828,2.658172,32053.712891,32815.46875,7.1e-05,8.6e-05,-6e-06,...,-9.480532,-9.443615,-9.402433,-9.352081,-9.325659,0.302629,0.376027,0.440394,0.508153,0.567727
4,4000705540064,1310001,stage3,149.70123,2.660343,32667.441406,32867.789062,2.6e-05,2e-05,-1e-05,...,-9.882174,-9.855921,-9.816085,-9.697225,-9.573506,0.455258,0.482555,0.522239,0.600383,0.660279


In [8]:
agn_cat.head()

Unnamed: 0,ID,agn_clsf
0,765389,SFG
1,715417,HLAGN
2,1485262,SFG
3,998663,HLAGN
0,888234,AGN


In [9]:
len(agn_cat)

1362

In [10]:
df_cat_red = df_cat[['ORyanID_23', 'ALPHA_J2000', 'DELTA_J2000' , 'lp_mass_best', 'ez_sfr', 'ID', 'category', 'ez_z_phot', 'lp_zBEST']]

In [11]:
df_merg = (
    agn_cat
    .merge(df_cat_red, on = 'ID', how = 'left')
)

In [12]:
df_merg

Unnamed: 0,ID,agn_clsf,ORyanID_23,ALPHA_J2000,DELTA_J2000,lp_mass_best,ez_sfr,category,ez_z_phot,lp_zBEST
0,765389,SFG,4001118546106_extra,149.842572,2.144862,10.34840,0.548020,stage1,0.355458,0.3538
1,715417,HLAGN,4001092092552,149.896032,2.107181,8.97183,0.225094,stage2,1.093160,1.0968
2,1485262,SFG,4000755601428,150.299900,2.836496,8.81727,0.200669,stage1,0.782980,
3,998663,HLAGN,4000940892426,150.558353,2.362698,9.55510,1.317938,stage2,0.929886,
4,888234,AGN,4001158041630,149.998418,2.260853,10.25861,0.410131,stage2,0.624140,0.6364
...,...,...,...,...,...,...,...,...,...,...
1357,500953,AGN,4000987774551,149.627155,1.901416,9.24133,0.316888,stage3,0.668197,0.6591
1358,410793,AGN,4000897806754,149.761901,1.811414,9.83359,1.661041,stage3,1.264383,1.0009
1359,467138,AGN,4001044268219,149.813590,1.865208,9.45527,0.258185,stage4,0.420318,0.4209
1360,241559,SFG,4001120874958,149.764582,1.644018,9.08612,-0.171674,stage4,0.381223,


## Finding Control Galaxies

In [13]:
def picking_z(ez_z, lp_z):
    if np.isnan(ez_z):
        if not np.isnan(lp_z):
            return lp_z
        else:
            return np.nan
    
    if np.isnan(lp_z):
        if not np.isnan(ez_z):
            return ez_z
        else:
            return np.nan
    
    if lp_z < ez_z:
        return lp_z
    elif ez_z <= lp_z:
        return ez_z
    else:
        return 'what'

In [14]:
def find_controls(id_str, lp_mass, ra, dec, ez_z_phot, lp_zBEST, cat_ids, control_ids, data):
    data_red = data[(data['lp_mass_best'] >= lp_mass - 0.01) & (data['lp_mass_best'] <= lp_mass + 0.01)]
    
    if len(data_red) < 0.5:
        return np.nan
    
    data_df = (Table(data_red)).to_pandas()
    
    data_red = data_df[['ID', 'ALPHA_J2000', 'DELTA_J2000', 'ez_z_phot', 'lp_zBEST']]
    
    z = picking_z(ez_z_phot, lp_zBEST)
    
    if np.isnan(z):
        print('Totally NaN redshift!')
        sys.exit()
    
    data_z = (
        data_red
        .assign(redshift = data_red.apply(lambda row: picking_z(row.ez_z_phot, row.lp_zBEST), axis = 1))
        .drop(columns = ['ez_z_phot', 'lp_zBEST'])
    )
    
    data_dna = data_z.dropna()
    
    if len(data_dna) < 0.5:
        return np.nan
    
    min_z = z - 0.001
    max_z = z + 0.001
    data_z_red = data_dna.query('redshift >= @min_z and redshift <= @max_z')
    
    controls = list(data_z_red.ID)
            
    if len(controls) > 0.5:
        return controls
    else:
        return np.nan

In [15]:
agn_dict = df_cat_red.set_index('ID').to_dict(orient = 'index')

In [16]:
control_sample = {}
control_ids = []

known_ids = list(df_merg['ID'])

In [21]:
done_ids = list(control_sample.keys())
for i in tqdm(list(agn_dict.keys())):
    if i in done_ids:
        continue
    
    control_sample[i] = find_controls(i, agn_dict[i]['lp_mass_best'], agn_dict[i]['ALPHA_J2000'], agn_dict[i]['DELTA_J2000'], agn_dict[i]['ez_z_phot'], agn_dict[i]['lp_zBEST'], known_ids, control_ids, data)
    control_ids.append(control_sample[i])

100%|██████████| 4181/4181 [1:54:37<00:00,  1.64s/it]  


In [18]:
df_agn_control = pd.DataFrame.from_dict(control_sample, orient = 'index').reset_index().rename(columns = {'index':'ID', 0 : 'control_ID'})
df_agn_control

TypeError: object of type 'float' has no len()

In [22]:
with fits.open(f'{cos_dat_fold}/chandra_COSMOS_legacy_opt_NIR_counterparts_20160113_4d.fits') as hdul:
    chandra_cat = hdul[1].data

In [23]:
with fits.open(f'{cos_dat_fold}/VLA_3GHz_counterpart_array_20170210_paper_delvecchio_et_al.fits') as hdul:
    vla_cat = hdul[1].data

In [24]:
with fits.open(f'{cos_dat_fold}/VLA_3GHz_counterpart_array_20170210_paper_smolcic_et_al.fits') as hdul:
    smol_rec = hdul[1].data

In [31]:
def checking_agn_control(control_id, data, smol_cat, vla_cat, chandra_cat):
    
    if np.isnan(control_id):
        return np.nan
    
    entry = data[data['ID'] == int(control_id)]
    ra = entry['ALPHA_J2000']
    dec = entry['DELTA_J2000']
    
    arcsec = 15 / (60 * 60)
    
    smol_entry = smol_cat[(smol_cat['RA_VLA_J2000'] >= ra - arcsec) & (smol_cat['RA_VLA_J2000'] <= ra + arcsec) & (smol_cat['DEC_VLA_J2000'] >= dec - arcsec) & (smol_cat['DEC_VLA_J2000'] <= dec + arcsec)]
    
    if len(smol_entry) > 0.5:
        clsfs = [smol_entry['SFG'][0], smol_entry['Clean_SFG'][0], smol_entry['Radio_excess'][0]]

        if clsfs[0] == 'true' and clsfs[1] == 'true':
            return 'SFG'
        elif clsfs[0] == 'false' and clsfs[1] == 'false':
            return 'AGN'
        elif clsfs[0] == 'true' and clsfs[1] == 'false':
            return 'SFG'
        elif clsfs[0] == 'false' and clsfs[1] == 'true':
            print('Something wrong: 1')
            sys.exit()
        elif clsfs[3] == 'true':
            return 'Radio_Excess'
    
    vla_entry = vla_cat[(vla_cat['RA_VLA3'] >= ra - arcsec) & (vla_cat['RA_VLA3'] <= ra + arcsec) & (vla_cat['DEC_VLA3'] >= dec - arcsec) & (vla_cat['DEC_VLA3'] <= dec + arcsec)]
    
    if len(vla_entry) > 0.5:
        ans = vla_entry['CLASS'][0]
        if len(ans) < 0.5:
            return 'SFG'
        else:
            return ans
    
    chandra_entry = chandra_cat[(chandra_cat['RA_x'] >= ra - arcsec) & (chandra_cat['RA_x'] <= ra + arcsec) & (chandra_cat['DEC_x'] >= dec - arcsec) & (chandra_cat['DEC_x'] <= dec + arcsec)]
    
    if len(chandra_entry) > 0.5:
        hard_lum = chandra_entry['Lx_210'][0]
        if hard_lum < 42:
            return 'SFG'
        elif hard_lum >= 42:
            return 'AGN'
    
    return 'unknown'

In [32]:
control_clsf = {}
checked_id = []

In [40]:
for i in tqdm(list(control_sample.keys())):
    
    if i in checked_id:
        continue
                
    checked_id.append(i)
    potential_candidates = control_sample[i]
    
    if type(potential_candidates) == float:
        control_clsf[i] = np.nan
        continue
    
    for j in potential_candidates:
        clsf = checking_agn_control(j, data, smol_rec, vla_cat, chandra_cat)
        if clsf == 'unknown':
            continue
        else:
            control_clsf[i] = clsf
            break
    try:
        tmp = control_clsf[i]
    except:
        control_clsf[i] = np.nan

100%|██████████| 4181/4181 [1:48:22<00:00,  1.56s/it]  


In [41]:
df_agn_control = pd.DataFrame.from_dict(control_clsf, orient = 'index').reset_index().rename(columns = {'index':'ID', 0 : 'control_ID'})
df_agn_control

Unnamed: 0,ID,control_ID
0,860686,
1,1301188,AGN
2,1304538,SFG
3,1310001,
4,1308927,SFG
...,...,...
4173,1520733,
4174,1520749,SFG
4175,1523385,SFG
4176,1524909,SFG


In [50]:
df_agn_control.drop_duplicates(subset = 'ID', keep = 'first').control_ID.value_counts()

SFG    1972
AGN    1426
Name: control_ID, dtype: int64

In [48]:
df_agn_control.to_csv(f'{data_folder}/agn-control-sample.csv')

### Plotting the Control AGN Fraction

In [54]:
df_agn_control.control_ID.value_counts()['AGN'] / len(df_agn_control)

0.34131163235998085

In [52]:
pd.read_csv(f'{data_folder}/agn-control-sample.csv', index_col = 0)

Unnamed: 0,ID,control_ID
0,860686,
1,1301188,AGN
2,1304538,SFG
3,1310001,
4,1308927,SFG
...,...,...
4173,1520733,
4174,1520749,SFG
4175,1523385,SFG
4176,1524909,SFG
