In [1]:
import pandas as pd
import numpy as np
from IPython.display import clear_output

from astropy.io import fits
from astropy.table import Table
from astropy.cosmology import FlatLambdaCDM
from astropy.coordinates import SkyCoord
import astropy.units as u

import sys
import os
import glob
from tqdm import tqdm
tqdm.pandas()

In [2]:
# Locations used throughout the notebook. The catalogue is read from
# `drive_folder`; per-batch and combined CSVs are written under `results`.
drive_folder = 'E:/cosmos-data'

folder = 'C:/Users/oryan/Documents/mergers-in-cosmos'
data_folder = folder + '/data'
results = folder + '/results'

In [3]:
# Open the catalogue file and keep the data of the HDU at index 1.
# The data attribute is read while the file is still open.
catalogue_path = drive_folder + '/COSMOS2020_CLASSIC_R1_v2.1_p3.fits.gz'
with fits.open(catalogue_path) as hdul:
    second_hdu = hdul[1]
    data_rec = second_hdu.data

In [None]:
# Display only the rows whose HSC_r_MAG_ISO value is not NaN.
has_r_mag = np.logical_not(np.isnan(data_rec['HSC_r_MAG_ISO']))
data_rec[has_r_mag]

In [5]:
# Display the column definitions (name, format and, where present, unit)
# of the catalogue record array.
data_rec.columns

ColDefs(
    name = 'ID'; format = 'K'
    name = 'ALPHA_J2000'; format = 'D'; unit = 'deg'
    name = 'DELTA_J2000'; format = 'D'; unit = 'deg'
    name = 'X_IMAGE'; format = 'D'; unit = 'pix'
    name = 'Y_IMAGE'; format = 'D'; unit = 'pix'
    name = 'ERRX2_IMAGE'; format = 'D'; unit = 'pix**2'
    name = 'ERRY2_IMAGE'; format = 'D'; unit = 'pix**2'
    name = 'ERRXY_IMAGE'; format = 'D'; unit = 'pix**2'
    name = 'FLUX_RADIUS'; format = 'E'; unit = 'pix'
    name = 'KRON_RADIUS'; format = 'E'
    name = 'FLAG_HSC'; format = 'I'
    name = 'FLAG_SUPCAM'; format = 'I'
    name = 'FLAG_UVISTA'; format = 'I'
    name = 'FLAG_UDEEP'; format = 'I'
    name = 'FLAG_COMBINED'; format = 'I'
    name = 'EBV_MW'; format = 'D'
    name = 'CFHT_u_FLUX_APER2'; format = 'E'; unit = 'uJy'
    name = 'CFHT_u_FLUXERR_APER2'; format = 'E'; unit = 'uJy'
    name = 'CFHT_u_FLUX_APER3'; format = 'E'; unit = 'uJy'
    name = 'CFHT_u_FLUXERR_APER3'; format = 'E'; unit = 'uJy'
    name = 'CFHT_u_FLUX_AUTO

In [4]:
# Keep only the rows whose lp_type equals 0.
# NOTE(review): presumably lp_type == 0 is the galaxy class of the catalogue's
# source classification - confirm against the catalogue documentation.
lp_type_is_zero = data_rec['lp_type'] == 0
data_rec = data_rec[lp_type_is_zero]

In [8]:
# Wrap the record array in an astropy Table; the next cell selects a subset
# of its columns by name.
data_tab = Table(data_rec)

In [17]:
# Reduce the table to the four columns used by the neighbour search:
# the source ID, its sky position, and its ez_z_phot value.
columns_needed = ['ID', 'ALPHA_J2000', 'DELTA_J2000', 'ez_z_phot']
data = data_tab[columns_needed]

NameError: name 'data_tab' is not defined

In [10]:
# Drop the full table; the cells below only use the reduced `data` table.
# After this, the cell that builds `data` from data_tab cannot be re-run
# without recreating data_tab first (it raises NameError).
del data_tab

In [11]:
# Drop the record array read from the FITS file; the cells below do not use it.
del data_rec

In [None]:
# Every source ID, in table order; the main loop iterates over this list.
ids = [source_id for source_id in data['ID']]

In [45]:
def calc_sep(ra1, dec1, ra2, dec2, conversion):
    """Return the projected separation, in Mpc, between two sky positions.

    Parameters
    ----------
    ra1, dec1 :
        Right ascension and declination of the first position. The values
        are multiplied by ``u.deg`` here, so they must be plain numbers in
        degrees.
    ra2, dec2 :
        Right ascension and declination of the second position, in degrees.
    conversion :
        Factor multiplied onto the angular separation after it has been
        expressed in arcminutes. The product must be convertible to Mpc
        (the caller in this notebook passes
        ``cosmo.kpc_proper_per_arcmin(z)``).

    Returns
    -------
    float
        The projected separation as a plain number of Mpc.
    """
    first = SkyCoord(ra = ra1 * u.deg, dec = dec1 * u.deg, frame = 'fk5')
    second = SkyCoord(ra = ra2 * u.deg, dec = dec2 * u.deg, frame = 'fk5')

    # Angular separation on the sky, expressed in arcminutes so that it
    # combines with a per-arcminute conversion factor.
    separation_arcmin = first.separation(second).to(u.arcmin)

    # Strip the unit once the length has been expressed in Mpc.
    return float((separation_arcmin * conversion).to_value(u.Mpc))

In [64]:
# Nearest-neighbour surface densities.
#
# For every source in `ids`, select the other sources that lie inside a square
# RA/Dec box around it and inside a narrow ez_z_phot window, sort them by
# projected separation, and for the j-th closest (j = 1..5) record
#     N_j = j / (pi * r_j**2)
# where r_j is the projected separation in Mpc returned by calc_sep.
# Results are written to CSV in batches so that memory use stays bounded.
ang = 0.1             # half-width of the RA/Dec pre-selection box (same units as the coordinate columns, i.e. degrees)
dz = 0.005            # half-width of the ez_z_phot window around each source
max_neighbours = 5    # must match the number of N_* keys created for each source
batch_size = 50000    # sources per intermediate CSV; the file-name labels below assume this value
cosmo = FlatLambdaCDM(H0=70, Om0=0.3, Tcmb0=2.725)
results_dict = {}
counter = 0


def _write_batch(batch, batch_number):
    """Write one batch of per-source results to its own CSV under `results`.

    `batch` maps source ID -> result dict; `batch_number` selects the
    ``full-sample-<start>-<end>.csv`` label (in units of a thousand sources).
    """
    batch_df = (
        pd.DataFrame.from_dict(batch, orient = 'index')
        .reset_index()
        .rename(columns = {'index' : 'ID'})
    )
    batch_df.to_csv(f'{results}/full-sample-{batch_number * 50}-{(batch_number + 1) * 50}.csv')


# Build the ID -> row-number map once, so that finding a source's own row is a
# dictionary lookup instead of a scan over the whole table on every iteration.
row_of_id = {source_id: row for row, source_id in enumerate(data['ID'])}
ra_col = data['ALPHA_J2000']
dec_col = data['DELTA_J2000']
z_col = data['ez_z_phot']

for i in tqdm(ids):
    tmp_dict = {'IDs': [], 'N_1': np.nan, 'N_2': np.nan, 'N_3': np.nan, 'N_4': np.nan, 'N_5': np.nan}

    # Work with plain scalars: calc_sep ends in float(), which expects a
    # single value rather than a length-1 column.
    row = row_of_id[i]
    ra = float(ra_col[row])
    dec = float(dec_col[row])
    z_phot = float(z_col[row])

    # Sources without a position or redshift are skipped entirely (they get
    # no row in the output).
    if np.isnan(ra) or np.isnan(dec) or np.isnan(z_phot):
        continue

    conversion = cosmo.kpc_proper_per_arcmin(z_phot)

    # Cheap square pre-selection on the sky, then drop the source itself and
    # everything outside the redshift window.
    in_box = (
        (ra_col < (ra + ang)) & (ra_col > (ra - ang))
        & (dec_col < (dec + ang)) & (dec_col > (dec - ang))
    )
    record = data[in_box]
    record = record[record['ID'] != i]
    record = record[(record['ez_z_phot'] > z_phot - dz) & (record['ez_z_phot'] < z_phot + dz)]

    if len(record) > 0:
        record_df = record.to_pandas()
        separations = record_df.apply(
            lambda nb: calc_sep(ra, dec, nb.ALPHA_J2000, nb.DELTA_J2000, conversion),
            axis = 1,
        )
        nearest = (
            record_df
            .assign(seperations = separations)
            .sort_values('seperations', ascending = True)
            .head(max_neighbours)
        )

        # Kept as a NumPy array so that a zero separation gives inf (with a
        # warning) instead of raising ZeroDivisionError.
        nearest_sep = nearest['seperations'].to_numpy()
        nearest_ids = nearest['ID'].to_numpy()

        for j in range(len(nearest)):
            # Plain ints so the ID list is written to the CSV as bare numbers.
            tmp_dict['IDs'].append(int(nearest_ids[j]))
            # Parenthesised rank: the density is (j+1) / area, not j + 1/area.
            tmp_dict[f'N_{j+1}'] = (j + 1) / (np.pi * nearest_sep[j]**2)

    # Sources with no neighbours keep the all-NaN defaults.
    results_dict[i] = tmp_dict

    if len(results_dict) >= batch_size:
        _write_batch(results_dict, counter)
        counter += 1
        results_dict = {}

# Write the final, partially filled batch; without this the sources processed
# after the last full batch would never reach disk.
if results_dict:
    _write_batch(results_dict, counter)

  0%|          | 51/1677646 [00:04<43:31:43, 10.71it/s]


SystemExit: 

In [None]:
# Collect the per-batch CSVs written by the loop above. The pattern needs a
# hyphen after the batch start, so it does not match 'full-sample-all.csv'.
csv_files = glob.glob(f'{results}/full-sample-*-*.csv')

In [None]:
# Combine every per-batch CSV into one file.
if not csv_files:
    # Fail with a clear message instead of a NameError on an undefined frame.
    raise FileNotFoundError(
        f'No batch files matching full-sample-*-*.csv were found in {results}'
    )

# Read each file once and concatenate in a single call, rather than re-copying
# the growing frame on every iteration. The paths are sorted so that the row
# order of the combined file does not depend on the order glob returned them.
df = pd.concat([pd.read_csv(csv_path, index_col = 0) for csv_path in sorted(csv_files)])

df.to_csv(f'{results}/full-sample-all.csv')