In [1]:
import pandas as pd
import numpy as np

from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u

import sys
from tqdm import tqdm

### Setting Up Data and Results Folders

In [2]:
# Project root and the sub-folders used by this notebook.
folder = 'C:/Users/oryan/Documents/mergers-in-cosmos'
data_folder = 'C:/Users/oryan/Documents/mergers-in-cosmos/back-to-basics-data'
results_folder = 'C:/Users/oryan/Documents/mergers-in-cosmos/back-to-basics-results'
student_folder = 'C:/Users/oryan/Documents/mergers-in-cosmos/followup-data'

# Location of the COSMOS2020 catalogue file (kept on a separate drive).
cosmos_folder = 'E:/cosmos-data'

### Getting NaN Values

In [3]:
# Load the SourceID -> COSMOS2020 ID table. The CSV's first column is read as
# the index, moved back into an ordinary column by reset_index(), and renamed
# to 'SourceID' when reset_index() labelled it 'index'.
df = (
    pd.read_csv(f'{data_folder}/cosmos-2020ids-nans.csv', index_col=0)
    .reset_index()
    .rename(columns={'index': 'SourceID'})
)

In [4]:
# Add a helper column 'isnan': 'y' where cosmos_2020ids is missing, 'n' otherwise.
df_nans = df.assign(
    isnan=df.cosmos_2020ids.isna().map({True: 'y', False: 'n'})
)

In [5]:
# Keep only the rows flagged as missing a COSMOS2020 ID, then discard the flag.
df_red = df_nans.loc[df_nans['isnan'] == 'y'].drop(columns=['isnan'])

### Importing Student Data

In [6]:
# Read the student follow-up table and keep only the identifier and the
# ra_1 / dec_1 coordinate columns used for the positional match below.
df_stu = pd.read_csv(
    f'{student_folder}/group_cosmos_with_main_cosmos_matches_OUT1.csv'
).loc[:, ['SourceID', 'ra_1', 'dec_1']]
df_stu

Unnamed: 0,SourceID,ra_1,dec_1
0,4000705532455,150.679866,2.196563
1,4000705532984,150.673687,2.226308
2,4000705533312,150.668120,2.242867
3,4000705533383,150.645145,2.237538
4,4000705539435,149.700540,2.634988
...,...,...,...
3798,6000536185363,150.499015,2.652113
3799,6000536185496,150.501189,2.663116
3800,6000536185585,150.487626,2.671314
3801,6000536185603,150.508960,2.672579


In [7]:
# Attach ra_1 / dec_1 from the student table to every source lacking a
# COSMOS2020 ID. The all-missing cosmos_2020ids column is removed first; a left
# join keeps every such source even when the student table has no row for it.
df_merge = pd.merge(
    df_red.drop(columns='cosmos_2020ids'),
    df_stu,
    how='left',
    on='SourceID',
)

In [8]:
# Inspect the sources still lacking a COSMOS2020 ID, now with their ra_1 / dec_1.
df_merge

Unnamed: 0,SourceID,ra_1,dec_1
0,4000705539435,149.700540,2.634988
1,4000705539677,149.732807,2.643731
2,4000715723636,150.381335,2.290568
3,4000715723843,150.378633,2.293920
4,4000719090236,149.891444,2.146811
...,...,...,...
189,6000535121007,149.712670,2.359003
190,6000535781610,150.428893,2.585682
191,6000535864481,150.506115,2.405708
192,6000535892106,150.126982,2.648765


### Getting COSMOS2020 Data

In [9]:
# Open the COSMOS2020 catalogue file and keep the data of HDU 1 in `data`,
# which get_closest() searches by the ALPHA_J2000 / DELTA_J2000 columns.
catalogue_path = f'{cosmos_folder}/COSMOS2020_CLASSIC_R1_v2.1_p3.fits.gz'
with fits.open(catalogue_path) as hdul:
    data = hdul[1].data

In [10]:
# NOTE(review): at cell (module) scope a `global` declaration does not change
# how `data` is resolved; get_closest() only reads `data`, so it sees the
# module-level name without this line. Candidate for removal.
global data

In [11]:
def get_closest(ra, dec, catalogue=None, half_width_arcsec=10.0):
    """Return the ID of the catalogue source nearest to the position (ra, dec).

    Candidates are first restricted to a square box of +/- ``half_width_arcsec``
    around the target in both coordinates; the on-sky separation to every
    candidate is then computed in one vectorised call and the ID with the
    smallest separation is returned.

    Parameters
    ----------
    ra, dec : float
        Target position in degrees.
    catalogue : record array or table, optional
        Catalogue indexable by the column names 'ID', 'ALPHA_J2000' and
        'DELTA_J2000' and by a boolean row mask. Defaults to the module-level
        ``data`` loaded earlier in the notebook.
    half_width_arcsec : float, optional
        Half-width of the pre-selection box in arcseconds (default 10).

    Returns
    -------
    The 'ID' value of the nearest candidate, or ``np.nan`` when no catalogue
    row falls inside the box (this includes a NaN ``ra`` or ``dec``, since
    every comparison against NaN is False).
    """
    if catalogue is None:
        # Fall back to the notebook-level catalogue so existing
        # get_closest(ra, dec) calls behave as before.
        catalogue = data

    half_width_deg = half_width_arcsec / 3600.0

    cat_ra = catalogue['ALPHA_J2000']
    cat_dec = catalogue['DELTA_J2000']

    # NOTE: the RA half-width is not scaled by 1/cos(dec); the box is square in
    # coordinate degrees, exactly as in the original selection.
    in_box = (
        (cat_ra > ra - half_width_deg)
        & (cat_ra < ra + half_width_deg)
        & (cat_dec > dec - half_width_deg)
        & (cat_dec < dec + half_width_deg)
    )
    candidates = catalogue[in_box]

    if len(candidates) == 0:
        return np.nan

    target = SkyCoord(ra=ra * u.deg, dec=dec * u.deg, frame='fk5')

    # One array-valued SkyCoord for all candidates replaces the per-row
    # SkyCoord construction; np.asarray(..., dtype=float) yields native-order
    # floats regardless of how the catalogue stores the columns.
    candidate_coords = SkyCoord(
        ra=np.asarray(candidates['ALPHA_J2000'], dtype=float) * u.deg,
        dec=np.asarray(candidates['DELTA_J2000'], dtype=float) * u.deg,
        frame='fk5',
    )

    separations = target.separation(candidate_coords).arcsecond

    return candidates['ID'][np.argmin(separations)]

In [12]:
# Maps SourceID -> result of get_closest(); filled by the matching loop below.
# Re-running this cell discards any matches already computed.
matched_dict: dict = {}

In [13]:
# Positional match for every source lacking a COSMOS2020 ID.
# The loop is resumable: SourceIDs already present in matched_dict are skipped,
# so re-running the cell only processes what is still missing. Columns are
# iterated directly, avoiding a full-frame query per source; if a SourceID
# occurs more than once, its first row supplies the coordinates.
for source_id, ra, dec in tqdm(
    zip(df_merge.SourceID, df_merge.ra_1, df_merge.dec_1),
    total=len(df_merge),
):
    if source_id in matched_dict:
        continue

    matched_dict[source_id] = get_closest(ra, dec)
    

100%|██████████| 194/194 [02:19<00:00,  1.39it/s]


In [14]:
# Turn the SourceID -> matched ID mapping into a two-column frame and drop the
# sources for which get_closest() found no candidate (NaN).
df_nonans = (
    pd.DataFrame.from_dict(matched_dict, orient='index')
    .reset_index()
    .rename(columns={'index': 'SourceID', 0: 'cosmos_2020ids'})
    .dropna()
)

In [17]:
# Inspect the newly matched IDs (sources with no match have been dropped).
df_nonans

Unnamed: 0,SourceID,cosmos_2020ids
0,4000705539435,1280765.0
1,4000705539677,1287394.0
2,4000715723636,918737.0
3,4000715723843,920848.0
4,4000719090236,761874.0
...,...,...
188,6000535064806,1041748.0
189,6000535121007,997393.0
190,6000535781610,1231637.0
191,6000535864481,1042682.0


In [20]:
# Stack the rows that already had an ID on top of the newly matched ones and
# cast every column to 64-bit integers. Row labels of both inputs are kept
# as-is, so labels can repeat across the two parts.
df_export = (
    pd.concat([df.dropna(), df_nonans], axis=0)
    .astype('int64')
)

In [21]:
# Inspect the combined table before it is written to disk.
df_export

Unnamed: 0,SourceID,cosmos_2020ids
0,4000705532455,816891
1,4000705532984,857121
2,4000705533312,873195
3,4000705533383,861738
5,4000705539529,1284864
...,...,...
188,6000535064806,1041748
189,6000535121007,997393
190,6000535781610,1231637
191,6000535864481,1042682


In [22]:
# Write the completed ID table next to the input data; the row labels are
# written as the first (unnamed) column.
df_export.to_csv(
    path_or_buf=f'{data_folder}/cosmos-2020ids.csv',
    index=True,
)