In [89]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import glob
import cv2 as cv
import os
from IPython.display import clear_output

from astropy.io import fits
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.wcs.utils import skycoord_to_pixel
from astropy.wcs import WCS
from astropy.table import Table
from tqdm import tqdm

import numpy as np

In [2]:
# Folder locations used by this notebook.
# Bulk inputs: the COSMOS catalogue file and the per-source FITS images.
cosmos_folder = 'E:/cosmos-data'
fits_folder = 'E:/cosmos-fits'
# Project folders: catalogues to combine, intermediate data, and figures.
combine_fold = 'C:/Users/oryan/Documents/mergers-in-cosmos/cats-to-combine'
data_folder = 'C:/Users/oryan/Documents/mergers-in-cosmos/back-to-basics-data'
fig_folder = 'C:/Users/oryan/Documents/mergers-in-cosmos/paper-source/figures'

In [9]:
# Categorised-interactions catalogue; the first CSV column becomes the row index.
df_cat = pd.read_csv(
    f'{combine_fold}/cosmos2020-categorised-interactions.csv',
    index_col = 0,
)

In [67]:
# Two successive cuts on the catalogue: first on `ez_sfr`, then on `lp_mass_best`.
# Rows where either value is NaN fail the comparison and are dropped.
df_red_tmp = df_cat[df_cat['ez_sfr'] < 10]
df_red = df_red_tmp[df_red_tmp['lp_mass_best'] > 6.5]

In [68]:
# Per-source secondary classifications. The CSV's first column is read as the
# index and then turned back into a regular column; the header "0" is a string
# here because it comes from the CSV header row.
# NOTE(review): the 'index' -> 'ORyan23_ID' rename presumes the index column is unnamed in the file - confirm.
df_clsf = (
    pd.read_csv(f'{data_folder}/secondaries-identified.csv', index_col = 0)
    .reset_index()
    .rename(columns = {'index' : 'ORyan23_ID', "0" : 'clsf'})
)

In [8]:
# Read the table held in extension 1 of the COSMOS2020 catalogue file.
# `data` is used as a module-level lookup table by `getting_secondary`.
with fits.open(
    f'{cosmos_folder}/COSMOS2020_CLASSIC_R1_v2.1_p3.fits.gz'
) as hdul:
    # Touch `.data` while the file is still open.
    data = hdul[1].data

### Looking at Images

In [69]:
# Keep only the rows whose `category` is exactly "stage1".
df_check = df_red[df_red['category'] == 'stage1']

In [71]:
# Attach the secondary classification to every stage-1 row; rows without a
# match in `df_clsf` are kept (left join) and get NaN in the added columns.
df_secs = pd.merge(df_check, df_clsf, how = 'left', on = 'ORyan23_ID')

In [74]:
# Rows whose classification flag `clsf` equals "n".
df_ns = df_secs[df_secs['clsf'] == 'n']

In [14]:
def picking_z(ez_z, lp_z):
    """Reduce two redshift estimates to a single value.

    Parameters
    ----------
    ez_z, lp_z : float
        The two estimates (callers in this notebook pass the `ez_z_phot` and
        `lp_zBEST` catalogue values). Either may be NaN.

    Returns
    -------
    float
        The smaller of the two when both are numbers, the only available one
        when the other is NaN, and NaN when both are NaN.
    """
    if np.isnan(ez_z):
        # Only `lp_z` can still be usable; fall back to NaN when it is missing too.
        return np.nan if np.isnan(lp_z) else lp_z

    if np.isnan(lp_z):
        # `ez_z` is known to be a number at this point.
        return ez_z

    # Both are numbers, so exactly one of `lp_z < ez_z` / `ez_z <= lp_z` holds;
    # no further fallback branch is reachable.
    return lp_z if lp_z < ez_z else ez_z

In [15]:
def getting_secondary(ra, dec, z, prim_id, half_width_arcsec = 15):
    """Return the catalogue sources lying in a box around a position.

    The module-level catalogue table `data` is searched for rows whose
    `ALPHA_J2000` and `DELTA_J2000` both lie strictly within
    `half_width_arcsec` (converted by dividing by 3600) of `ra` and `dec`.
    The cut is a plain box in the two coordinates; the `ALPHA_J2000` width is
    not scaled with declination.

    Parameters
    ----------
    ra, dec : float
        Centre of the search box, in the same units as the catalogue
        coordinates (the caller in this notebook treats them as degrees).
    z : float
        Accepted for interface compatibility; not used by the selection.
    prim_id
        Catalogue `ID` of the central source, which is removed from the result.
    half_width_arcsec : float, optional
        Half-width of the search box. Defaults to 15, the value that was
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns `ID`, `ALPHA_J2000`, `DELTA_J2000` and `redshift` (chosen per
        row by `picking_z`). Rows with any missing value are dropped. The
        frame may be empty.
    """
    half_width = half_width_arcsec / (60*60)

    in_box = (
        (data['ALPHA_J2000'] > ra - half_width)
        & (data['ALPHA_J2000'] < ra + half_width)
        & (data['DELTA_J2000'] > dec - half_width)
        & (data['DELTA_J2000'] < dec + half_width)
    )

    df = Table(data[in_box]).to_pandas()[['ID', 'ALPHA_J2000', 'DELTA_J2000', 'ez_z_phot', 'lp_zBEST']]

    # Built element-wise rather than with `df.apply(..., axis = 1)`: on an empty
    # selection `apply` hands back a DataFrame, which cannot be assigned as a
    # single column. An explicit float array works for zero rows as well.
    redshifts = np.array(
        [picking_z(ez_z, lp_z) for ez_z, lp_z in zip(df['ez_z_phot'], df['lp_zBEST'])],
        dtype = float,
    )

    df_z = (
        df
        .assign(redshift = redshifts)
        .drop(columns = ['ez_z_phot', 'lp_zBEST'])
        .dropna()
        .query('ID != @prim_id')
    )

    return df_z

In [61]:
# Maps ORyan23_ID -> stage recorded during the manual check further down.
# Re-running this cell discards every answer collected so far.
cat_dict = dict()

In [97]:
# Manual vetting loop. For every source in `df_ns`: show its FITS image with the
# primary (black) and the nearby catalogue sources (red) labelled by redshift,
# then ask whether the recorded category is right. Answers accumulate in
# `cat_dict`, so an interrupted run can be resumed by re-running this cell.
for sourceid in tqdm(df_ns.ORyan23_ID):

    # Already answered in an earlier pass.
    if sourceid in cat_dict:
        continue

    # Check the file before creating a figure, so a missing image cannot leave
    # an open figure behind.
    file_path = f'{fits_folder}/{sourceid}.fits'
    if not os.path.exists(file_path):
        raise FileNotFoundError(f'No FITS image found at {file_path}')

    # One lookup per source; every property below comes from its first matching row.
    prim = df_ns.query('ORyan23_ID == @sourceid')
    ra = prim['ALPHA_J2000'].iloc[0]
    dec = prim['DELTA_J2000'].iloc[0]
    z_ez = prim['ez_z_phot'].iloc[0]
    z_lp = prim['lp_zBEST'].iloc[0]
    id_str = prim['ID'].iloc[0]
    category = prim['category'].iloc[0]

    redshift = picking_z(z_ez, z_lp)

    prim_coord = SkyCoord(ra = ra * u.deg, dec = dec * u.deg, frame = 'fk5')

    # Deliberately not called `df_secs`: that name is the notebook-level frame
    # `df_ns` is derived from, and must survive this cell.
    df_nearby = getting_secondary(ra, dec, redshift, id_str)

    with fits.open(file_path) as hdul:
        im_data = hdul[0].data
        header = hdul[0].header

    w = WCS(header)

    prim_pix = np.array(skycoord_to_pixel(prim_coord, w, origin = 0))

    fig, ax = plt.subplots(figsize = (8,8))
    try:
        # Non-positive pixels have no finite logarithm; silence NumPy's warnings about them.
        with np.errstate(divide = 'ignore', invalid = 'ignore'):
            log_image = np.log10(im_data)

        ax.imshow(log_image, origin = 'lower')
        ax.scatter(prim_pix[0], prim_pix[1], s = 10, color='black')
        ax.annotate(np.round(redshift, 3), (prim_pix[0], prim_pix[1]), color = 'black')

        # A source may have no neighbour left once the primary itself is removed;
        # in that case there is nothing to overlay.
        if len(df_nearby) > 0:
            # One array-valued SkyCoord converts every neighbour in a single call.
            nearby_coords = SkyCoord(
                ra = df_nearby.ALPHA_J2000.to_numpy() * u.deg,
                dec = df_nearby.DELTA_J2000.to_numpy() * u.deg,
                frame = 'fk5',
            )
            nearby_x, nearby_y = skycoord_to_pixel(nearby_coords, w, origin = 0)

            ax.scatter(nearby_x, nearby_y, s = 10, color = 'red')
            for x_pix, y_pix, z_val in zip(nearby_x, nearby_y, df_nearby.redshift):
                ax.annotate(f'{np.round(z_val, 3)}', (x_pix, y_pix), color = 'red')

        ax.set_xticks([])
        ax.set_yticks([])
        fig.tight_layout()
        plt.show()

        print(f"Secondary in image: {df_clsf.query('ORyan23_ID == @sourceid').clsf.iloc[0]}")
        print(f'Classified as {category}')

        # Keep asking until a usable answer is given.
        while True:
            answer = input('Is this correct?').strip().lower()
            if answer == 'y':
                cat_dict[sourceid] = category
                break
            if answer == 'n':
                # Surrounding whitespace is dropped so it cannot create a spurious label.
                cat_dict[sourceid] = input('What is the stage?').strip()
                break
            print('Please input y or n.')
    finally:
        # Close this figure even when the cell is interrupted at the prompt.
        plt.close(fig)

    clear_output(wait = True)

100%|██████████| 381/381 [11:56<00:00,  1.88s/it]


In [98]:
# One row per vetted source: its ORyan23_ID and the stage recorded in `cat_dict`.
# The columns are named at construction. `DataFrame.from_dict(..., orient = 'index')`
# labels the value column with the integer 0, which a rename keyed on the string
# '0' does not match, so the column would never be called `corr_stage`.
# NOTE(review): anything already reading this table with a column named "0"
# needs to switch to `corr_stage`.
df_corr_stage = pd.DataFrame(list(cat_dict.items()), columns = ['ORyan23_ID', 'corr_stage'])

In [99]:
# Persist the corrected stages next to the other catalogues to combine.
# The row index is written as the first CSV column (pandas default).
df_corr_stage.to_csv(
    path_or_buf = f'{combine_fold}/correcting-stage2.csv',
)