# Matching GZ: DESI to MPA-JHU
This notebook checks whether any galaxies in the DESI sample also appear in the MPA-JHU catalogue. It uses pandas and astropy `FITS_rec` tables.

In [1]:
import os
import sys

import pandas as pd
from astropy.io import fits
from astropy.table import Table
from tqdm import tqdm

tqdm.pandas()

In [2]:
# Directory holding the MPA-JHU catalogue FITS file. Set the MPA_JHU_DATA_DIR
# environment variable to point elsewhere; the default is the original local path.
mpa_folder = os.environ.get('MPA_JHU_DATA_DIR', 'C:/Users/oryan/Documents/mergers-in-cosmos/data')

In [3]:
# Root directory of the GZ-DESI data products. Set the GZ_DESI_DATA_DIR
# environment variable to point elsewhere; the default is the original local path.
folder = os.environ.get('GZ_DESI_DATA_DIR', 'E:/GZ-DESI/data')

In [4]:
# Only the identifier, sky position and category are used below, so read just
# those columns instead of materialising the whole parquet file in memory.
df_int = pd.read_parquet(
    f'{folder}/2023-03-15-cats/definitive-merged-interesting-params.parquet',
    engine = 'pyarrow',
    columns = ['id_str', 'ra', 'dec', 'category'],
)

In [5]:
# Read the table data and header from HDU 1 of the MPA-JHU catalogue while
# the file is open; the context manager closes the file on exit.
with fits.open(f'{mpa_folder}/MPA-JHU_full_catalogue.fits') as hdul:
    mpa_data, mpa_header = hdul[1].data, hdul[1].header

In [6]:
# NOTE(review): redundant — `hdul` was already closed when the
# `with fits.open(...)` block above exited.
hdul.close()

In [7]:
# Keep only the identifier, sky position and category columns, then release
# the wide source frame.
df_red = df_int.loc[:, ['id_str', 'ra', 'dec', 'category']]
del df_int

In [8]:
# Preview the reduced catalogue (id, position and category columns only).
df_red

Unnamed: 0,id_str,ra,dec,category
0,390393_113,315.670729,10.126369,merger
1,388975_4015,315.879343,10.010424,merger
2,388975_4016,315.878811,10.011117,merger
3,388976_4771,316.075869,10.014121,merger
4,390397_3464,316.671962,10.263765,merger
...,...,...,...,...
197134,442690_1552,170.788929,19.708979,merger
197135,442690_1559,170.789902,19.709089,merger
197136,441332_3549,171.055134,19.566569,merger
197137,442689_2324,170.688068,19.747605,merger


In [9]:
# Use the first DESI galaxy as a worked example while developing the positional match.
row = df_red.iloc[0]

In [10]:
# Show the example galaxy's fields.
row

id_str      390393_113
ra          315.670729
dec          10.126369
category        merger
Name: 0, dtype: object

In [12]:
# Square search box of half-width 0.00139 around the example galaxy
# (about 5 arcsec if RA/DEC are in degrees — TODO confirm units).
mask = (
    (mpa_data['RA'] > row.ra - 0.00139)
    & (mpa_data['RA'] < row.ra + 0.00139)
    & (mpa_data['DEC'] > row.dec - 0.00139)
    & (mpa_data['DEC'] < row.dec + 0.00139)
)

In [13]:
# Rows of the MPA-JHU table that fall inside the search box around the example galaxy.
matched_data = mpa_data[mask]

### Finding the Matches

In [14]:
# Peek at one identifier to see how SPECOBJID_1 is stored (it comes back as a string).
mpa_data['SPECOBJID_1'][0]

'299489676975171584'

In [15]:
# Map each SPECOBJID_1 to its sky position. Because this is a dict keyed on
# the identifier, rows sharing a SPECOBJID_1 collapse to one entry (last wins).
mpa_red = {
    mpa_data['SPECOBJID_1'][idx]: {'ra' : mpa_data['RA'][idx], 'dec' : mpa_data['DEC'][idx]}
    for idx in tqdm(range(len(mpa_data)))
}

100%|██████████| 1472581/1472581 [01:47<00:00, 13680.39it/s]


In [16]:
# The id/RA/Dec values now live in `mpa_red`, so release the reference to the full FITS table.
del mpa_data

In [17]:
# Flatten the {specobjid: {'ra', 'dec'}} dict into a three-column frame,
# turning the dict keys into a regular 'specobjid_1' column.
mpa_df = (
    pd.DataFrame.from_dict(mpa_red, orient = 'index')
    .rename_axis('specobjid_1')
    .reset_index()
)

In [18]:
# `mpa_df` now holds the same information; release the intermediate dict.
del mpa_red

In [19]:
# Sanity-check the first few rows of the flattened catalogue.
mpa_df.head()

Unnamed: 0,specobjid_1,ra,dec
0,299489676975171584,146.714203,-1.041304
1,299489951853078528,146.919449,-0.990492
2,299490226730985472,146.902283,-0.984913
3,299490501608892416,146.859833,-0.808902
4,299490776486799360,146.763397,-0.810433


In [20]:
# Convert to an astropy Table; this is the container queried by the box-matching cells below.
mpa_table = Table.from_pandas(mpa_df)

In [21]:
# Release the DataFrame now that its contents are in `mpa_table`.
del mpa_df

In [22]:
%%time
mask = (mpa_table['ra'] > row.ra - 0.00139) & (mpa_table['ra'] < row.ra + 0.00139) & (mpa_table['dec'] > row.dec - 0.00139) & (mpa_table['dec'] < row.dec + 0.00139)

Wall time: 6 ms


In [24]:
def in_mpa(ra, dec, mpa_table, tol = 0.00139):
    """
    Look up a sky position in the MPA-JHU table using a square RA/Dec box.

    A row matches when both its 'ra' and 'dec' lie strictly within ``tol`` of
    the requested position. The comparison is a plain box in coordinate space:
    no cos(dec) scaling and no RA wrap-around handling is applied.

    Parameters
    ----------
    ra, dec : float
        Position to look up, in the same units as the table's 'ra' and 'dec'
        columns.
    mpa_table : table-like
        Object with 'ra', 'dec' and 'specobjid_1' columns that supports column
        access by name and row selection with a boolean mask.
    tol : float, optional
        Half-width of the search box. Defaults to 0.00139, the value used
        throughout this notebook (about 5 arcsec if coordinates are in
        degrees — TODO confirm units).

    Returns
    -------
    The 'specobjid_1' value of the row when exactly one row matches,
    'multiple_matches' when more than one row matches, or 'no_match' when
    none do.
    """
    in_box = (
        (mpa_table['ra'] > ra - tol)
        & (mpa_table['ra'] < ra + tol)
        & (mpa_table['dec'] > dec - tol)
        & (mpa_table['dec'] < dec + tol)
    )

    matched_data = mpa_table[in_box]
    n_matched = len(matched_data)

    # The three cases below are exhaustive, so no separate failure value is needed.
    if n_matched == 0:
        return 'no_match'
    if n_matched == 1:
        return matched_data['specobjid_1'][0]
    return 'multiple_matches'

In [25]:
# Cross-match every DESI galaxy against the MPA-JHU table. The new `matched`
# column holds whatever `in_mpa` returns for that row's position.
df_match = df_red.assign(
    matched = lambda frame: frame.progress_apply(
        lambda galaxy: in_mpa(galaxy.ra, galaxy.dec, mpa_table),
        axis = 1,
    )
)

100%|██████████| 197139/197139 [18:29<00:00, 177.64it/s]


In [26]:
# Release the lookup table and the input frame; `df_match` carries the input
# columns plus the match result.
del mpa_table, df_red

In [27]:
# Tally the outcomes. A SPECOBJID with a count above 1 means several DESI rows
# matched the same MPA-JHU object.
df_match.matched.value_counts()

no_match               174421
multiple_matches         2129
1323095439964137472         4
2913975538861238272         3
846812007641409536          3
                        ...  
2194466936962705408         1
2194462813794101248         1
2194453742823172096         1
2194451268922009600         1
2809258876082350080         1
Name: matched, Length: 17812, dtype: int64

In [28]:
# Save the match results; the DataFrame index is written as the first column (to_csv default).
# NOTE(review): absolute, machine-specific output path — consider making it configurable.
df_match.to_csv('C:/Users/oryan/Documents/mergers_in_desi/data/desi-mpa-matched.csv')

### Investigating Multiple Matches