Minor script to prepare catalogs for the voidiness analysis

In [1]:
import pandas as pd
from astropy.cosmology import WMAP9 as cosmo # Used to calculate comoving dist
from footprint_filter import filter_by_footprint
from astropy.table import Table

from io import StringIO


In [2]:
# Void catalog; its `z` column sets the redshift range used by filter_by_redshift.
voids = pd.read_excel('exported_dataFrames/voids.xlsx')

In [3]:
def filter_by_redshift(voids_df, data_df):
    """Keep only the rows of ``data_df`` whose redshift lies in the voids' range.

    Parameters
    ----------
    voids_df : pandas.DataFrame
        Void catalog. Its ``z`` column defines the inclusive redshift range
        ``[z.min(), z.max()]``; missing values in that column are ignored.
    data_df : pandas.DataFrame
        Catalog to filter. Must have a ``z`` column.

    Returns
    -------
    pandas.DataFrame
        The rows of ``data_df`` that are neither closer than the nearest void
        nor farther than the farthest one. Original index labels are kept.
        Rows whose ``z`` is missing are kept as well, because they compare as
        neither "too close" nor "too far".

    Raises
    ------
    ValueError
        If ``voids_df`` has no valid (non-missing) redshift to build the
        range from.
    """
    # Series.min()/.max() skip NaN. The builtin min()/max() are order
    # dependent in the presence of NaN (a leading NaN makes the bound NaN,
    # which silently disables the whole filter) and iterate in pure Python.
    z_min = voids_df.z.min()
    z_max = voids_df.z.max()
    if pd.isna(z_min) or pd.isna(z_max):
        raise ValueError("voids_df contains no valid redshift values in column 'z'")

    too_close = data_df.z < z_min
    too_far = data_df.z > z_max
    return data_df[~(too_close | too_far)]

def add_cmvd(data_df):
    """Attach a ``cmvd_Mpc`` column holding each row's comoving distance.

    The distance is computed from the ``z`` column with the module-level
    ``cosmo`` cosmology and stored as plain numbers (the quantity's ``.value``).
    ``data_df`` is modified in place and the same object is returned.
    """
    # Comoving distance for every redshift in the table, units stripped.
    data_df['cmvd_Mpc'] = cosmo.comoving_distance(data_df['z']).value
    return data_df

def filter_by_z_ra_dec(data_df, voids, cmvd_add=True,
                       footprint_points_fn="exported_dataFrames/footprint_points.xlsx"):
    """Apply the redshift cut, optional comoving-distance column and footprint cut.

    Parameters
    ----------
    data_df : pandas.DataFrame or str or os.PathLike
        The catalog itself, or the path of an Excel file to load it from.
    voids : pandas.DataFrame
        Void catalog whose ``z`` column sets the allowed redshift range.
    cmvd_add : bool, default True
        When true, a ``cmvd_Mpc`` column is added via ``add_cmvd`` after the
        redshift cut.
    footprint_points_fn : str
        Passed unchanged to ``filter_by_footprint``
        (presumably the file describing the survey footprint; see that helper).

    Returns
    -------
    pandas.DataFrame
        The filtered catalog returned by ``filter_by_footprint``.

    Raises
    ------
    TypeError
        If ``data_df`` is neither a DataFrame nor a path.
    """
    import os  # local import so this function does not depend on the import cell

    if isinstance(data_df, (str, os.PathLike)):
        data_df = pd.read_excel(data_df)
    elif not isinstance(data_df, pd.DataFrame):
        # Explicit raise instead of assert: asserts are stripped under -O.
        raise TypeError(
            "data_df must be a pandas DataFrame or a path to an Excel file, "
            f"got {type(data_df).__name__}"
        )

    data_df = filter_by_redshift(voids, data_df)

    if cmvd_add:
        data_df = add_cmvd(data_df)
    return filter_by_footprint(data_df, footprint_points_fn)

In [4]:
# Scratch check of the string test used in filter_by_z_ra_dec; evaluates to True.
type('hello') == str

True

In [5]:
## 4LAC catalog
# Load the spreadsheet and run it through the redshift cut, the comoving-distance
# step (cmvd_add defaults to True) and the footprint filter.
four_lac_GRS = filter_by_z_ra_dec('More GRS/FINALCorrectedRedshifts.xlsx', voids)

# Save to excel file (index=False: the DataFrame index is not written as a column)
four_lac_GRS.to_excel('exported_dataFrames/z_ra_dec_filtered_4lac.xlsx', index=False)

In [6]:
## xray catalog
# Same pipeline as the other catalogs: load from Excel, filter, then export
# without the DataFrame index.
xray_agns = filter_by_z_ra_dec('exported_dataFrames/xray_agns.xlsx', voids)
xray_agns.to_excel('exported_dataFrames/z_ra_dec_filtered_xray_agns.xlsx', index = False)

In [7]:
## Massive SDSS DR16 Catalog

dat = Table.read('SDSS Data Used/DR16Q_Superset_v3.fits', format='fits')
# We lose data in this step, since some columns hold multiple data points per row
# and only the one-dimensional columns are carried over to pandas below.
multidim_cols = [name for name in dat.colnames if len(dat[name].shape) >1]# For bookkeeping purposes

# Grab the non-multidimensional columns
singledim_cols = [name for name in dat.colnames if len(dat[name].shape) <=1] 
sdss_dr16 = dat[singledim_cols].to_pandas()

In [8]:
# NOTE: Check the coordinate system used. Ensure all data tables use the same system.
# NOTE: Some redshifts are negative. Why? They are specific negative values.
sdss_dr16 = sdss_dr16.rename(columns={"RA": 'RAdeg', "DEC": "DEdeg", "Z":'z'})


# Convert bytes to string. Values that are already str are left untouched, so
# re-running this cell (or receiving pre-decoded data) does not fail with
# AttributeError: 'str' object has no attribute 'decode'.
sdss_dr16['AUTOCLASS_PQN'] = sdss_dr16['AUTOCLASS_PQN'].apply(
    lambda x: x.decode('utf-8') if isinstance(x, bytes) else x
)

# Filter by object type. We only want to keep QSOs and galaxies.
is_quasar = sdss_dr16.AUTOCLASS_PQN == "QSO"
is_galaxy = sdss_dr16.AUTOCLASS_PQN == "GALAXY"

# Keep a row when it is classified as either a galaxy or a quasar
sdss_dr16 = sdss_dr16[is_galaxy | is_quasar]

In [9]:
# Now the big fancy filter: redshift cut, comoving-distance column and footprint
# filter, applied to the in-memory DataFrame rather than to a file path.

sdss_dr16 = filter_by_z_ra_dec(sdss_dr16, voids)

In [10]:
# NOTE(review): unlike the 4LAC and x-ray exports, this call does not pass
# index=False, so the DataFrame index is written as an extra column.
# Confirm whether that is intentional.
sdss_dr16.to_excel('exported_dataFrames/z_ra_dec_filtered_SDSS_DR16.xlsx')