In [1]:
import numpy as np
from astropy.table import Table
import time
import os

In [2]:
#	plot setting
import matplotlib.pyplot as plt
import matplotlib as mpl
from IPython.core.interactiveshell import InteractiveShell
# Cell-output mode for the interactive shell ("last_expr" is the value this
# notebook selects; see the IPython InteractiveShell configuration docs).
InteractiveShell.ast_node_interactivity = "last_expr"

# Figure defaults, applied in one call on the shared rcParams mapping
# (matplotlib.pyplot exposes the same rcParams object as matplotlib).
mpl.rcParams.update({
    "axes.titlesize": 14,
    "axes.labelsize": 20,
    "savefig.dpi": 500,
    "font.family": "serif",
})

In [3]:
def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Parameters
    ----------
    size_bytes : int or float
        Number of bytes. Negative values are formatted with a leading minus
        sign; values whose magnitude is below 1024 are reported in bytes.

    Returns
    -------
    str
        ``"0B"`` for zero, otherwise ``"<value> <unit>"`` where the value is
        rounded to two decimals and the unit is one of B, KB, ..., YB.
        Magnitudes beyond the largest unit are expressed in YB.
    """
    if size_bytes == 0:
        return "0B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

    # Pick the unit by exact threshold comparison rather than a floating-point
    # logarithm: this cannot raise on negative input, cannot produce a negative
    # index for magnitudes below 1, and never runs past the last unit.
    magnitude = abs(size_bytes)
    i = 0
    while i < len(size_name) - 1 and magnitude >= 1024 ** (i + 1):
        i += 1

    s = round(size_bytes / 1024 ** i, 2)
    return "%s %s" % (s, size_name[i])

In [5]:
# Input catalogue (an alternative location used previously: '../data/GLADE+.fits').
path_cat = '/data3/jehwang/wisematch/light_GLADE+.fits'

# On-disk size of the input file in bytes.
initial_size = os.stat(path_cat).st_size
print(f"Initial Size of {os.path.basename(path_cat)}: {convert_size(initial_size)}")

Initial Size of light_GLADE+.fits: 3.52 GB


In [6]:
# Time the read of the input catalogue into an astropy Table.
# perf_counter() is a monotonic clock, so the elapsed value cannot be skewed
# by system clock adjustments the way a time.time() difference can.
st = time.perf_counter()
cat = Table.read(path_cat, format='fits')
delt = time.perf_counter() - st
# Report the file that was actually read instead of a hard-coded name.
print(f"Time to read {os.path.basename(path_cat)}: {delt:.3f} seconds")

Time to read GLADE+.fits: 13.707 seconds


In [7]:
# Number of rows in the loaded catalogue; later cells use it as the
# denominator when reporting what fraction of objects survives a cut.
n_total = len(cat)
print(f"Total number of objects: {n_total}")

Total number of objects: 23181758


In [10]:
# Upper limit applied to the 'd_L' column (reported as Mpc in the summary below).
distance_cut = 1000.

# Indices of rows strictly below the cut. An earlier revision of this cell
# addressed the same selection through the generic column name 'col33'.
indx_distance_cut = np.where(cat['d_L'] < distance_cut)
_cat = cat[indx_distance_cut]

# Summarise how many objects survive the cut, as a count and a percentage.
n_selected = len(_cat)
print(f"Number of objects with distance < {distance_cut:.1f} Mpc: {n_selected}/{n_total} ({1e2*n_selected/n_total:1.1f}%)")

Number of objects with distance < 1000.0 Mpc: 11224913/23181758 (48.4%)


In [11]:
# Prints the distance-cut summary (selected count, total count, percentage).
# Relies on `_cat`, `distance_cut` and `n_total` already being defined by
# earlier cells; it does not recompute the selection itself.
print(f"Number of objects with distance < {distance_cut:.1f} Mpc: {len(_cat)}/{n_total} ({1e2*len(_cat)/n_total:1.1f}%)")

Number of objects with distance < 1000.0 Mpc: 11224913/23181758 (48.4%)


In [None]:
# Columns to keep in the slimmed-down catalogue, addressed by generic
# 'colNN' names.
# NOTE(review): the distance-cut cell indexes the same table with 'd_L', not
# 'col33' -- confirm which naming scheme the loaded file actually uses.
selected_columns = [
    'col1',   # GLADEname
    'col8',   # Object type flag
    'col9',   # RA
    'col10',  # Dec
    'col29',  # z_cmb
    'col30',  # z flag
    'col32',  # z_err
    'col33',  # d_L
    'col34',  # d_L err
    # NOTE(review): col36-col40 carry no description here; presumably stellar
    # mass, its error and flag, merger rate and its error per the GLADE+
    # column list -- TODO confirm.
    'col36',
    'col37',
    'col38',
    'col39',
    'col40',
]

# Fail fast with a readable message if any requested column is absent,
# instead of a bare KeyError later when the table is subset.
missing_columns = [name for name in selected_columns if name not in cat.colnames]
if missing_columns:
    raise KeyError(
        f"Selected columns not present in the catalogue: {missing_columns}; "
        f"available columns: {cat.colnames}"
    )

print(f"Number of selected columns: {len(selected_columns)}/{len(cat.colnames)}")

Number of selected columns: 14/40


In [17]:
# Keep only the chosen columns of the distance-limited catalogue and record
# provenance in the table metadata before writing it out.
subcat = _cat[selected_columns]
subcat.meta['UPDATE'] = '2023-04-19'
subcat.meta['MODIFY_AUTHOR'] = 'Gregory Paek'
# Key was previously misspelled 'DISTNACE_CUT'. The value now follows
# `distance_cut`, so the recorded cut cannot drift from the one applied.
# NOTE(review): any reader of the old misspelled key must be updated.
subcat.meta['DISTANCE_CUT'] = distance_cut

# Written next to the input file. overwrite=True lets the cell be re-run
# (e.g. Restart & Run All) without failing because the output already exists.
subcat.write(
    os.path.join(os.path.dirname(path_cat), 'GLADE+_230419.fits'),
    overwrite=True,
)



In [27]:
# Path of the slimmed catalogue, located in the same directory as the input.
newcat = f'{os.path.dirname(path_cat)}/GLADE+_230419.fits'

# Its on-disk size in bytes; the trailing expression is the cell's output.
lightened_size = os.stat(newcat).st_size
convert_size(lightened_size)

'782.81 MB'

In [26]:
# Relative reduction in file size, expressed as a percentage of the original.
saved_percent = 1e2 * (1 - lightened_size / initial_size)
print(f"Saved {saved_percent:.1f} %")

Saved 88.8 %


In [28]:
# Time the read of the slimmed catalogue for comparison with the full file.
# perf_counter() is a monotonic clock, so the elapsed value cannot be skewed
# by system clock adjustments the way a time.time() difference can.
# NOTE(review): this rebinds `cat` to the slimmed table, so re-running earlier
# cells that use `cat` afterwards will operate on the reduced catalogue.
st = time.perf_counter()
cat = Table.read(newcat, format='fits')
delt = time.perf_counter() - st
print(f"Time to read lightened GLADE+.fits: {delt:.3f} seconds")

Time to read lightened GLADE+.fits: 1.062 seconds
