In [1]:
from astropy.table import Table
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np
import time
from sklearn import neighbors
from vast.voidfinder._voidfinder_cython_find_next import MaskChecker
import pickle

ModuleNotFoundError: No module named 'vast'

In [4]:
def load_data(mask_file, file1, file2, file3, file4):
    """Read the pickled mask and the four catalog tables from disk.

    Parameters
    ----------
    mask_file : str or path-like
        Pickle file that unpacks to a ``(mask, mask_resolution)`` pair.
    file1, file2, file3, file4 : str or path-like
        Text tables read with the "ascii.commented_header" format. They are
        returned, in order, as the V1, V1max, V2 and V2max tables.

    Returns
    -------
    tuple
        ``(mask, mask_resolution, data_table_V1, data_table_V1max,
        data_table_V2, data_table_V2max)``.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only point this at mask files from a trusted source.
    with open(mask_file, "rb") as mask_handle:
        mask, mask_resolution = pickle.load(mask_handle)

    # Same reader settings for every catalog file, so read them in one pass.
    catalog_paths = (file1, file2, file3, file4)
    tables = [Table.read(path, format="ascii.commented_header")
              for path in catalog_paths]
    data_table_V1, data_table_V1max, data_table_V2, data_table_V2max = tables

    return (mask, mask_resolution,
            data_table_V1, data_table_V1max,
            data_table_V2, data_table_V2max)

In [5]:
def plot_data(data_table_V1, data_table_V1max, title):
    """Show a 3D scatter plot of two catalog tables on shared axes.

    Parameters
    ----------
    data_table_V1 : table-like
        Drawn as small maroon markers labelled 'holes'; must provide
        'x', 'y' and 'z' columns.
    data_table_V1max : table-like
        Drawn as larger navy markers labelled 'max'; must provide
        'x', 'y' and 'z' columns.
    title : str
        Figure title.
    """
    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(projection='3d')

    # (table, marker styling) pairs, drawn in this order.
    layers = (
        (data_table_V1,
         dict(color='maroon', s=0.1, alpha=0.5, label='holes')),
        (data_table_V1max,
         dict(color='navy', s=10, alpha=1.0, label='max')),
    )
    for table, marker_style in layers:
        ax.scatter(table['x'], table['y'], table['z'], **marker_style)

    ax.set(xlabel='X [Mpc/h]', ylabel='Y [Mpc/h]', zlabel='Z [Mpc/h]')
    ax.legend(loc='upper right', fontsize=10)

    plt.title(title)
    plt.show()

In [6]:
def calc_volume_boundaries(void_cat_A, void_cat_B):
    """Return the axis-aligned bounding box that encloses both catalogs.

    Parameters
    ----------
    void_cat_A, void_cat_B : table-like
        Objects indexable by 'x', 'y' and 'z', each yielding an array of
        coordinates.

    Returns
    -------
    tuple
        ``(x_min, x_max, y_min, y_max, z_min, z_max)`` taken over the
        coordinates of both catalogs combined.
    """
    limits = []
    for axis in ('x', 'y', 'z'):
        coords_a = void_cat_A[axis]
        coords_b = void_cat_B[axis]
        # Lower limit first, then upper limit, for each axis in turn.
        limits.append(np.minimum(np.min(coords_a), np.min(coords_b)))
        limits.append(np.maximum(np.max(coords_a), np.max(coords_b)))
    return tuple(limits)

In [27]:
def generate_grid_points(x_min, x_max, y_min, y_max, z_min, z_max, spacing=1.0):
    """Build a regular rectangular lattice of 3D points.

    Each axis is sampled with ``np.arange(lo, hi, spacing)``, so the upper
    limit of every axis is excluded.

    Parameters
    ----------
    x_min, x_max, y_min, y_max, z_min, z_max : float
        Lower and upper limits of the lattice along each axis.
    spacing : float, optional
        Step between neighbouring lattice points along every axis
        (default 1.0).

    Returns
    -------
    grid_points : ndarray of shape (N, 3)
        One row per lattice point, columns ordered x, y, z.
    """
    axis_limits = ((x_min, x_max), (y_min, y_max), (z_min, z_max))
    axis_samples = [np.arange(lo, hi, spacing) for lo, hi in axis_limits]

    # 'ij' indexing keeps x as the slowest-varying coordinate when flattened.
    mesh = np.meshgrid(*axis_samples, indexing='ij')

    return np.column_stack([component.ravel() for component in mesh])

In [11]:
def calculate_points(data_table_V1, data_table_V2):
    """Generate grid points spanning the bounding box of two void catalogs.

    Parameters
    ----------
    data_table_V1, data_table_V2 : table-like
        Catalogs passed to ``calc_volume_boundaries`` to find the box limits.

    Returns
    -------
    point_coords : ndarray
        Whatever ``generate_grid_points`` returns for those limits with its
        default spacing.
    """
    # The boundary tuple is already in the argument order the grid
    # generator expects, so it can be forwarded directly.
    limits = calc_volume_boundaries(data_table_V1, data_table_V2)
    return generate_grid_points(*limits)

In [12]:
def plot_3d_points(pts):
    """Show a thinned 3D scatter plot of a point set.

    Parameters
    ----------
    pts : ndarray
        Indexed as ``pts[axis, point]`` with rows 0, 1, 2 used as x, y, z.
        Only every 100th point is drawn.
    """
    stride = 100  # plot a 1-in-100 subsample
    xs = pts[0, ::stride]
    ys = pts[1, ::stride]
    zs = pts[2, ::stride]

    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(projection='3d')
    ax.scatter(xs, ys, zs, color='blue', s=10, alpha=0.5)
    ax.set(xlabel='X [Mpc/h]', ylabel='Y [Mpc/h]', zlabel='Z [Mpc/h]')

    plt.title("Points from Void Volume")
    plt.show()

In [15]:
def get_points_in_mask(pts, mask, mask_resolution, rmin, rmax):
    """Keep only the points that the mask checker reports as inside the mask.

    Parameters
    ----------
    pts : ndarray
        Indexed as ``pts[:, i]``; each column is reshaped to (1, 3) before
        being tested, so there must be three coordinates per point.
    mask, mask_resolution, rmin, rmax
        Forwarded unchanged to ``MaskChecker``.

    Returns
    -------
    points_in_mask : ndarray
        The columns of ``pts`` for which ``not_in_mask`` was falsy.

    Notes
    -----
    Prints the elapsed time and several diagnostics, including the full
    array of retained points.
    """
    t_start = time.time()

    n_candidates = pts.shape[1]
    keep = np.ones(n_candidates, dtype=bool)

    checker = MaskChecker(0, mask, mask_resolution, rmin, rmax)

    # Points are tested one at a time, each passed as a (1, 3) array.
    for col in range(n_candidates):
        single_point = pts[:, col].reshape((1, 3))
        keep[col] = not bool(checker.not_in_mask(single_point))

    points_in_mask = pts[:, keep]
    (_n_coords, _n_kept) = points_in_mask.shape

    print(time.time() - t_start)
    print('\nPoints in Mask Shape:', points_in_mask.shape)
    print('\nSum of Points IN:', np.sum(keep))
    print('\nSum of Points OUT:', np.sum(~keep))
    print('\nBoolean Shape:', keep.shape)
    print('\nPoints in Mask:',points_in_mask)

    return points_in_mask

In [16]:
def plot_points_in_mask(points_in_mask):
    """Show a thinned 3D scatter plot of the points retained by the mask.

    Parameters
    ----------
    points_in_mask : ndarray
        Indexed as ``points_in_mask[axis, point]`` with rows 0, 1, 2 used as
        x, y, z. Only every 100th point is drawn.
    """
    stride = 100  # plot a 1-in-100 subsample
    xs = points_in_mask[0, ::stride]
    ys = points_in_mask[1, ::stride]
    zs = points_in_mask[2, ::stride]

    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(projection='3d')
    ax.scatter(xs, ys, zs, color='red', s=10, alpha=0.5)
    ax.set(xlabel='X [Mpc/h]', ylabel='Y [Mpc/h]', zlabel='Z [Mpc/h]')

    plt.title("Points in Mask")
    plt.show()

In [28]:
def kd_tree(point_coords, void_catalog):
    """Build a KDTree over the sphere centers of a void catalog.

    Parameters
    ----------
    point_coords : ndarray
        Not used by this function; it is accepted only because it is part
        of the existing signature.
    void_catalog : table-like
        Catalog providing 'x', 'y' and 'z' columns with the sphere centers.

    Returns
    -------
    sphere_tree : neighbors.KDTree
        Tree built from the catalog centers, one (x, y, z) row per entry.
    """
    # Stack the three coordinate columns and transpose to one row per sphere.
    centers = np.array([void_catalog[axis] for axis in ('x', 'y', 'z')]).T

    return neighbors.KDTree(centers)

In [20]:
def point_query(point_coords, sphere_tree, void_catalog):
    """Flag which query points fall inside their nearest catalog sphere.

    Parameters
    ----------
    point_coords : ndarray of shape (3, N)
        Query points, one column per point; transposed to (N, 3) before
        being handed to the tree.
    sphere_tree : object with a ``query(X, k)`` method
        Tree built over the sphere centers of ``void_catalog`` (see
        ``kd_tree``). Must return ``(distances, indices)``.
    void_catalog : table-like
        Catalog providing a 'radius' column, indexed in the same order as
        the centers stored in ``sphere_tree``.

    Returns
    -------
    true_inside : boolean array
        True where the distance to the nearest sphere center is strictly
        smaller than that sphere's radius. It has the same shape as the
        distance array returned by ``sphere_tree.query``.
    """
    dist, idx = sphere_tree.query(point_coords.T, k=1)

    # Look the radii up in the catalog that was passed in. The previous
    # code read an undefined name (`void_cat`), which either raised
    # NameError or silently used whatever global happened to exist.
    true_inside = dist < void_catalog['radius'][idx]

    return true_inside

In [29]:
def compare_void_catalogs(data_table_V1, data_table_V2, points_in_mask, U=1):
    """Count how many sample points fall inside each of two void catalogs.

    For each of ``U`` trials the sample points are rigidly shifted by a
    random offset, each point is tested against its nearest sphere in both
    catalogs, and the membership counts are recorded.

    Parameters
    ----------
    data_table_V1, data_table_V2 : table-like
        Void catalogs providing 'x', 'y', 'z' and 'radius' columns.
    points_in_mask : ndarray of shape (3, N)
        Sample points, one column per point. Not modified.
    U : int, optional
        Number of randomly shifted trials (default 1).

    Returns
    -------
    dict
        Every entry except the last is an array of length ``U`` with one
        count per trial:

        - "Number of points inside V1" / "Number of points outside V1"
        - "Number of points inside V2" / "Number of points outside V2"
        - "Number of points inside both"
        - "Number of points inside neither"
        - "Number of points inside A": inside V1 but not V2
        - "Number of points inside B": inside V2 but not V1
        - "Total number of points": N, the number of sample points (int)
    """
    # Previously this name was read in the return dict without ever being
    # defined in this function.
    n_points = points_in_mask.shape[1]

    count_in_V1 = np.zeros(U)
    count_out_V1 = np.zeros(U)

    count_in_V2 = np.zeros(U)
    count_out_V2 = np.zeros(U)

    inside_both = np.zeros(U)
    inside_neither = np.zeros(U)
    inside_a = np.zeros(U)
    inside_b = np.zeros(U)

    # kd_tree takes (point_coords, void_catalog); the catalog must be the
    # second argument.
    kdTree_V1 = kd_tree(points_in_mask, data_table_V1)
    kdTree_V2 = kd_tree(points_in_mask, data_table_V2)

    for i in range(U):

        # Shift every point by the same random offset, drawn uniformly from
        # [0, 1) per axis, so the queries are not locked to the grid.
        # Broadcasting builds a new float array, so the offset is not
        # truncated if the input happens to be integer-typed.
        delta = np.random.rand(3)
        shifted_points = points_in_mask + delta[:, np.newaxis]

        true_inside_V1 = point_query(shifted_points, kdTree_V1, data_table_V1)
        true_inside_V2 = point_query(shifted_points, kdTree_V2, data_table_V2)

        # "~" inverts the boolean array, giving the points outside.
        count_in_V1[i] = np.sum(true_inside_V1)
        count_out_V1[i] = np.sum(~true_inside_V1)

        count_in_V2[i] = np.sum(true_inside_V2)
        count_out_V2[i] = np.sum(~true_inside_V2)

        # The four mutually exclusive membership combinations.
        inside_both[i] = np.sum(np.logical_and(true_inside_V1, true_inside_V2))
        inside_neither[i] = np.sum(np.logical_and(~true_inside_V1, ~true_inside_V2))
        inside_a[i] = np.sum(np.logical_and(true_inside_V1, ~true_inside_V2))
        inside_b[i] = np.sum(np.logical_and(~true_inside_V1, true_inside_V2))

    return {
        "Number of points inside V1": count_in_V1,
        "Number of points outside V1": count_out_V1,
        "Number of points inside V2": count_in_V2,
        "Number of points outside V2": count_out_V2,
        "Number of points inside both": inside_both,
        "Number of points inside neither": inside_neither,
        "Number of points inside A": inside_a,
        "Number of points inside B": inside_b,
        "Total number of points": n_points
    }

In [26]:
def calculate_ratios(count_in_V1, count_in_V2, inside_A_and_B, not_inside_A_and_B, inside_A, inside_B, n_points):
    """Convert membership counts into fractions of the total point count.

    Parameters
    ----------
    count_in_V1, count_in_V2 : number or ndarray
        Divided by ``n_points`` as given (not summed first).
    inside_A_and_B, not_inside_A_and_B, inside_A, inside_B : array-like
        Each is summed with ``np.sum`` and then divided by ``n_points``.
    n_points : number
        Total number of sample points.

    Returns
    -------
    tuple
        ``(r_A, r_B, r_AB, r_notAB, r_A_not_B, r_B_not_A, Sum)`` where
        ``Sum`` is the total of the last four fractions.
    """
    def _summed_fraction(values):
        # Total of `values` as a fraction of all sample points.
        return np.sum(values) / n_points

    r_A = count_in_V1 / n_points
    r_B = count_in_V2 / n_points

    partition = [
        _summed_fraction(group)
        for group in (inside_A_and_B, not_inside_A_and_B, inside_A, inside_B)
    ]
    r_AB, r_notAB, r_A_not_B, r_B_not_A = partition

    total = r_AB + r_notAB + r_A_not_B + r_B_not_A
    return (r_A, r_B, r_AB, r_notAB, r_A_not_B, r_B_not_A, total)

In [None]:
def main():
    """Run the full catalog comparison: load, plot, sample, mask, compare.

    The input paths below are empty placeholders and must be filled in
    before this can run.

    Returns
    -------
    counts : dict
        The per-trial counts returned by ``compare_void_catalogs``.
    ratios : tuple
        The fractions returned by ``calculate_ratios``.
    """
    # define input files (placeholders -- fill in before running)
    mask_file = ''
    file1 = ''
    file2 = ''
    file3 = ''
    file4 = ''

    # load the mask and the four catalog tables
    (mask, mask_resolution,
     data_table_V1, data_table_V1max,
     data_table_V2, data_table_V2max) = load_data(mask_file, file1, file2, file3, file4)

    # plot data for V1 & V2
    plot_data(data_table_V1, data_table_V1max, "Voids Vol1 and Vol1Max")
    plot_data(data_table_V2, data_table_V2max, "Voids Vol2 and Vol2Max")

    # calculate volume boundaries
    x_min, x_max, y_min, y_max, z_min, z_max = calc_volume_boundaries(data_table_V1, data_table_V2)

    # generate_grid_points returns one row per point, shape (N, 3), but
    # every consumer below indexes points as (3, N), so transpose once here.
    point_coords = generate_grid_points(x_min, x_max, y_min, y_max, z_min, z_max).T

    # plot generated grid points
    plot_3d_points(point_coords)

    # get points inside mask
    rmin = 0
    rmax = 100
    points_in_mask = get_points_in_mask(point_coords, mask, mask_resolution, rmin, rmax)

    # plot points inside mask (the plotting helper takes the points only)
    plot_points_in_mask(points_in_mask)

    # compare_void_catalogs builds the KD-trees and runs the point queries
    # itself, so it only needs the two catalogs and the sample points.
    counts = compare_void_catalogs(data_table_V1, data_table_V2, points_in_mask)

    # Taken from the array directly rather than from the returned dict.
    n_points = points_in_mask.shape[1]

    # calculate_ratios sums its last four array inputs, so with the default
    # single trial each length-1 count array reduces to its count.
    ratios = calculate_ratios(
        counts["Number of points inside V1"],
        counts["Number of points inside V2"],
        counts["Number of points inside both"],
        counts["Number of points inside neither"],
        counts["Number of points inside A"],
        counts["Number of points inside B"],
        n_points,
    )

    return counts, ratios