In [1]:
import numpy as np

from data_utils import compute_pairs_euclidean_distances, load_numpy_data, save_numpy_array

from psf_constants import PROCESSED_TRAIN_2M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, \
                          PROCESSED_TRAIN_5M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, \
                          PROCESSED_TRAIN_9M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, \
                          PROCESSED_TRAIN_14M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, \
                          PROCESSED_TRAIN_20M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, \
                          PROCESSED_VALIDATION_20M_ZERNIKE_COMPLEX_FIELDS_FILE_PATH, \
                          TRAIN_2M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, \
                          TRAIN_5M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, \
                          TRAIN_9M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, \
                          TRAIN_14M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, \
                          TRAIN_20M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, \
                          ZERNIKE_TRAIN_EUCLIDEAN_DISTANCES_PREFIX, \
                          ZERNIKE_TRAIN_EUCLIDEAN_DISTANCES_PAIRS_PREFIX, \
                          PSF_TRAIN_FILE_SUFFIXES, \
                          NUMPY_SUFFIX

In [2]:
def create_path(prefix, suffix):
    """Build a data file path by joining a prefix, a suffix and NUMPY_SUFFIX.

    Args:
        prefix: Leading part of the path (one of the *_PREFIX constants).
        suffix: Subset identifier appended right after the prefix.

    Returns:
        str: The concatenation ``prefix + suffix + NUMPY_SUFFIX``.
    """
    return "{}{}{}".format(prefix, suffix, NUMPY_SUFFIX)

def create_random_pair_indexes(array_n_points, pairs_per_subset=10000):
    """Draw random pairs of distinct row indexes.

    Pairs are sampled with replacement using NumPy's global random state, so
    the same pair may appear in more than one row; only the two indexes
    inside a single pair are guaranteed to differ.

    Args:
        array_n_points (int): Number of rows in the array to index. Every
            returned index lies in ``[0, array_n_points)``.
        pairs_per_subset (int): Number of pairs to return.

    Returns:
        numpy.ndarray: Integer array of shape ``(pairs_per_subset, 2)`` where
        the two columns of each row hold different indexes.

    Raises:
        ValueError: If ``array_n_points`` is smaller than 2, since no pair of
            distinct indexes exists in that case.
    """
    if array_n_points < 2:
        # Without this guard the rejection loop below could never terminate.
        raise ValueError(
            "array_n_points must be at least 2 to build pairs of distinct "
            f"indexes, got {array_n_points}"
        )

    selected_pairs = np.random.randint(0, array_n_points, size=(pairs_per_subset, 2))
    # Reject pairs that point to the same row twice.
    selected_pairs = selected_pairs[selected_pairs[:, 0] != selected_pairs[:, 1]]

    # Top up the rejected pairs, drawing from the same index range as the
    # first batch so every index stays valid for the target array.
    while selected_pairs.shape[0] < pairs_per_subset:
        more_pairs = np.random.randint(0, array_n_points, size=(pairs_per_subset, 2))
        more_pairs = more_pairs[more_pairs[:, 0] != more_pairs[:, 1]]
        selected_pairs = np.concatenate((selected_pairs, more_pairs))

    # The top-up may overshoot; keep exactly the requested amount.
    return selected_pairs[:pairs_per_subset]

# 1. Brute Force

In [4]:
# For every training subset suffix: load the output fluxes and the processed
# complex fields of the 2, 5, 9, 14 and 20 mode Zernike datasets, compute the
# euclidean distances over one shared set of random index pairs, and save both
# the pairs and the distances.
for suffix in PSF_TRAIN_FILE_SUFFIXES:

    # Define data paths (prefix + suffix + NUMPY_SUFFIX, see create_path)
    m2_fluxes_path = create_path(TRAIN_2M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, suffix)
    m2_complex_fields_path = create_path(PROCESSED_TRAIN_2M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, suffix)
    m5_fluxes_path = create_path(TRAIN_5M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, suffix)
    m5_complex_fields_path = create_path(PROCESSED_TRAIN_5M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, suffix)
    m9_fluxes_path = create_path(TRAIN_9M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, suffix)
    m9_complex_fields_path = create_path(PROCESSED_TRAIN_9M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, suffix)
    m14_fluxes_path = create_path(TRAIN_14M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, suffix)
    m14_complex_fields_path = create_path(PROCESSED_TRAIN_14M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, suffix)
    m20_fluxes_path = create_path(TRAIN_20M_ZERNIKE_OUTPUT_FLUXES_FILE_PREFIX, suffix)
    m20_complex_fields_path = create_path(PROCESSED_TRAIN_20M_ZERNIKE_COMPLEX_FIELDS_FILE_PREFIX, suffix)

    # Load data (all ten arrays of this subset are loaded before any distance
    # is computed)
    m2_fluxes = load_numpy_data(m2_fluxes_path)
    m2_complex_fields = load_numpy_data(m2_complex_fields_path)
    m5_fluxes = load_numpy_data(m5_fluxes_path)
    m5_complex_fields = load_numpy_data(m5_complex_fields_path)
    m9_fluxes = load_numpy_data(m9_fluxes_path)
    m9_complex_fields = load_numpy_data(m9_complex_fields_path)
    m14_fluxes = load_numpy_data(m14_fluxes_path)
    m14_complex_fields = load_numpy_data(m14_complex_fields_path)
    m20_fluxes = load_numpy_data(m20_fluxes_path)
    m20_complex_fields = load_numpy_data(m20_complex_fields_path)

    # Select pairs to compute euclidean distances.
    # The index range is taken from the 2 mode fluxes only and the same pairs
    # are reused for every array below.
    # NOTE(review): assumes all ten arrays have the same number of rows as
    # m2_fluxes - confirm.
    # No random seed is set in the visible code, so the pairs differ between
    # runs unless one is set elsewhere.
    n_points = m2_fluxes.shape[0]
    selected_pairs = create_random_pair_indexes(n_points, pairs_per_subset=10000)

    # Compute euclidean distances, once on the fluxes and once on the complex
    # fields (is_complex_field=True) of each dataset
    m2_fluxes_euclidean_distances = compute_pairs_euclidean_distances(m2_fluxes,
                                                                      selected_pairs)
    m2_euclidean_distances = compute_pairs_euclidean_distances(m2_complex_fields,
                                                               selected_pairs,
                                                               is_complex_field=True)

    m5_fluxes_euclidean_distances = compute_pairs_euclidean_distances(m5_fluxes,
                                                                      selected_pairs)
    m5_euclidean_distances = compute_pairs_euclidean_distances(m5_complex_fields,
                                                               selected_pairs,
                                                               is_complex_field=True)

    m9_fluxes_euclidean_distances = compute_pairs_euclidean_distances(m9_fluxes,
                                                                      selected_pairs)
    m9_euclidean_distances = compute_pairs_euclidean_distances(m9_complex_fields,
                                                               selected_pairs,
                                                               is_complex_field=True)

    m14_fluxes_euclidean_distances = compute_pairs_euclidean_distances(m14_fluxes,
                                                                       selected_pairs)
    m14_euclidean_distances = compute_pairs_euclidean_distances(m14_complex_fields,
                                                                selected_pairs,
                                                                is_complex_field=True)

    m20_fluxes_euclidean_distances = compute_pairs_euclidean_distances(m20_fluxes,
                                                                       selected_pairs)
    m20_euclidean_distances = compute_pairs_euclidean_distances(m20_complex_fields,
                                                                selected_pairs,
                                                                is_complex_field=True)
    
    # Merge in columns. Each result is reshaped to a single column, giving ten
    # columns ordered as (fluxes, complex fields) for 2, 5, 9, 14 and 20 modes.
    euclidean_distances = np.concatenate((m2_fluxes_euclidean_distances.reshape(-1, 1),
                                          m2_euclidean_distances.reshape(-1, 1),
                                          m5_fluxes_euclidean_distances.reshape(-1, 1),
                                          m5_euclidean_distances.reshape(-1, 1),
                                          m9_fluxes_euclidean_distances.reshape(-1, 1),
                                          m9_euclidean_distances.reshape(-1, 1),
                                          m14_fluxes_euclidean_distances.reshape(-1, 1),
                                          m14_euclidean_distances.reshape(-1, 1),
                                          m20_fluxes_euclidean_distances.reshape(-1, 1),
                                          m20_euclidean_distances.reshape(-1, 1),), 
                                          axis=1)

    # Save arrays: the sampled index pairs and the merged distances, one file
    # each per subset suffix.
    # NOTE(review): single_precision=False presumably keeps the integer
    # indexes from being cast - confirm in save_numpy_array.
    pairs_path = create_path(ZERNIKE_TRAIN_EUCLIDEAN_DISTANCES_PAIRS_PREFIX, suffix)
    euclidean_distances_path = create_path(ZERNIKE_TRAIN_EUCLIDEAN_DISTANCES_PREFIX, suffix)
    save_numpy_array(selected_pairs, pairs_path, single_precision=False)
    save_numpy_array(euclidean_distances, euclidean_distances_path)