In [1]:
import numpy as np

from data_utils import normalize_data, \
                       flatten_data, \
                       save_numpy_array, \
                       save_scaler
                      

# 0. Objective

This notebook processes and saves the flux, amplitude, and phase data, splitting each into train, validation, and test datasets.
The dataset sizes are the following:
- Train: 70000
- Validation: 10000
- Test: 10000

# 1. Flux Data

In [22]:
from constants import ORIGINAL_FLUXES_FILE

In [None]:
TRAIN_SIZE = 70_000       # samples in the train split
VALIDATION_SIZE = 10_000  # samples in the validation split
TEST_SIZE = 10_000        # samples in the test split

## 1.1 Flux Data for Fully Connected Architectures
For the FC Architectures we need to:
 - Normalize
 - Flatten

In [2]:
# Load the original (not yet normalized or flattened) flux data from the file
# referenced by ORIGINAL_FLUXES_FILE
fluxes_array = np.load(ORIGINAL_FLUXES_FILE)

Check the shape of the data: there should be 90000 data points (70000 train + 10000 validation + 10000 test).

In [3]:
# Fail fast if the number of samples does not match what the three splits
# expect, instead of relying on a visual check of the displayed shape.
assert fluxes_array.shape[0] == TRAIN_SIZE + VALIDATION_SIZE + TEST_SIZE, (
    f"Expected {TRAIN_SIZE + VALIDATION_SIZE + TEST_SIZE} flux samples, "
    f"got {fluxes_array.shape[0]}"
)

# Display the full shape as the cell output
fluxes_array.shape

(90000, 55, 24)

Now process the data

In [4]:
# Normalize the raw fluxes. normalize_data returns the normalized array together
# with the scaler it used; the scaler is kept so it can be saved alongside the data.
# NOTE(review): the exact normalization (per-feature, min-max, standard, ...) is
# defined in data_utils.normalize_data — confirm there.
fc_normalized_fluxes_array, fc_flux_scaler = normalize_data(fluxes_array)

In [6]:
# Flatten the normalized fluxes for the fully connected architectures.
# Presumably each (55, 24) sample becomes a single 1-D vector — TODO confirm
# against data_utils.flatten_data.
fc_flattened_normalized_fluxes_array = flatten_data(fc_normalized_fluxes_array)

In [23]:
# split_flux is not imported by any of the cells above, so this cell raised
# NameError on a fresh kernel (Restart Kernel -> Run All). Import it here.
# NOTE(review): assumed to live in data_utils next to the other helpers — confirm.
from data_utils import split_flux

# Split the flattened, normalized fluxes into train / validation / test sets
# using the sizes configured in TRAIN_SIZE, VALIDATION_SIZE and TEST_SIZE.
train_fc_fluxes, val_fc_fluxes, test_fc_fluxes = split_flux(fc_flattened_normalized_fluxes_array,
                                                            TRAIN_SIZE,
                                                            VALIDATION_SIZE,
                                                            TEST_SIZE)

Save the train, validation, and test splits, together with the scaler used to normalize them.

In [None]:
# Destination paths for the three flux splits and for the scaler
from constants import (
    TRAIN_FC_FLUXES_PATH,
    VALIDATION_FC_FLUXES_PATH,
    TEST_FC_FLUXES_PATH,
    FC_FLUX_SCALER_PATH,
)

# Write each split to its own file, in train -> validation -> test order
for split_fluxes, split_path in (
    (train_fc_fluxes, TRAIN_FC_FLUXES_PATH),
    (val_fc_fluxes, VALIDATION_FC_FLUXES_PATH),
    (test_fc_fluxes, TEST_FC_FLUXES_PATH),
):
    save_numpy_array(split_fluxes, split_path)

# Write the scaler returned by normalize_data for the fully connected flux data
save_scaler(fc_flux_scaler, FC_FLUX_SCALER_PATH)