In [1]:
import numpy as np

from data_utils import normalize_data, \
                       flatten_data, \
                       add_row_padding, \
                       split_fluxes, \
                       fuse_amplitude_and_phase, \
                       save_numpy_array, \
                       save_scaler

from plot_utils import plot_fluxes

# 0. Objective

With this notebook we will process and save the flux, amplitude and phase data, dividing them into train, validation and test datasets.
The dataset sizes are the following:
- Train: 70000
- Validation: 10000
- Test: 10000

# 1. Flux Data

In [2]:
from constants import ORIGINAL_FLUXES_FILE

In [3]:
# Number of samples assigned to each split (train, validation, test)
TRAIN_SIZE, VALIDATION_SIZE, TEST_SIZE = 70_000, 10_000, 10_000

## 1.1 Flux Data for Fully Connected Architectures
For the FC Architectures we need to:
 - Normalize
 - Flatten

In [4]:
# Load the raw flux array from the file named by ORIGINAL_FLUXES_FILE (imported from constants)
fluxes_array = np.load(ORIGINAL_FLUXES_FILE)

Check the shape of the data: there should be 90000 data points (70000 train + 10000 validation + 10000 test).

Process data

In [5]:
# Normalize data; normalize_data returns the normalized array together with a scaler object
# NOTE(review): the scaler is obtained from the full array, before the train/validation/test
# split. If normalize_data fits statistics on its input, validation and test samples
# influence the scaling — confirm this is intended.
fc_normalized_fluxes_array, fc_flux_scaler = normalize_data(fluxes_array)

In [8]:
# Flatten data for the fully connected models
# (shapes displayed in this notebook: 55 x 24 per sample before flattening, 1320 values after)
fc_flattened_normalized_fluxes_array = flatten_data(fc_normalized_fluxes_array)

In [15]:
# Divide the flattened, normalized fluxes into train / validation / test subsets
train_fc_fluxes, val_fc_fluxes, test_fc_fluxes = split_fluxes(
    fc_flattened_normalized_fluxes_array,
    TRAIN_SIZE,
    VALIDATION_SIZE,
    TEST_SIZE,
)

In [16]:
# Sanity check: the first dimension should equal TRAIN_SIZE
train_fc_fluxes.shape

(70000, 1320)

In [17]:
# Sanity check: the first dimension should equal VALIDATION_SIZE
val_fc_fluxes.shape

(10000, 1320)

In [18]:
# Sanity check: the first dimension should equal TEST_SIZE
test_fc_fluxes.shape

(10000, 1320)

Save data and scalers

In [20]:
# Load data paths
from constants import TRAIN_FC_FLUXES_PATH, \
                      VALIDATION_FC_FLUXES_PATH, \
                      TEST_FC_FLUXES_PATH, \
                      FC_FLUX_SCALER_PATH, \
                      TRAIN_FILE_SUFFIXES, \
                      NUMPY_SUFFIX

# Write the training fluxes as consecutive 10000-sample files, one per training suffix
for file_index, train_file in enumerate(TRAIN_FILE_SUFFIXES):
    first_row = file_index * 10000
    save_numpy_array(train_fc_fluxes[first_row:first_row + 10000],
                     f"{TRAIN_FC_FLUXES_PATH}{train_file}{NUMPY_SUFFIX}")

# Validation and test fluxes each go to a single file
save_numpy_array(val_fc_fluxes, VALIDATION_FC_FLUXES_PATH)
save_numpy_array(test_fc_fluxes, TEST_FC_FLUXES_PATH)

# Persist the scaler that normalize_data returned for the fully connected flux data
save_scaler(fc_flux_scaler, FC_FLUX_SCALER_PATH)

## 1.2 Flux data for CNN Architectures

For the CNN Architectures we need to:
- Normalize

In [16]:
# Load flux data again from ORIGINAL_FLUXES_FILE
# NOTE(review): repeats the load done in section 1.1 — presumably so this section starts
# from the raw array; confirm the reload is needed.
fluxes_array = np.load(ORIGINAL_FLUXES_FILE)

Process data

In [17]:
# Normalize data; the returned scaler is saved later as the CNN flux scaler
# NOTE(review): the scaler is obtained before the train/validation/test split — confirm intended.
conv_normalized_fluxes_array, conv_flux_scaler = normalize_data(fluxes_array)

In [18]:
# Divide the normalized (un-flattened) fluxes into train / validation / test subsets
train_conv_fluxes, val_conv_fluxes, test_conv_fluxes = split_fluxes(
    conv_normalized_fluxes_array,
    TRAIN_SIZE,
    VALIDATION_SIZE,
    TEST_SIZE,
)

In [19]:
# Sanity check: TRAIN_SIZE samples, each keeping its 2-D layout
train_conv_fluxes.shape

(70000, 55, 24)

In [20]:
# Sanity check: VALIDATION_SIZE samples, each keeping its 2-D layout
val_conv_fluxes.shape

(10000, 55, 24)

In [21]:
# Sanity check: TEST_SIZE samples, each keeping its 2-D layout
test_conv_fluxes.shape

(10000, 55, 24)

Save data and scalers

In [22]:
# Load data paths
from constants import TRAIN_CNN_FLUXES_PATH, \
                      VALIDATION_CNN_FLUXES_PATH, \
                      TEST_CNN_FLUXES_PATH, \
                      CNN_FLUX_SCALER_PATH, \
                      TRAIN_FILE_SUFFIXES, \
                      NUMPY_SUFFIX

# One file per training suffix, each holding the next 10000 consecutive samples
chunk_starts = range(0, 10000 * len(TRAIN_FILE_SUFFIXES), 10000)
for chunk_start, train_file in zip(chunk_starts, TRAIN_FILE_SUFFIXES):
    chunk = train_conv_fluxes[chunk_start:chunk_start + 10000]
    save_numpy_array(chunk, f"{TRAIN_CNN_FLUXES_PATH}{train_file}{NUMPY_SUFFIX}")

# Validation fluxes go to a single file
save_numpy_array(val_conv_fluxes, VALIDATION_CNN_FLUXES_PATH)

# Test fluxes go to a single file
save_numpy_array(test_conv_fluxes, TEST_CNN_FLUXES_PATH)

# Persist the scaler that normalize_data returned for the CNN flux data
save_scaler(conv_flux_scaler, CNN_FLUX_SCALER_PATH)

## 1.3 Flux data for Autoencoder Architecture

For the Autoencoder Architecture we need to:
- Add padding
- Normalize

In [23]:
# Load flux data again from ORIGINAL_FLUXES_FILE
# NOTE(review): repeats the load done in the earlier flux sections — presumably so padding
# is applied to the raw, un-normalized array; confirm the reload is needed.
fluxes_array = np.load(ORIGINAL_FLUXES_FILE)

Process data

In [24]:
# Add padding to data: one extra row on top, none at the bottom
# (shapes displayed in this notebook go from 55 x 24 per sample to 56 x 24)
padded_fluxes = add_row_padding(fluxes_array, top_rows=1, bottom_rows=0)

In [25]:
# Normalize the padded data; the returned scaler is saved later as the autoencoder flux scaler
# NOTE(review): normalization runs after padding, so the padded row is part of the input
# to normalize_data — confirm this is the intended order.
normalized_padded_fluxes_array, autoencoder_flux_scaler = normalize_data(padded_fluxes)

In [26]:
# Divide the padded, normalized fluxes into train / validation / test subsets
train_autoencoder_fluxes, val_autoencoder_fluxes, test_autoencoder_fluxes = split_fluxes(
    normalized_padded_fluxes_array,
    TRAIN_SIZE,
    VALIDATION_SIZE,
    TEST_SIZE,
)

In [27]:
# Sanity check: TRAIN_SIZE samples, each with the extra padded row
train_autoencoder_fluxes.shape

(70000, 56, 24)

In [28]:
# Sanity check: VALIDATION_SIZE samples, each with the extra padded row
val_autoencoder_fluxes.shape

(10000, 56, 24)

In [29]:
# Sanity check: TEST_SIZE samples, each with the extra padded row
test_autoencoder_fluxes.shape

(10000, 56, 24)

Save data and scalers

In [30]:
# Load data paths
from constants import TRAIN_AUTOENCODER_FLUXES_PATH, \
                      VALIDATION_AUTOENCODER_FLUXES_PATH, \
                      TEST_AUTOENCODER_FLUXES_PATH, \
                      AUTOENCODER_FLUX_SCALER_PATH, \
                      TRAIN_FILE_SUFFIXES, \
                      NUMPY_SUFFIX

# Shard the training fluxes: 10000 consecutive samples per training suffix
rows_per_file = 10000
for position, train_file in enumerate(TRAIN_FILE_SUFFIXES):
    shard = train_autoencoder_fluxes[position * rows_per_file:(position + 1) * rows_per_file]
    save_numpy_array(shard, f"{TRAIN_AUTOENCODER_FLUXES_PATH}{train_file}{NUMPY_SUFFIX}")

# The complete training array is additionally written to the un-suffixed training path.
# NOTE(review): the FC and CNN flux sections only write the per-suffix files — confirm
# that something actually consumes this full copy.
save_numpy_array(train_autoencoder_fluxes, TRAIN_AUTOENCODER_FLUXES_PATH)

# Validation and test fluxes each go to a single file
save_numpy_array(val_autoencoder_fluxes, VALIDATION_AUTOENCODER_FLUXES_PATH)
save_numpy_array(test_autoencoder_fluxes, TEST_AUTOENCODER_FLUXES_PATH)

# Persist the scaler that normalize_data returned for the autoencoder flux data
save_scaler(autoencoder_flux_scaler, AUTOENCODER_FLUX_SCALER_PATH)

# 2. Amplitude and Phase Data

## 2.1 Amplitude and Phase for Fully Connected Architecture

For the FC architectures we need to:
- Normalize
- Stack amplitude and phase

In [29]:
from constants import ORIGINAL_SLM_FOLDER, \
                      ORIGINAL_AMPLITUDE_FILENAME, \
                      ORIGINAL_PHASE_FILENAME, \
                      TRAIN_FILE_SUFFIXES, \
                      VALIDATION_FILE_SUFFIX, \
                      TEST_FILE_SUFFIX, \
                      NUMPY_SUFFIX

from plot_utils import plot_amp_phase_prediction

In [27]:
# Load data: every training file first, then the validation file, then the test file
# NOTE(review): sections 2.2 and 2.3 of this notebook import VAL_AMP_PHASE_FILE_SUFFIX /
# TEST_AMP_PHASE_FILE_SUFFIX for the same purpose — confirm the names used here still
# exist in constants.
slm_file_numbers = TRAIN_FILE_SUFFIXES + [VALIDATION_FILE_SUFFIX] + [TEST_FILE_SUFFIX]

# Each file is converted to float32 and the files are joined along the sample axis
amplitudes = np.concatenate(
    [np.float32(np.load(f"{ORIGINAL_SLM_FOLDER}{file_number}/{ORIGINAL_AMPLITUDE_FILENAME}"))
     for file_number in slm_file_numbers],
    axis=0,
)
phases = np.concatenate(
    [np.float32(np.load(f"{ORIGINAL_SLM_FOLDER}{file_number}/{ORIGINAL_PHASE_FILENAME}"))
     for file_number in slm_file_numbers],
    axis=0,
)


Now process the data

In [31]:
# Normalize amplitudes and phases independently; each call returns its own scaler
# NOTE(review): both scalers are obtained from all loaded files (train, validation and
# test together) — confirm this is intended.
normalized_amplitudes, fc_amplitude_scaler = normalize_data(amplitudes)
normalized_phases, fc_phase_scaler = normalize_data(phases)

In [33]:
# Only the last expression of a cell is displayed, so listing both shapes would show just
# one of them. Compare them explicitly instead: the arrays are stacked together next and
# must have identical shapes.
assert normalized_amplitudes.shape == normalized_phases.shape, (
    f"shape mismatch: amplitudes {normalized_amplitudes.shape} vs phases {normalized_phases.shape}"
)
normalized_phases.shape

(90000, 96, 96)

In [34]:
# Stack amplitude and phase into one array; the displayed shape shows a new length-2 axis
# at position 1 (presumably index 0 = amplitude, 1 = phase — TODO confirm in data_utils)
amp_phase_array = fuse_amplitude_and_phase(normalized_amplitudes, normalized_phases)
amp_phase_array.shape

(90000, 2, 96, 96)

Save data

In [36]:
from constants import TRAIN_FC_AMP_PHASE_PATH, \
                      VALIDATION_FC_AMP_PHASE_PATH, \
                      TEST_FC_AMP_PHASE_PATH, \
                      FC_AMP_SCALER_PATH, \
                      FC_PHASE_SCALER_PATH

In [38]:
# Destination files, in the same order as the data was loaded:
# one per training suffix, then validation, then test
destinations = [f"{TRAIN_FC_AMP_PHASE_PATH}{train_file}{NUMPY_SUFFIX}"
                for train_file in TRAIN_FILE_SUFFIXES]
destinations += [VALIDATION_FC_AMP_PHASE_PATH, TEST_FC_AMP_PHASE_PATH]

# Each destination receives the next 10000 consecutive samples
for file_index, destination in enumerate(destinations):
    first_row = file_index * 10000
    save_numpy_array(amp_phase_array[first_row:first_row + 10000], destination)

# Persist the amplitude and phase scalers
save_scaler(fc_amplitude_scaler, FC_AMP_SCALER_PATH)
save_scaler(fc_phase_scaler, FC_PHASE_SCALER_PATH)

## 2.2 Amplitude and Phase for Convolutional Architecture

For CNN Architecture we need to:
- Normalize
- Stack amplitude and phase

In [2]:
from constants import ORIGINAL_SLM_FOLDER, \
                      ORIGINAL_AMPLITUDE_FILENAME, \
                      ORIGINAL_PHASE_FILENAME, \
                      TRAIN_FILE_SUFFIXES, \
                      VAL_AMP_PHASE_FILE_SUFFIX, \
                      TEST_AMP_PHASE_FILE_SUFFIX, \
                      NUMPY_SUFFIX

In [3]:
# Load data: collect one float32 array per file, then join them along the sample axis
amplitude_parts = []
phase_parts = []

for file_number in TRAIN_FILE_SUFFIXES + [VAL_AMP_PHASE_FILE_SUFFIX] + [TEST_AMP_PHASE_FILE_SUFFIX]:
    # Amplitude and phase files of one batch live in the same numbered folder
    folder = f"{ORIGINAL_SLM_FOLDER}{file_number}/"
    amplitude_parts.append(np.float32(np.load(f"{folder}{ORIGINAL_AMPLITUDE_FILENAME}")))
    phase_parts.append(np.float32(np.load(f"{folder}{ORIGINAL_PHASE_FILENAME}")))

amplitudes = np.concatenate(amplitude_parts, axis=0)
phases = np.concatenate(phase_parts, axis=0)

Now process data

In [4]:
# Normalize amplitudes and phases independently; each call returns its own scaler
# NOTE(review): both scalers are obtained from all loaded files (train, validation and
# test together) — confirm this is intended.
normalized_amplitudes, cnn_amplitude_scaler = normalize_data(amplitudes)
normalized_phases, cnn_phase_scaler = normalize_data(phases)

In [5]:
# Only the last expression of a cell is displayed, so listing both shapes would show just
# one of them. Compare them explicitly instead: the arrays are stacked together next and
# must have identical shapes.
assert normalized_amplitudes.shape == normalized_phases.shape, (
    f"shape mismatch: amplitudes {normalized_amplitudes.shape} vs phases {normalized_phases.shape}"
)
normalized_phases.shape

(90000, 96, 96)

In [6]:
# Stack amplitude and phase into one array; the displayed shape shows a new length-2 axis
# at position 1 (presumably index 0 = amplitude, 1 = phase — TODO confirm in data_utils)
amp_phase_array = fuse_amplitude_and_phase(normalized_amplitudes, normalized_phases)
amp_phase_array.shape

(90000, 2, 96, 96)

Save data

In [7]:
from constants import TRAIN_CNN_AMP_PHASE_PATH, \
                      VALIDATION_CNN_AMP_PHASE_PATH, \
                      TEST_CNN_AMP_PHASE_PATH, \
                      CNN_AMP_SCALER_PATH, \
                      CNN_PHASE_SCALER_PATH

In [8]:
# Every output file holds 10000 consecutive samples of amp_phase_array
rows_per_file = 10000
offset = 0

# Training data: one file per training suffix
for train_file in TRAIN_FILE_SUFFIXES:
    save_numpy_array(amp_phase_array[offset:offset + rows_per_file],
                     f"{TRAIN_CNN_AMP_PHASE_PATH}{train_file}{NUMPY_SUFFIX}")
    offset += rows_per_file

# Validation data is the block right after the training rows, test data the block after that
validation_rows = slice(offset, offset + rows_per_file)
test_rows = slice(validation_rows.stop, validation_rows.stop + rows_per_file)
save_numpy_array(amp_phase_array[validation_rows], VALIDATION_CNN_AMP_PHASE_PATH)
save_numpy_array(amp_phase_array[test_rows], TEST_CNN_AMP_PHASE_PATH)

# Persist the amplitude and phase scalers
save_scaler(cnn_amplitude_scaler, CNN_AMP_SCALER_PATH)
save_scaler(cnn_phase_scaler, CNN_PHASE_SCALER_PATH)

## 2.3 Amplitude and Phase for Autoencoder + Convolutional Architecture

For the Autoencoder + Convolutional Architecture we need to:
- Add padding (8 top and 8 bottom rows)
- Normalize
- Stack amplitude and phase
- Swap axes

In [2]:
from constants import ORIGINAL_SLM_FOLDER, \
                      ORIGINAL_AMPLITUDE_FILENAME, \
                      ORIGINAL_PHASE_FILENAME, \
                      TRAIN_FILE_SUFFIXES, \
                      VAL_AMP_PHASE_FILE_SUFFIX, \
                      TEST_AMP_PHASE_FILE_SUFFIX, \
                      NUMPY_SUFFIX

In [3]:
# Load only the amplitudes here (phases are handled separately further down):
# training files first, then the validation file, then the test file
slm_file_numbers = TRAIN_FILE_SUFFIXES + [VAL_AMP_PHASE_FILE_SUFFIX] + [TEST_AMP_PHASE_FILE_SUFFIX]

# Each file is converted to float32 and the files are joined along the sample axis
amplitudes = np.concatenate(
    [np.float32(np.load(f"{ORIGINAL_SLM_FOLDER}{file_number}/{ORIGINAL_AMPLITUDE_FILENAME}"))
     for file_number in slm_file_numbers],
    axis=0,
)

Process data

First amplitudes

In [4]:
# Add padding: 8 rows on top and 8 rows at the bottom of every sample
# NOTE: the result is bound back to `amplitudes`, so re-running this cell pads the
# already padded array again — reload the data before re-running.
amplitudes = add_row_padding(amplitudes, top_rows=8, bottom_rows=8)

In [5]:
# Normalize the padded amplitudes; the returned scaler is saved in the next cell
# NOTE(review): the scaler is obtained from train, validation and test files together,
# after padding — confirm this is intended.
normalized_amplitudes, autoencoder_amplitude_scaler = normalize_data(amplitudes)

In [6]:
from constants import AUTOENCODER_AMP_SCALER_PATH
# Park the normalized amplitudes in a temporary file; the stacking cells further down
# read them back from "temp_amp.npy"
save_numpy_array(normalized_amplitudes, "temp_amp.npy")
# Persist the amplitude scaler
save_scaler(autoencoder_amplitude_scaler, AUTOENCODER_AMP_SCALER_PATH)

Now phases

In [3]:
# Load the phases: collect one float32 array per file (training files, then validation,
# then test) and join them along the sample axis
phase_parts = []
for file_number in TRAIN_FILE_SUFFIXES + [VAL_AMP_PHASE_FILE_SUFFIX] + [TEST_AMP_PHASE_FILE_SUFFIX]:
    phase_source = f"{ORIGINAL_SLM_FOLDER}{file_number}/{ORIGINAL_PHASE_FILENAME}"
    phase_parts.append(np.float32(np.load(phase_source)))

phases = np.concatenate(phase_parts, axis=0)

In [4]:
# Add padding: 8 rows on top and 8 rows at the bottom of every sample
# NOTE: the result is bound back to `phases`, so re-running this cell pads the already
# padded array again — reload the data before re-running.
phases = add_row_padding(phases, top_rows=8, bottom_rows=8)

In [5]:
# Normalize the padded phases; the returned scaler is saved in the next cell
normalized_phases, autoencoder_phase_scaler = normalize_data(phases)

In [6]:
from constants import AUTOENCODER_PHASE_SCALER_PATH
# Park the normalized phases in a temporary file; the stacking cells further down
# read them back from "temp_phase.npy"
save_numpy_array(normalized_phases, "temp_phase.npy")
# Persist the phase scaler
save_scaler(autoencoder_phase_scaler, AUTOENCODER_PHASE_SCALER_PATH)

In [7]:
# Sanity check: all samples present, with 16 extra rows per sample from the padding
normalized_phases.shape

(90000, 112, 96)

Second part: Stack amplitude and phase

In [3]:
# Stack amplitude and phase for the training portion only (the first 70000 samples)
# of the temporary files written earlier in this section
amplitudes = np.load("temp_amp.npy")[0:70000]
phases = np.load("temp_phase.npy")[0:70000]

amp_phase_array = fuse_amplitude_and_phase(amplitudes, phases)

# Move the amplitude/phase axis from position 1 to the end:
# (N, 2, rows, cols) -> (N, rows, cols, 2). This is the same permutation as
# swapaxes(1, 3) followed by swapaxes(1, 2), expressed as a single call.
amp_phase_array = np.moveaxis(amp_phase_array, 1, -1)

In [4]:
# Sanity check: the amplitude/phase axis should now be the last one
amp_phase_array.shape

(70000, 112, 96, 2)

Save data

In [3]:
from constants import TRAIN_AUTOENCODER_AMP_PHASE_PATH, \
                      VALIDATION_AUTOENCODER_AMP_PHASE_PATH, \
                      TEST_AUTOENCODER_AMP_PHASE_PATH

In [6]:
# Write the training data as consecutive 10000-sample files, one per training suffix
for file_index, train_file in enumerate(TRAIN_FILE_SUFFIXES):
    first_row = file_index * 10000
    save_numpy_array(amp_phase_array[first_row:first_row + 10000],
                     f"{TRAIN_AUTOENCODER_AMP_PHASE_PATH}{train_file}{NUMPY_SUFFIX}")

In [4]:
# Stack amplitude and phase for everything after the first 70000 samples
# (the validation and test portions of the temporary files)
amplitudes = np.load("temp_amp.npy")[70000:]
phases = np.load("temp_phase.npy")[70000:]

amp_phase_array = fuse_amplitude_and_phase(amplitudes, phases)
print(amp_phase_array.shape)

# Reorder the axes so the amplitude/phase axis comes last:
# (N, 2, rows, cols) -> (N, rows, cols, 2)
amp_phase_array = np.transpose(amp_phase_array, (0, 2, 3, 1))

(20000, 2, 112, 96)


In [5]:
# Sanity check: the amplitude/phase axis should now be the last one
print(amp_phase_array.shape)

(20000, 112, 96, 2)


In [7]:
# amp_phase_array holds the validation samples first, followed by the test samples
validation_part = amp_phase_array[:10000]
test_part = amp_phase_array[10000:20000]

save_numpy_array(validation_part, VALIDATION_AUTOENCODER_AMP_PHASE_PATH)
save_numpy_array(test_part, TEST_AUTOENCODER_AMP_PHASE_PATH)