In [1]:
import os
import numpy as np
import h5py
import tensorflow as tf
import keras
from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Reshape, Conv1DTranspose, UpSampling1D
from keras.models import Model

def load_and_preprocess_data(hdf5_file, file_idx, spatial_idx, time_start_idx, time_end_idx):
    """
    Load and preprocess data from the HDF5 file.
    
    Parameters:
        hdf5_file (str): Path to the HDF5 file.
        file_idx (int): Index of the file.
        spatial_idx (int): Index of the spatial chunk to load.
        time_start_idx (int): Start index for time chunks.
        time_end_idx (int): End index for time chunks (inclusive).
    Returns:
        raw_data (np.array): Raw data (5000x100).
        fft_data (np.array): FFT data (5000x100) (magnitude spectrum, normalized).
    """
    with h5py.File(hdf5_file, 'r') as f:
        raw_data = []
        for t in range(time_start_idx, time_end_idx + 1):
            chunk_name = f'chunk_{file_idx}_{t}_{spatial_idx}'
            chunk = f[chunk_name][:]
            raw_data.append(chunk)
    
    raw_data = np.concatenate(raw_data, axis=0)  # Combine time chunks
    
    # Perform FFT on the concatenated time chunk
    fft_data = np.abs(np.fft.fft(raw_data, axis=0))  # FFT along time axis
    
    # Normalize raw and FFT data
    raw_mean, raw_std = np.mean(raw_data), np.std(raw_data)
    raw_data = (raw_data - raw_mean) / (raw_std + 1e-6)
    
    fft_mean, fft_std = np.mean(fft_data), np.std(fft_data)
    fft_data = (fft_data - fft_mean) / (fft_std + 1e-6)
    
    return raw_data, fft_data

def create_tf_dataset(hdf5_file, batch_size=32):
    """
    Create a TensorFlow dataset for efficient data loading.
    
    Parameters:
        hdf5_file (str): Path to the HDF5 file.
        batch_size (int): Batch size for training.
    
    Returns:
        tf.data.Dataset: Preprocessed and batched dataset
    """
    def generator():
        with h5py.File(hdf5_file, 'r') as raw_f:
            num_files = len(raw_f.keys()) // (87 * 30) - 1
            for file_idx in range(num_files):
                for spatial_idx in range(87):
                    for time_idx in range(0, 27, 3):
                        raw_data, fft_data = load_and_preprocess_data(
                            hdf5_file, file_idx+1, spatial_idx, time_idx, time_idx + 4
                        )
                        yield raw_data, fft_data

    dataset = tf.data.Dataset.from_generator(
        generator, 
        output_signature=(
            tf.TensorSpec(shape=(5000, 100), dtype=tf.float32),
            tf.TensorSpec(shape=(5000, 100), dtype=tf.float32)
        )
    )
    
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

def create_fft_autoencoder(input_dim=(5000, 100), model_name='fft_autoencoder'):
    """
    Create an autoencoder neural network for FFT data.
    
    Parameters:
        input_dim (tuple): Shape of the input data.
        model_name (str): Name of the model.
    
    Returns:
        model: Compiled autoencoder model.
    """
    # Enable mixed precision training for potential speed improvement
    keras.mixed_precision.set_global_policy('mixed_float16')
    
    input_layer = Input(shape=input_dim)
    
    # Encoder
    x = Conv1D(32, kernel_size=3, activation='relu', padding='same')(input_layer)
    x = MaxPooling1D(pool_size=2, padding='same')(x)
    x = Conv1D(16, kernel_size=3, activation='relu', padding='same')(x)
    x = MaxPooling1D(pool_size=2, padding='same')(x)
    x = Flatten()(x)
    
    # Latent space
    latent = Dense(128, activation='relu')(x)
    
    # Decoder
    x = Dense(np.prod(input_dim), activation='relu')(latent)
    x = Reshape((input_dim[0] // 4, input_dim[1] * 4))(x)
    x = Conv1DTranspose(16, kernel_size=3, activation='relu', padding='same')(x)
    x = UpSampling1D(size=2)(x)
    x = Conv1DTranspose(32, kernel_size=3, activation='relu', padding='same')(x)
    x = UpSampling1D(size=2)(x)
    output_layer = Conv1D(input_dim[1], kernel_size=3, activation='sigmoid', padding='same')(x)
    
    model = Model(input_layer, output_layer)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    
    # Optional: Plot model architecture
    try:
        keras.utils.plot_model(model, to_file=f"{model_name}_architecture.png", show_shapes=True)
    except ImportError:
        print("Could not plot model architecture. Ensure you have pydot and graphviz installed.")
    
    return model

def train_fft_autoencoder(
    hdf5_file, 
    model_name='fft_autoencoder.keras',
    epochs=20,
    batch_size=32,
    save_every=5
):
    """
    Train FFT autoencoder with optimized data loading and training.
    
    Parameters:
        hdf5_file (str): Path to the HDF5 file.
        model_name (str): Name to save the model.
        epochs (int): Number of training epochs.
        batch_size (int): Batch size for training.
        save_every (int): Save model every n epochs.
    """
    # Check GPU availability
    print("GPU Available:", tf.test.is_gpu_available())
    
    # Create dataset
    fft_dataset = create_tf_dataset(hdf5_file, batch_size)
    
    # Load existing model or create new one
    if os.path.exists(model_name):
        print(f"Loading existing model: {model_name}")
        fft_autoencoder = keras.models.load_model(model_name)
    else:
        print("Creating new FFT autoencoder model")
        fft_autoencoder = create_fft_autoencoder()
    
    # Callbacks
    early_stopping = keras.callbacks.EarlyStopping(
        patience=3, 
        monitor='loss', 
        restore_best_weights=True
    )
    
    model_checkpoint = keras.callbacks.ModelCheckpoint(
        model_name, 
        save_best_only=True, 
        monitor='loss'
    )
    
    # Tensorboard for monitoring (optional)
    log_dir = os.path.join("logs", "fft_autoencoder_training")
    os.makedirs(log_dir, exist_ok=True)
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir)
    
    # Training
    history = fft_autoencoder.fit(
        fft_dataset, 
        epochs=epochs, 
        callbacks=[
            early_stopping, 
            model_checkpoint, 
            tensorboard_callback
        ]
    )
    
    print(f"Training completed. Model saved as {model_name}")
    return history


2024-12-06 15:17:58.371021: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-06 15:17:58.386925: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-06 15:17:58.393021: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-06 15:17:58.404186: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:


hdf5_file_path = 'raw_data.h5'
    
# Train FFT autoencoder
train_fft_autoencoder(
    hdf5_file=hdf5_file_path,
    model_name='autoencoder2.keras',
    epochs=10,
    batch_size=2
)


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU Available: True


I0000 00:00:1733523480.810676    1200 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733523480.832049    1200 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733523480.832129    1200 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733523481.022593    1200 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733523481.022741    1200 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-06

Creating new FFT autoencoder model
You must install pydot (`pip install pydot`) for `plot_model` to work.
Epoch 1/10


I0000 00:00:1733523484.496546    1260 service.cc:146] XLA service 0x7f030c00f120 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1733523484.496597    1260 service.cc:154]   StreamExecutor device (0): Quadro RTX 3000, Compute Capability 7.5
2024-12-06 15:18:04.553638: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-06 15:18:04.847971: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90101
I0000 00:00:1733523489.217418    1260 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


   3524/Unknown [1m160s[0m 43ms/step - loss: 0.7710

2024-12-06 15:20:41.798350: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-12-06 15:20:41.798429: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-12-06 15:20:41.798447: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2196331488759118441
2024-12-06 15:20:41.798500: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10304361227340309533
  self.gen.throw(value)


[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 45ms/step - loss: 0.7710
Epoch 2/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.7280

2024-12-06 15:23:08.455703: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-12-06 15:23:08.455748: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2196331488759118441
2024-12-06 15:23:08.455786: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10304361227340309533


[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 41ms/step - loss: 0.7280
Epoch 3/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 41ms/step - loss: 0.7208
Epoch 4/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 0.7166

2024-12-06 15:28:04.419555: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-12-06 15:28:04.419614: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2196331488759118441
2024-12-06 15:28:04.419699: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10304361227340309533


[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 43ms/step - loss: 0.7166
Epoch 5/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 0.7145

2024-12-06 15:30:33.981279: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2196331488759118441
2024-12-06 15:30:33.981393: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10304361227340309533


[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 42ms/step - loss: 0.7145
Epoch 6/10
[1m3523/3524[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 43ms/step - loss: 0.7135

2024-12-06 15:33:10.525130: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2196331488759118441
2024-12-06 15:33:10.525208: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10304361227340309533


[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 44ms/step - loss: 0.7135
Epoch 7/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 41ms/step - loss: 0.7116
Epoch 8/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 0.7117

2024-12-06 15:38:11.897152: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-12-06 15:38:11.897233: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2196331488759118441
2024-12-06 15:38:11.897282: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10304361227340309533


[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 44ms/step - loss: 0.7117
Epoch 9/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.7104

2024-12-06 15:41:04.416663: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2196331488759118441
2024-12-06 15:41:04.416795: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10304361227340309533


[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 50ms/step - loss: 0.7104
Epoch 10/10
[1m3524/3524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 44ms/step - loss: 0.7105
Training completed. Model saved as autoencoder2.keras


<keras.src.callbacks.history.History at 0x7f04215bf8c0>