In [1]:
import os
from os.path import isfile
import sys
import glob

os.environ["CUDA_VISIBLE_DEVICES"]="3"
#os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"
os.environ["TF_GPU_ALLOCATOR"]="cuda_malloc_async"

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L1, L2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler

import mlflow
import mlflow.tensorflow

2024-09-29 19:30:55.279754: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-09-29 19:30:55.392818: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
mlflow.tensorflow.autolog()
mlflow.set_experiment("HSEL+CPL T")

<Experiment: artifact_location='file:///data/nature_run/work/src/mlruns/394114635930480291', creation_time=1727650112205, experiment_id='394114635930480291', last_update_time=1727650112205, lifecycle_stage='active', name='HSEL+CPL T', tags={}>

In [3]:
def get_data(filelist):
    hsel_val = []
    scalar_val = []
    table_val = []
    cpl_val = []

    for i in filelist:
        # Keys: 'mh', 'hsel', 'scalar', 'table']
        tmp = np.load(i)
        hsel_val.append(tmp["hsel"])
        scalar_val.append(tmp["scalar"])
        table_val.append(tmp["table"])
        cpl_val.append(tmp["cpl"])

    return np.vstack(hsel_val), np.vstack(cpl_val), np.vstack(scalar_val), np.vstack(table_val)


train = ['/data/nature_run/fulldays_reduced/all_cpl_20060815.npz',
         '/data/nature_run/fulldays_reduced/all_cpl_20060915.npz',
         '/data/nature_run/fulldays_reduced/all_cpl_20061015.npz',
         '/data/nature_run/fulldays_reduced/all_cpl_20060515.npz',
         '/data/nature_run/fulldays_reduced/all_cpl_20060715.npz',
         '/data/nature_run/fulldays_reduced/all_cpl_20061115.npz',
         '/data/nature_run/fulldays_reduced/all_cpl_20060315.npz', 
         '/data/nature_run/fulldays_reduced/all_cpl_20061215.npz',
         '/data/nature_run/fulldays_reduced/all_cpl_20060615.npz']

test = ['/data/nature_run/fulldays_reduced/all_cpl_20060803.npz']

validation = ['/data/nature_run/fulldays_reduced/all_cpl_20060803.npz']

hsel_train, cpl_train, scalar_train, table_train = get_data(train)
hsel_test, cpl_test, scalar_test, table_test = get_data(test)
hsel_val, cpl_val, scalar_val, table_val = get_data(validation)



In [4]:
#hsel_train, table_train, scalar_train, hsel_test, table_test, scalar_test = dataload("mh")

In [5]:
#hsel_train, table_train, scalar_train, hsel_test, table_test, scalar_test = dataload("mh")
#hsel_val = hsel_test.copy()
#table_val = table_test.copy()
#scalar_val = scalar_test.copy()


In [6]:
print(hsel_train.shape)
print(cpl_train.shape)
print(table_train.shape)
print(scalar_train.shape)

(8414706, 1957)
(8414706, 733)
(8414706, 72, 3)
(8414706, 44)


In [7]:
# What should we do
# 1) Only use train scalars
# 2) Scale seperateley


# Presssure ####
spress_test = scalar_test[:, 3].reshape(-1, 1)
spress_train = scalar_train[:, 3].reshape(-1, 1)
spress_val = scalar_val[:, 3].reshape(-1, 1)

mins = np.min(spress_train, axis=0)
maxs = np.max(spress_train, axis=0)
np.savez("spress_scalar_cpl_hsel.npz", mins=mins, maxs=maxs)

spress_test = (spress_test - mins)/(maxs - mins)
spress_train = (spress_train - mins)/(maxs - mins)
spress_val = (spress_val - mins)/(maxs - mins)


# Labels ####
q_train = table_train[:, :, 1]
q_test = table_test[:, :, 1]
q_val = table_val[:, :, 1]

# Spectra ####
mins = np.min(hsel_train, axis=0)
maxs = np.max(hsel_train, axis=0)

np.savez("minimac_scaling_factors_cpl_hsel.npz", maxs=maxs, mins=mins)

hsel_train = np.nan_to_num(hsel_train)
hsel_test = np.nan_to_num(hsel_test)
hsel_val = np.nan_to_num(hsel_val)

hsel_train = (hsel_train - mins)/(maxs - mins)
hsel_test = (hsel_test - mins)/(maxs - mins)
hsel_val = (hsel_val - mins)/(maxs - mins)

# CPL #####
mins = np.min(cpl_train, axis=0)
maxs = np.max(cpl_train, axis=0)
np.savez("minimac_scaling_factors_cpl_cpl_hsel.npz", maxs=maxs, mins=mins)

cpl_train = (cpl_train - mins)/(maxs - mins)
cpl_test = (cpl_test - mins)/(maxs - mins)
cpl_val = (cpl_val - mins)/(maxs - mins)

with tf.device('/cpu:0'):
    # train
    x_train = {'rad': tf.convert_to_tensor(hsel_train, np.float32),
               'spress': tf.convert_to_tensor(spress_train, np.float32),
               'cpl': tf.convert_to_tensor(cpl_train, np.float32)}
    del hsel_train
    del spress_train
    
    y_train =  tf.convert_to_tensor(q_train, np.float32)
    del q_train

    
    # Val
    x_val = {'rad': tf.convert_to_tensor(hsel_val, np.float32),
             'spress': tf.convert_to_tensor(spress_val, np.float32),
             'cpl': tf.convert_to_tensor(cpl_val, np.float32)}
    del hsel_val
    del spress_val
    
    y_val =  tf.convert_to_tensor(q_val, np.float32)
    del q_val

    
    # Test
    x_test = {'rad': tf.convert_to_tensor(hsel_test, np.float32),
              'spress': tf.convert_to_tensor(spress_test, np.float32),
              'cpl': tf.convert_to_tensor(cpl_test, np.float32)}
    del hsel_test
    del spress_test
    
    y_test =  tf.convert_to_tensor(q_test, np.float32)
    del q_test


2024-09-29 20:41:23.961126: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-09-29 20:41:24.541657: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0
2024-09-29 20:41:24.542800: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31017 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:3e:00.0, compute capability: 7.0


In [None]:
# print(x_test)
# print(x_val)
# print(y_train.shape)
#a = np.argwhere(np.isnan(x_train['rad']))
#print(np.sum(np.isnan(x_train['rad'])))
#np.nan_to_num(x_train['rad'], copy=False)
#print(np.sum(np.isnan(x_train['rad'])))
#print(a)
#print(len(a))

np.sum(np.isnan(x_train['rad']))

In [8]:
def build_ae_model(config):
    # ATMS 22
    mh = Input(shape=(config["shape"],), name="rad")
    spress = Input(shape=(1,), name="spress")
    #cpl = Input(shape=(733,), name="cpl")
    # [ha, hb, hc, hd, hw, mh]

    encoder = load_model("encoder_3_mae.keras")
    encoder.trainable = False

    #cpl_reduced = Dense(128, activation=config["activation"])(cpl)
    #cpl_reduced = Dense(32, activation=config["activation"])(cpl_reduced)
    
    enc = encoder(mh)
    x = Concatenate()([enc, spress])
    for i in range(config["num_layers"]):
        x =  Dropout(config["dropout"])(Dense(config["num_neurons"], 
                                             activation=config["activation"])(x))
    outputs = Dense(config['output'], name="Temp")(x)

    model = Model(inputs=[mh, spress], outputs=outputs, name="Temp")
    model.compile(optimizer="adam", loss='mae')

    return model

config = {'shape': 1957,
          'output': 72,
          'dropout': np.random.choice([0.01, 0.1, 0.2, 0.5]),
          'num_layers': np.random.choice([3, 4, 5, 10]),
          'num_neurons': np.random.choice([16, 32, 64, 128]),
          'activation': np.random.choice(['gelu', 'gelu', 'relu'])}

model = build_ae_model(config)
model.summary()


Model: "Temp"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 rad (InputLayer)               [(None, 1957)]       0           []                               
                                                                                                  
 model_1 (Functional)           (None, 64)           275468      ['rad[0][0]']                    
                                                                                                  
 spress (InputLayer)            [(None, 1)]          0           []                               
                                                                                                  
 concatenate (Concatenate)      (None, 65)           0           ['model_1[0][0]',                
                                                                  'spress[0][0]']              

In [9]:
def build_ae_cpl_model(config):
    # ATMS 22
    mh = Input(shape=(config["shape"],), name="rad")
    spress = Input(shape=(1,), name="spress")
    cpl = Input(shape=(733,), name="cpl")
    # [ha, hb, hc, hd, hw, mh]

    encoder = load_model("encoder_3_mae.keras")
    encoder.trainable = False

    cpl_reduced = Dense(128, activation=config["activation"])(cpl)
    cpl_reduced = Dense(32, activation=config["activation"])(cpl_reduced)
    
    enc = encoder(mh)
    x = Concatenate()([enc, spress, cpl_reduced])
    for i in range(config["num_layers"]):
        x =  Dropout(config["dropout"])(Dense(config["num_neurons"], 
                                             activation=config["activation"])(x))
    outputs = Dense(config['output'], name="Temp")(x)

    model = Model(inputs=[mh, spress, cpl], outputs=outputs, name="Temp")
    model.compile(optimizer="adam", loss='mae')

    return model

config = {'shape': 1957,
          'output': 72,
          'dropout': np.random.choice([0.01, 0.1, 0.2, 0.5]),
          'num_layers': np.random.choice([3, 4, 5, 10]),
          'num_neurons': np.random.choice([16, 32, 64, 128]),
          'activation': np.random.choice(['gelu', 'gelu', 'relu'])}

model = build_ae_model(config)
model.summary()


Model: "Temp"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 rad (InputLayer)               [(None, 1957)]       0           []                               
                                                                                                  
 model_1 (Functional)           (None, 64)           275468      ['rad[0][0]']                    
                                                                                                  
 spress (InputLayer)            [(None, 1)]          0           []                               
                                                                                                  
 concatenate_1 (Concatenate)    (None, 65)           0           ['model_1[0][0]',                
                                                                  'spress[0][0]']              

In [12]:
print("DING")

DING


In [None]:
# AE

for _ in range(100):
    config = {'shape': 1957,
              'output': 72,
              'dropout': np.random.choice([0.01, 0.1, 0.2, 0.5]),
              'num_layers': np.random.choice([3, 4, 5, 10]),
              'num_neurons': np.random.choice([16, 32, 64, 128]),
              'activation': np.random.choice(['gelu', 'gelu', 'relu'])}

    model = build_ae_cpl_model(config)
    
    with mlflow.start_run() as run:
        model_checkpoint_callback = ModelCheckpoint(
            filepath=f"models/model_{run.info.run_id}.keras",
            save_weights_only=False,
            monitor='val_loss',
            mode='min',
            save_best_only=True)
        callback = EarlyStopping(monitor='val_loss', patience=8, verbose=0)
        mlflow.log_param("Autoencoder", True)
        mlflow.log_param("CPL", True)

        history = model.fit(x_train, y_train, epochs=1000, batch_size=2000,
                            validation_data=(x_val, y_val),
                            verbose=2,
                            callbacks=[callback, model_checkpoint_callback])
        
        evaled_test = model.evaluate(x_train, y_train, batch_size=100, verbose=0)
        evaled_train = model.evaluate(x_test, y_test, batch_size=100, verbose=0)

        mlflow.log_metric("test_loss", evaled_test)
        mlflow.log_metric("train_loss", evaled_train)

        plt.plot(history.history["loss"])
        plt.plot(history.history["val_loss"])
        plt.title('Model Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plotname = f"plots/loss_plot_{run.info.run_id}.png"
        plt.savefig(plotname)
        plt.close()

        # Log the plot as an artifact
        mlflow.log_artifact(plotname)
        #model_path = "modles"
        #mlflow.tensorflow.log_model(tf_saved_model_dir=model_path, artifact_path="model")






Epoch 1/1000
4208/4208 - 71s - loss: 17.0987 - val_loss: 3.8035 - 71s/epoch - 17ms/step
Epoch 2/1000
4208/4208 - 67s - loss: 10.7509 - val_loss: 2.8777 - 67s/epoch - 16ms/step
Epoch 3/1000
4208/4208 - 67s - loss: 9.2868 - val_loss: 2.5219 - 67s/epoch - 16ms/step
Epoch 4/1000
4208/4208 - 67s - loss: 8.9797 - val_loss: 2.8552 - 67s/epoch - 16ms/step
Epoch 5/1000
4208/4208 - 67s - loss: 8.8654 - val_loss: 2.5955 - 67s/epoch - 16ms/step
Epoch 6/1000
4208/4208 - 67s - loss: 8.7610 - val_loss: 2.5380 - 67s/epoch - 16ms/step
Epoch 7/1000
4208/4208 - 66s - loss: 8.6600 - val_loss: 2.5675 - 66s/epoch - 16ms/step
Epoch 8/1000
4208/4208 - 66s - loss: 8.5505 - val_loss: 2.4078 - 66s/epoch - 16ms/step
Epoch 9/1000
4208/4208 - 68s - loss: 8.4273 - val_loss: 2.6757 - 68s/epoch - 16ms/step
Epoch 10/1000
4208/4208 - 67s - loss: 8.3239 - val_loss: 2.5347 - 67s/epoch - 16ms/step
Epoch 11/1000
4208/4208 - 66s - loss: 8.2187 - val_loss: 2.6594 - 66s/epoch - 16ms/step
Epoch 12/1000
4208/4208 - 67s - loss: 8







Epoch 1/1000
4208/4208 - 74s - loss: 28.7892 - val_loss: 4.5848 - 74s/epoch - 18ms/step
Epoch 2/1000
4208/4208 - 73s - loss: 18.0973 - val_loss: 3.9696 - 73s/epoch - 17ms/step
Epoch 3/1000
4208/4208 - 73s - loss: 17.4812 - val_loss: 3.6570 - 73s/epoch - 17ms/step
Epoch 4/1000
4208/4208 - 71s - loss: 17.0973 - val_loss: 3.6564 - 71s/epoch - 17ms/step
Epoch 5/1000
4208/4208 - 72s - loss: 16.7762 - val_loss: 3.2790 - 72s/epoch - 17ms/step
Epoch 6/1000
4208/4208 - 71s - loss: 16.5026 - val_loss: 3.3719 - 71s/epoch - 17ms/step
Epoch 7/1000
4208/4208 - 72s - loss: 16.2153 - val_loss: 3.6873 - 72s/epoch - 17ms/step
Epoch 8/1000
4208/4208 - 72s - loss: 15.9383 - val_loss: 3.8297 - 72s/epoch - 17ms/step
Epoch 9/1000
4208/4208 - 73s - loss: 15.6741 - val_loss: 3.1847 - 73s/epoch - 17ms/step
Epoch 10/1000
4208/4208 - 72s - loss: 15.3892 - val_loss: 3.1500 - 72s/epoch - 17ms/step
Epoch 11/1000
4208/4208 - 73s - loss: 15.1308 - val_loss: 3.3478 - 73s/epoch - 17ms/step
Epoch 12/1000
4208/4208 - 73s 



Epoch 1/1000
4208/4208 - 67s - loss: 23.0762 - val_loss: 82.8756 - 67s/epoch - 16ms/step
Epoch 2/1000
4208/4208 - 64s - loss: 15.8771 - val_loss: 79.5110 - 64s/epoch - 15ms/step
Epoch 3/1000
4208/4208 - 64s - loss: 15.3577 - val_loss: 76.5975 - 64s/epoch - 15ms/step
Epoch 4/1000
4208/4208 - 65s - loss: 15.0387 - val_loss: 71.7499 - 65s/epoch - 15ms/step
Epoch 5/1000
4208/4208 - 65s - loss: 14.7555 - val_loss: 67.6575 - 65s/epoch - 16ms/step
Epoch 6/1000
4208/4208 - 65s - loss: 14.4949 - val_loss: 64.1202 - 65s/epoch - 15ms/step
Epoch 7/1000
4208/4208 - 65s - loss: 14.2486 - val_loss: 54.2958 - 65s/epoch - 15ms/step
Epoch 8/1000
4208/4208 - 66s - loss: 13.9816 - val_loss: 44.7883 - 66s/epoch - 16ms/step
Epoch 9/1000
4208/4208 - 65s - loss: 13.6498 - val_loss: 34.6009 - 65s/epoch - 15ms/step
Epoch 10/1000
4208/4208 - 65s - loss: 13.1513 - val_loss: 23.8670 - 65s/epoch - 15ms/step
Epoch 11/1000
4208/4208 - 65s - loss: 12.6099 - val_loss: 17.1588 - 65s/epoch - 15ms/step
Epoch 12/1000
4208/



Epoch 1/1000
4208/4208 - 62s - loss: 54.1343 - val_loss: 27.5678 - 62s/epoch - 15ms/step
Epoch 2/1000
4208/4208 - 60s - loss: 35.2138 - val_loss: 8.6915 - 60s/epoch - 14ms/step
Epoch 3/1000
4208/4208 - 61s - loss: 33.6544 - val_loss: 6.5966 - 61s/epoch - 14ms/step
Epoch 4/1000
4208/4208 - 61s - loss: 33.0350 - val_loss: 8.3694 - 61s/epoch - 14ms/step
Epoch 5/1000
4208/4208 - 61s - loss: 32.4472 - val_loss: 7.1464 - 61s/epoch - 14ms/step
Epoch 6/1000
4208/4208 - 61s - loss: 31.8794 - val_loss: 8.0719 - 61s/epoch - 14ms/step
Epoch 7/1000
4208/4208 - 60s - loss: 31.2983 - val_loss: 7.5150 - 60s/epoch - 14ms/step
Epoch 8/1000
4208/4208 - 58s - loss: 30.7157 - val_loss: 7.2136 - 58s/epoch - 14ms/step
Epoch 9/1000
4208/4208 - 59s - loss: 30.1479 - val_loss: 6.9073 - 59s/epoch - 14ms/step
Epoch 10/1000
4208/4208 - 60s - loss: 29.5793 - val_loss: 6.8589 - 60s/epoch - 14ms/step
Epoch 11/1000
4208/4208 - 59s - loss: 28.9892 - val_loss: 6.9688 - 59s/epoch - 14ms/step
INFO:tensorflow:Assets writte



Epoch 1/1000
4208/4208 - 61s - loss: 13.4683 - val_loss: 38.8202 - 61s/epoch - 14ms/step
Epoch 2/1000
4208/4208 - 61s - loss: 8.3675 - val_loss: 29.6031 - 61s/epoch - 15ms/step
Epoch 3/1000
4208/4208 - 60s - loss: 7.2843 - val_loss: 17.1520 - 60s/epoch - 14ms/step
Epoch 4/1000
4208/4208 - 58s - loss: 6.9167 - val_loss: 13.5880 - 58s/epoch - 14ms/step
Epoch 5/1000
4208/4208 - 59s - loss: 6.7443 - val_loss: 11.2717 - 59s/epoch - 14ms/step
Epoch 6/1000
4208/4208 - 59s - loss: 6.6368 - val_loss: 9.9399 - 59s/epoch - 14ms/step
Epoch 7/1000
4208/4208 - 60s - loss: 6.5525 - val_loss: 7.5683 - 60s/epoch - 14ms/step
Epoch 8/1000
4208/4208 - 59s - loss: 6.4103 - val_loss: 7.6240 - 59s/epoch - 14ms/step
Epoch 9/1000
4208/4208 - 59s - loss: 6.3271 - val_loss: 5.3699 - 59s/epoch - 14ms/step
Epoch 10/1000
4208/4208 - 58s - loss: 6.2530 - val_loss: 4.7295 - 58s/epoch - 14ms/step
Epoch 11/1000
4208/4208 - 60s - loss: 6.1287 - val_loss: 2.7637 - 60s/epoch - 14ms/step
Epoch 12/1000
4208/4208 - 60s - los



Epoch 1/1000
4208/4208 - 65s - loss: 9.8670 - val_loss: 6.9518 - 65s/epoch - 15ms/step
Epoch 2/1000
4208/4208 - 63s - loss: 4.7081 - val_loss: 7.3880 - 63s/epoch - 15ms/step
Epoch 3/1000
4208/4208 - 63s - loss: 4.4406 - val_loss: 5.6030 - 63s/epoch - 15ms/step
Epoch 4/1000
4208/4208 - 64s - loss: 4.3621 - val_loss: 6.3253 - 64s/epoch - 15ms/step
Epoch 5/1000
4208/4208 - 63s - loss: 4.3027 - val_loss: 6.5602 - 63s/epoch - 15ms/step
Epoch 6/1000
4208/4208 - 63s - loss: 4.2229 - val_loss: 5.8129 - 63s/epoch - 15ms/step
Epoch 7/1000
4208/4208 - 65s - loss: 4.0828 - val_loss: 6.4632 - 65s/epoch - 15ms/step
Epoch 8/1000
4208/4208 - 65s - loss: 3.9916 - val_loss: 6.6799 - 65s/epoch - 15ms/step
Epoch 9/1000
4208/4208 - 64s - loss: 3.9281 - val_loss: 6.6180 - 64s/epoch - 15ms/step
Epoch 10/1000
4208/4208 - 62s - loss: 3.8866 - val_loss: 5.4781 - 62s/epoch - 15ms/step
Epoch 11/1000
4208/4208 - 63s - loss: 3.8464 - val_loss: 5.9707 - 63s/epoch - 15ms/step
Epoch 12/1000
4208/4208 - 62s - loss: 3.8



Epoch 1/1000
4208/4208 - 57s - loss: 28.5765 - val_loss: 59.5559 - 57s/epoch - 14ms/step
Epoch 2/1000
4208/4208 - 56s - loss: 21.2763 - val_loss: 48.8379 - 56s/epoch - 13ms/step
Epoch 3/1000
4208/4208 - 56s - loss: 18.6018 - val_loss: 13.7812 - 56s/epoch - 13ms/step
Epoch 4/1000
4208/4208 - 56s - loss: 17.1833 - val_loss: 3.9100 - 56s/epoch - 13ms/step
Epoch 5/1000
4208/4208 - 57s - loss: 16.7410 - val_loss: 4.3064 - 57s/epoch - 14ms/step
Epoch 6/1000
4208/4208 - 56s - loss: 16.4505 - val_loss: 3.7303 - 56s/epoch - 13ms/step
Epoch 7/1000
4208/4208 - 56s - loss: 16.1778 - val_loss: 3.7950 - 56s/epoch - 13ms/step
Epoch 8/1000
4208/4208 - 56s - loss: 15.8980 - val_loss: 3.4777 - 56s/epoch - 13ms/step
Epoch 9/1000
4208/4208 - 58s - loss: 15.6289 - val_loss: 3.4730 - 58s/epoch - 14ms/step
Epoch 10/1000
4208/4208 - 57s - loss: 15.3546 - val_loss: 3.2755 - 57s/epoch - 14ms/step
Epoch 11/1000
4208/4208 - 57s - loss: 15.0864 - val_loss: 3.5106 - 57s/epoch - 14ms/step
Epoch 12/1000
4208/4208 - 5



Epoch 1/1000
4208/4208 - 63s - loss: 8.5709 - val_loss: 7.6141 - 63s/epoch - 15ms/step
Epoch 2/1000
4208/4208 - 62s - loss: 4.4869 - val_loss: 7.4058 - 62s/epoch - 15ms/step
Epoch 3/1000
4208/4208 - 62s - loss: 4.3008 - val_loss: 8.6374 - 62s/epoch - 15ms/step
Epoch 4/1000
4208/4208 - 63s - loss: 4.1191 - val_loss: 7.3620 - 63s/epoch - 15ms/step
Epoch 5/1000
4208/4208 - 61s - loss: 4.0172 - val_loss: 6.2159 - 61s/epoch - 15ms/step
Epoch 6/1000
4208/4208 - 62s - loss: 3.9586 - val_loss: 6.5952 - 62s/epoch - 15ms/step
Epoch 7/1000
4208/4208 - 62s - loss: 3.8756 - val_loss: 7.1524 - 62s/epoch - 15ms/step
Epoch 8/1000
4208/4208 - 63s - loss: 3.7990 - val_loss: 7.0412 - 63s/epoch - 15ms/step
Epoch 9/1000
4208/4208 - 63s - loss: 3.7513 - val_loss: 6.4121 - 63s/epoch - 15ms/step
Epoch 10/1000
4208/4208 - 63s - loss: 3.7064 - val_loss: 6.5463 - 63s/epoch - 15ms/step
Epoch 11/1000
4208/4208 - 63s - loss: 3.6659 - val_loss: 7.8139 - 63s/epoch - 15ms/step
Epoch 12/1000
4208/4208 - 64s - loss: 3.6



Epoch 1/1000
4208/4208 - 59s - loss: 25.4688 - val_loss: 48.4924 - 59s/epoch - 14ms/step
Epoch 2/1000
4208/4208 - 57s - loss: 18.7489 - val_loss: 48.1145 - 57s/epoch - 14ms/step
Epoch 3/1000
4208/4208 - 58s - loss: 18.2481 - val_loss: 46.4316 - 58s/epoch - 14ms/step
Epoch 4/1000
4208/4208 - 57s - loss: 17.8764 - val_loss: 42.6850 - 57s/epoch - 14ms/step
Epoch 5/1000
4208/4208 - 58s - loss: 17.5425 - val_loss: 41.6380 - 58s/epoch - 14ms/step
Epoch 6/1000
4208/4208 - 58s - loss: 17.1779 - val_loss: 40.7387 - 58s/epoch - 14ms/step
Epoch 7/1000
4208/4208 - 58s - loss: 16.7982 - val_loss: 35.5330 - 58s/epoch - 14ms/step
Epoch 8/1000
4208/4208 - 58s - loss: 16.3466 - val_loss: 29.8163 - 58s/epoch - 14ms/step
Epoch 9/1000
4208/4208 - 58s - loss: 15.8315 - val_loss: 20.9482 - 58s/epoch - 14ms/step
Epoch 10/1000
4208/4208 - 58s - loss: 15.2317 - val_loss: 11.7880 - 58s/epoch - 14ms/step
Epoch 11/1000
4208/4208 - 58s - loss: 14.6816 - val_loss: 6.1161 - 58s/epoch - 14ms/step
Epoch 12/1000
4208/4



Epoch 1/1000
4208/4208 - 61s - loss: 30.3274 - val_loss: 31.9827 - 61s/epoch - 15ms/step
Epoch 2/1000
4208/4208 - 60s - loss: 19.7682 - val_loss: 12.0792 - 60s/epoch - 14ms/step
Epoch 3/1000
4208/4208 - 60s - loss: 17.5916 - val_loss: 5.5596 - 60s/epoch - 14ms/step
Epoch 4/1000
4208/4208 - 59s - loss: 17.0513 - val_loss: 5.2785 - 59s/epoch - 14ms/step
Epoch 5/1000
4208/4208 - 59s - loss: 16.7418 - val_loss: 3.9841 - 59s/epoch - 14ms/step
Epoch 6/1000
4208/4208 - 59s - loss: 16.4294 - val_loss: 4.1055 - 59s/epoch - 14ms/step
Epoch 7/1000
4208/4208 - 58s - loss: 16.1567 - val_loss: 3.8584 - 58s/epoch - 14ms/step
Epoch 8/1000
4208/4208 - 59s - loss: 15.8812 - val_loss: 3.9697 - 59s/epoch - 14ms/step
Epoch 9/1000
4208/4208 - 59s - loss: 15.6089 - val_loss: 3.6745 - 59s/epoch - 14ms/step
Epoch 10/1000
4208/4208 - 60s - loss: 15.3476 - val_loss: 3.9236 - 60s/epoch - 14ms/step
Epoch 11/1000
4208/4208 - 59s - loss: 15.0793 - val_loss: 3.8225 - 59s/epoch - 14ms/step
Epoch 12/1000
4208/4208 - 59



Epoch 1/1000
4208/4208 - 65s - loss: 27.1439 - val_loss: 4.6236 - 65s/epoch - 16ms/step
Epoch 2/1000
4208/4208 - 64s - loss: 17.8921 - val_loss: 4.1622 - 64s/epoch - 15ms/step
Epoch 3/1000
4208/4208 - 65s - loss: 17.4471 - val_loss: 4.1686 - 65s/epoch - 15ms/step
Epoch 4/1000
4208/4208 - 65s - loss: 17.0873 - val_loss: 4.2097 - 65s/epoch - 15ms/step
Epoch 5/1000
4208/4208 - 64s - loss: 16.8135 - val_loss: 4.4906 - 64s/epoch - 15ms/step
Epoch 6/1000
4208/4208 - 66s - loss: 16.5478 - val_loss: 4.2882 - 66s/epoch - 16ms/step
Epoch 7/1000
4208/4208 - 64s - loss: 16.2633 - val_loss: 4.4053 - 64s/epoch - 15ms/step
Epoch 8/1000
4208/4208 - 63s - loss: 15.9967 - val_loss: 4.4059 - 63s/epoch - 15ms/step
Epoch 9/1000
4208/4208 - 65s - loss: 15.7297 - val_loss: 4.2730 - 65s/epoch - 15ms/step
Epoch 10/1000
4208/4208 - 64s - loss: 15.4653 - val_loss: 4.2570 - 64s/epoch - 15ms/step
INFO:tensorflow:Assets written to: /tmp/tmpud0ckvu1/model/data/model/assets




Epoch 1/1000
4208/4208 - 69s - loss: 38.9545 - val_loss: 6.4766 - 69s/epoch - 16ms/step
Epoch 2/1000
4208/4208 - 65s - loss: 25.1784 - val_loss: 5.5512 - 65s/epoch - 16ms/step
Epoch 3/1000
4208/4208 - 65s - loss: 24.2378 - val_loss: 4.9742 - 65s/epoch - 16ms/step
Epoch 4/1000
4208/4208 - 64s - loss: 23.7437 - val_loss: 5.5982 - 64s/epoch - 15ms/step
Epoch 5/1000
4208/4208 - 62s - loss: 23.2908 - val_loss: 4.8759 - 62s/epoch - 15ms/step
Epoch 6/1000
4208/4208 - 64s - loss: 22.8658 - val_loss: 4.5250 - 64s/epoch - 15ms/step
Epoch 7/1000
4208/4208 - 66s - loss: 22.4555 - val_loss: 4.5804 - 66s/epoch - 16ms/step
Epoch 8/1000
4208/4208 - 66s - loss: 22.0511 - val_loss: 4.5847 - 66s/epoch - 16ms/step
Epoch 9/1000
4208/4208 - 67s - loss: 21.6444 - val_loss: 4.6787 - 67s/epoch - 16ms/step
Epoch 10/1000
4208/4208 - 66s - loss: 21.2552 - val_loss: 4.5875 - 66s/epoch - 16ms/step
Epoch 11/1000
4208/4208 - 65s - loss: 20.8544 - val_loss: 4.2960 - 65s/epoch - 16ms/step
Epoch 12/1000
4208/4208 - 65s 

In [None]:
# regs

for _ in range(100):
    config = {'shape': 1957,
              'output': 72,
              'dropout': np.random.choice([0.01, 0.1, 0.2, 0.5]),
              'num_layers': np.random.choice([3, 4, 5, 10]),
              'num_neurons': np.random.choice([16, 32, 64, 128]),
              'activation': np.random.choice(['gelu', 'gelu', 'relu'])}
    
    model = build_ae_model(config)
    
    with mlflow.start_run() as run:
        model_checkpoint_callback = ModelCheckpoint(
            filepath=f"models/model_{run.info.run_id}.keras",
            save_weights_only=False,
            monitor='val_loss',
            mode='min',
            save_best_only=True)
        callback = EarlyStopping(monitor='val_loss', patience=8, verbose=0)
        mlflow.log_param("Autoencoder", True)
        mlflow.log_param("CPL", False)

        history = model.fit(x_train, y_train, epochs=1000, batch_size=2000,
                            validation_data=(x_val, y_val),
                            verbose=2,
                            callbacks=[callback, model_checkpoint_callback])
        
        evaled_test = model.evaluate(x_train, y_train, batch_size=100, verbose=0)
        evaled_train = model.evaluate(x_test, y_test, batch_size=100, verbose=0)

        mlflow.log_metric("test_loss", evaled_test)
        mlflow.log_metric("train_loss", evaled_train)

        plt.plot(history.history["loss"])
        plt.plot(history.history["val_loss"])
        plt.title('Model Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plotname = f"plots/loss_plot_{run.info.run_id}.png"
        plt.savefig(plotname)
        plt.close()

        # Log the plot as an artifact
        mlflow.log_artifact(plotname)
        #model_path = "modles"
        #mlflow.tensorflow.log_model(tf_saved_model_dir=model_path, artifact_path="model")
