In [1]:
import numpy as np
import h5py
import pandas as pd
import tensorflow as tf
#from tensorflow import keras
#from sklearn.model_selection import train_test_split
from glob import glob

import matplotlib.pyplot as plt 
import seaborn as sns 

plt.rcParams["figure.figsize"] = [16,9]
sns.set_style('whitegrid')

In [None]:
df_dr3_all = pd.read_csv("/data/praveen/results/dr3_normal.csv")

In [None]:
df_dr3_all.head()

In [None]:
df_dr3_all["label"].value_counts()

In [None]:
df_data_only = df_dr3_all.drop(["sobject_id","label"],1)

In [None]:
df_data_only.head()

In [None]:
df_data_only_inverted = 1 - df_data_only

In [None]:
import os, sys
from keras.layers import Input, Dense, Dropout
from keras.models import Model, Sequential, load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.advanced_activations import PReLU
from keras import backend as K
#from keras.utils import plot_model
from keras.utils.vis_utils import plot_model

In [None]:
activation = None  # PReLU if set to None
dropout_rate = 0  # from 0 to 1
n_l_e = 5
n_epoch = 350
decoded_layer_name = 'encoded'
n_wvl = 4459
optimizer = tf.keras.optimizers.Adam()

# compute number of nodes in every connected layer
n_l_1 = int(n_wvl * 0.75)
n_l_2 = int(n_wvl * 0.50)
n_l_3 = 0  # int(n_wvl * 0.40)
n_l_4 = 0  # int(n_wvl * 0.20)
n_l_5 = int(n_wvl * 0.25)
n_l_6 = int(n_wvl * 0.10)

In [None]:
autoencoder = Sequential()

if n_l_1 > 0:
    autoencoder.add(Dense(n_l_1, input_shape=(n_wvl,), activation=activation, name='E_1'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_1'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_1'))

if n_l_2 > 0:
    autoencoder.add(Dense(n_l_2, activation=activation, name='E_2'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_2'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_2'))

if n_l_3 > 0:
    autoencoder.add(Dense(n_l_3, activation=activation, name='E_3'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_3'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_3'))

if n_l_4 > 0:
    autoencoder.add(Dense(n_l_4, activation=activation, name='E_4'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_4'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_4'))

if n_l_5 > 0:
    autoencoder.add(Dense(n_l_5, activation=activation, name='E_5'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_5'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_5'))

if n_l_6 > 0:
    autoencoder.add(Dense(n_l_6, activation=activation, name='E_6'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_6'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_6'))

autoencoder.add(Dense(n_l_e, activation=activation, name=decoded_layer_name))
if activation is None:
        autoencoder.add(PReLU(name='PR_7'))

if n_l_6 > 0:
    autoencoder.add(Dense(n_l_6, activation=activation, name='D_1'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_8'))
    if activation is None:
            autoencoder.add(PReLU(name='PR_8'))

if n_l_5 > 0:
    autoencoder.add(Dense(n_l_5, activation=activation, name='D_2'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_9'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_9'))

if n_l_4 > 0:
    autoencoder.add(Dense(n_l_4, activation=activation, name='D_3'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_10'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_10'))

if n_l_3 > 0:
    autoencoder.add(Dense(n_l_3, activation=activation, name='D_4'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_11'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_11'))

if n_l_2 > 0:
    autoencoder.add(Dense(n_l_2, activation=activation, name='D_5'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_12'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_12'))

if n_l_1 > 0:
    autoencoder.add(Dense(n_l_1, activation=activation, name='D_6'))
    if dropout_rate > 0:
        autoencoder.add(Dropout(dropout_rate, name='DO_13'))
    if activation is None:
        autoencoder.add(PReLU(name='PR_13'))

autoencoder.add(Dense(n_wvl, activation='linear', name='recreated'))
autoencoder.summary()

# Visualize network architecture and save the visualization as a file
#plot_model(autoencoder, show_layer_names=True, show_shapes=True, to_file='ann_network_structure_a.pdf')
#plot_model(autoencoder, show_layer_names=True, show_shapes=True, to_file='ann_network_structure_a.png', dpi=300)

# model file handling
out_model_file = 'model_weights.h5'

if os.path.isfile(out_model_file):
    autoencoder.load_weights(out_model_file, by_name=True)


In [None]:
autoencoder.compile(optimizer=optimizer, loss='mae')

In [None]:
checkpoint = ModelCheckpoint('/data/praveen/autoencoder-run-2'+'ann_model_run_{epoch:03d}-{loss:.4f}-{val_loss:.4f}.h5',
                                     monitor='val_loss', verbose=0, save_best_only=False,
                                     save_weights_only=True, mode='auto', period=1)

In [13]:
ann_fit_hist = autoencoder.fit(df_data_only_inverted, df_data_only_inverted,
                                       epochs=n_epoch,
                                       callbacks=[checkpoint],
                                       shuffle=True,
                                       batch_size=40000,
                                       validation_split=0.10,
                                       verbose=2)

2022-04-11 22:32:00.843361: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 18664660560 exceeds 10% of free system memory.
2022-04-11 22:35:39.758669: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 18664660560 exceeds 10% of free system memory.


Epoch 1/350
14/14 - 334s - loss: 0.0356 - val_loss: 0.0297 - 334s/epoch - 24s/step
Epoch 2/350
14/14 - 266s - loss: 0.0280 - val_loss: 0.0268 - 266s/epoch - 19s/step
Epoch 3/350
14/14 - 289s - loss: 0.0260 - val_loss: 0.0249 - 289s/epoch - 21s/step
Epoch 4/350
14/14 - 294s - loss: 0.0251 - val_loss: 0.0225 - 294s/epoch - 21s/step
Epoch 5/350
14/14 - 292s - loss: 0.0208 - val_loss: 0.0197 - 292s/epoch - 21s/step
Epoch 6/350
14/14 - 288s - loss: 0.0190 - val_loss: 0.0185 - 288s/epoch - 21s/step
Epoch 7/350
14/14 - 283s - loss: 0.0183 - val_loss: 0.0182 - 283s/epoch - 20s/step
Epoch 8/350
14/14 - 286s - loss: 0.0180 - val_loss: 0.0179 - 286s/epoch - 20s/step
Epoch 9/350
14/14 - 288s - loss: 0.0178 - val_loss: 0.0180 - 288s/epoch - 21s/step
Epoch 10/350
14/14 - 286s - loss: 0.0178 - val_loss: 0.0178 - 286s/epoch - 20s/step
Epoch 11/350
14/14 - 286s - loss: 0.0177 - val_loss: 0.0178 - 286s/epoch - 20s/step
Epoch 12/350
14/14 - 288s - loss: 0.0176 - val_loss: 0.0177 - 288s/epoch - 21s/step
E

In [41]:
loss_combined = np.vstack((ann_fit_hist.history['loss'], ann_fit_hist.history['val_loss'])).T
np.savetxt('ann_network_loss.txt', loss_combined)
i_best = np.argmin(ann_fit_hist.history['val_loss'])
plt.plot(ann_fit_hist.history['loss'], label='Train')
plt.plot(ann_fit_hist.history['val_loss'], label='Validation')
plt.axvline(np.arange(n_epoch)[i_best], ls='--', color='black', alpha=0.5)
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Loss value')
plt.ylim(np.nanmin(loss_combined)*0.95, np.nanpercentile(loss_combined, 99))
plt.xlim(-1, n_epoch)
plt.grid(ls='--', alpha=0.2, color='black')
plt.tight_layout()
plt.legend()
plt.savefig('ann_network_loss.png', dpi=250)
plt.close()

In [3]:
import os, sys
from glob import glob
from keras.layers import Input, Dense, Dropout
from keras.models import Model, Sequential, load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.advanced_activations import PReLU
from keras import backend as K
#from keras.utils import plot_model
from keras.utils.vis_utils import plot_model

In [4]:
print('')
        # recover weights of the selected model and compute predictions
for i_best in [10, 25, 50, 100, 150, 200, 250, 300]:

    h5_weight_files = glob('/data/praveen/autoencoder-runs/'+'ann_model_run_{:03.0f}-*-*.h5'.format(i_best))

    if len(h5_weight_files) == 1:
        print('----------------------------------------------------------------')
        print('Restoring epoch {:.0f} with the loss ({:.4f}).'.format(i_best, ann_fit_hist.history['val_loss'][i_best-1]))
        autoencoder.load_weights(h5_weight_files[0], by_name=True)

        sub_dir = 'epoch_{:03.0f}'.format(i_best)
        os.system('mkdir ' + sub_dir)
        os.chdir(sub_dir)

        print('Saving selected model weights')
        autoencoder.save_weights(out_model_file)

        #print('Predicting values')
        #processed_data_all = autoencoder.predict(df_data_only_inverted, verbose=2, batch_size=20)

        #plot_sample_spectra()

        #print('')
        #os.chdir('..')



----------------------------------------------------------------


NameError: name 'ann_fit_hist' is not defined