In [30]:
pip install sklearn

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install datetime

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import xarray as xr
import h5py

from datetime import datetime
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.metrics import MeanSquaredError
from tensorflow.keras.layers import Input, LeakyReLU, Dense, Activation, Flatten, Conv2D, Conv2DTranspose, MaxPooling2D, BatchNormalization, Reshape
from tensorflow.keras.models import Model

# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [4]:
with h5py.File('processed_data_ae/np_data.h5', 'r') as hf:
    data = hf['np_data'][:]

print(data.shape)

(8530, 192, 192, 2)


In [5]:
print(data)

[[[[-2.47433877e+00  4.50653076e-01]
   [-2.47227287e+00  4.22745228e-01]
   [-2.49654698e+00  3.96081567e-01]
   ...
   [-2.49194694e+00 -1.17015743e+00]
   [-2.54665089e+00 -1.16967881e+00]
   [-2.57032084e+00 -1.18266952e+00]]

  [[-2.52002358e+00  4.53049988e-01]
   [-2.53445411e+00  3.97361368e-01]
   [-2.54852366e+00  4.00280446e-01]
   ...
   [-2.58340573e+00 -1.12509418e+00]
   [-2.60144782e+00 -1.14579189e+00]
   [-2.60572982e+00 -1.15639758e+00]]

  [[-2.51759076e+00  4.58887845e-01]
   [-2.52549720e+00  4.21892971e-01]
   [-2.51069617e+00  4.61187005e-01]
   ...
   [-2.59329128e+00 -1.10369992e+00]
   [-2.60617042e+00 -1.12321818e+00]
   [-2.60261035e+00 -1.12687624e+00]]

  ...

  [[-1.58731222e+00 -9.34629977e-01]
   [-1.57048082e+00 -1.39101851e+00]
   [-1.55039418e+00 -1.14039171e+00]
   ...
   [-5.02201855e-01 -1.07426441e+00]
   [-5.04015684e-01 -1.18869841e+00]
   [-5.98666489e-01 -1.35226285e+00]]

  [[-1.65483463e+00 -6.58889890e-01]
   [-1.62351513e+00 -7.88042486e

In [6]:
print(np.max(data), np.min(data))

19.913855 -16.306587


In [7]:
#First split data into train+validation and test set
X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)

#Next split training again into train and validation
X_train, X_val = train_test_split(X_train, test_size=0.25, random_state=42)

print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

print(np.max(X_train), np.max(X_val), np.max(X_test), np.min(X_train), np.min(X_val), np.min(X_test))

(5118, 192, 192, 2)
(1706, 192, 192, 2)
(1706, 192, 192, 2)
19.913855 18.198473 19.736868 -16.306587 -16.134562 -16.26034


In [8]:
def encoder(latent_dim):
    '''
    return an encoder which encodes the input image into a latent vector with dimension latent_dim
    '''
    
    X_input = Input((192, 192, 2))
    
    #FIXME Should we add BN layer? I currently add that between conv and relu for the first 4 sets of layers
    X = Conv2D(filters=16, kernel_size=(3,3), strides=(1,1), padding="same")(X_input)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    X = MaxPooling2D(pool_size=(2, 2), padding="same")(X)
    
    X = Conv2D(filters=16, kernel_size=(3,3), strides=(1,1), padding="same")(X)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    X = MaxPooling2D(pool_size=(2, 2), padding="same")(X)
    
    X = Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), padding="same")(X)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    X = MaxPooling2D(pool_size=(2, 2), padding="same")(X)
    
    X = Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), padding="same")(X)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    X = MaxPooling2D(pool_size=(2, 2), padding="same")(X)
    
    #FIXME Should we add some dropout layer to regularize the model? 
    #I didn't do that, but need to look at train/val error
    
    X = Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), padding="same")(X)
    X = LeakyReLU(alpha=0.2)(X)
    X = MaxPooling2D(pool_size=(2, 2), padding="same")(X)
    
    X = Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), padding="same")(X)
    X = LeakyReLU(alpha=0.2)(X)
    X = MaxPooling2D(pool_size=(2, 2), padding="same")(X)
    
    X = Flatten()(X)
    X = Dense(units=latent_dim)(X)
    #FIXME Should we add an activation layer here? I didn't do it
    
    model = Model(inputs = X_input, outputs = X)
    return model

In [9]:
def decoder(latent_dim):
    '''
    return an encoder which encodes the input image into a latent vector with dimension latent_dim
    '''
    
    X_input = Input((latent_dim))
    
    X = Dense(units=3*3*64, input_dim=latent_dim)(X_input)
    X = Reshape((3,3,64))(X)
    
    X = Conv2DTranspose(filters=64, kernel_size=(3,3), strides=(2,2), padding="same")(X)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    
    X = Conv2DTranspose(filters=64, kernel_size=(3,3), strides=(2,2), padding="same")(X)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    
    X = Conv2DTranspose(filters=32, kernel_size=(3,3), strides=(2,2), padding="same")(X)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    
    X = Conv2DTranspose(filters=32, kernel_size=(3,3), strides=(2,2), padding="same")(X)
    X = BatchNormalization()(X)
    X = LeakyReLU(alpha=0.2)(X)
    
    X = Conv2DTranspose(filters=16, kernel_size=(3,3), strides=(2,2), padding="same")(X)
    X = LeakyReLU(alpha=0.2)(X)
    
    X = Conv2DTranspose(filters=16, kernel_size=(3,3), strides=(2,2), padding="same")(X)
    X = LeakyReLU(alpha=0.2)(X)
    
    X = Conv2D(filters=2, kernel_size=(3,3), strides=(1,1), padding="same")(X)    
    
    model = Model(inputs = X_input, outputs = X)
    return model

In [10]:
encoder_72_sub1 = encoder(72)
print(encoder_72_sub1.summary())

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 192, 192, 2)]     0         
                                                                 
 conv2d (Conv2D)             (None, 192, 192, 16)      304       
                                                                 
 batch_normalization (BatchN  (None, 192, 192, 16)     64        
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 192, 192, 16)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 96, 96, 16)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 96, 96, 16)        2320  

2022-07-12 11:48:41.589855: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-12 11:48:41.931324: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30965 MB memory:  -> device: 0, name: Quadro GV100, pci bus id: 0000:01:00.0, compute capability: 7.0


In [11]:
decoder_72_sub1 = decoder(72)
print(decoder_72_sub1.summary())

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 72)]              0         
                                                                 
 dense_1 (Dense)             (None, 576)               42048     
                                                                 
 reshape (Reshape)           (None, 3, 3, 64)          0         
                                                                 
 conv2d_transpose (Conv2DTra  (None, 6, 6, 64)         36928     
 nspose)                                                         
                                                                 
 batch_normalization_4 (Batc  (None, 6, 6, 64)         256       
 hNormalization)                                                 
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 6, 6, 64)          0   

In [12]:
class Autoencoder(Model):
    def __init__(self, encoder, decoder):
        super(Autoencoder, self).__init__() 
        self.encoder = encoder
        self.decoder = decoder

    def call(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [13]:
autoencoder_72 = Autoencoder(encoder_72_sub1, decoder_72_sub1)
autoencoder_72.compile(optimizer='adam', loss=losses.MeanSquaredError())

In [14]:
logdir = "autoencoder_trivial/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

checkpoint_filepath = 'autoencoder_trivial/ckp/'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,
                                                               save_weights_only=True,
                                                               save_freq=10*40)

In [15]:
autoencoder_72.load_weights("autoencoder_trivial/ckp/")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fc2702f6700>

In [16]:
loss = autoencoder_72.evaluate(X_test, X_test, verbose=2)

2022-07-12 11:49:28.903805: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100


54/54 - 2s - loss: 0.1363 - 2s/epoch - 34ms/step


In [17]:
letent_X_test = autoencoder_72.encoder(X_test)
print(letent_X_test.shape)

(1706, 72)


In [18]:
X_test_regen = autoencoder_72.decoder(letent_X_test)
print(X_test_regen.shape)

(1706, 192, 192, 2)


In [19]:
X_test_diff = X_test_regen - X_test
print(np.max(X_test_diff), np.min(X_test_diff))

5.8525925 -5.4978747


In [20]:
print(np.std(X_test_diff))

0.368899


In [21]:
training_history = autoencoder_72.fit(X_train, X_train,
                                      batch_size=128,
                                      epochs=10000,
                                      shuffle=True,
                                      validation_data=(X_val, X_val),
                                      callbacks=[tensorboard_callback, model_checkpoint_callback])

Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 67/10000
Epoc

Epoch 80/10000
Epoch 81/10000
Epoch 82/10000
Epoch 83/10000
Epoch 84/10000
Epoch 85/10000
Epoch 86/10000
Epoch 87/10000
Epoch 88/10000
Epoch 89/10000
Epoch 90/10000
Epoch 91/10000
Epoch 92/10000
Epoch 93/10000
Epoch 94/10000
Epoch 95/10000
Epoch 96/10000
Epoch 97/10000
Epoch 98/10000
Epoch 99/10000
Epoch 100/10000
Epoch 101/10000
Epoch 102/10000
Epoch 103/10000
Epoch 104/10000
Epoch 105/10000
Epoch 106/10000
Epoch 107/10000
Epoch 108/10000
Epoch 109/10000
Epoch 110/10000
Epoch 111/10000
Epoch 112/10000
Epoch 113/10000
Epoch 114/10000
Epoch 115/10000
Epoch 116/10000
Epoch 117/10000
Epoch 118/10000
Epoch 119/10000
Epoch 120/10000
Epoch 121/10000
Epoch 122/10000
Epoch 123/10000
Epoch 124/10000
Epoch 125/10000
Epoch 126/10000
Epoch 127/10000
Epoch 128/10000
Epoch 129/10000
Epoch 130/10000
Epoch 131/10000
Epoch 132/10000
Epoch 133/10000
Epoch 134/10000
Epoch 135/10000
Epoch 136/10000
Epoch 137/10000
Epoch 138/10000
Epoch 139/10000
Epoch 140/10000
Epoch 141/10000
Epoch 142/10000
Epoch 143/10

KeyboardInterrupt: 

In [25]:
adam_llr = tf.keras.optimizers.Adam(learning_rate=0.0005)

In [28]:
autoencoder_72.compile(optimizer="adam", loss=losses.MeanSquaredError())

In [29]:
autoencoder_72.load_weights("autoencoder_trivial/ckp/")
training_history = autoencoder_72.fit(X_train, X_train,
                                      batch_size=128,
                                      epochs=10000,
                                      shuffle=True,
                                      validation_data=(X_val, X_val),
                                      callbacks=[tensorboard_callback, model_checkpoint_callback])

Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 67/10000
Epoc

Epoch 80/10000
Epoch 81/10000
Epoch 82/10000
Epoch 83/10000
Epoch 84/10000
Epoch 85/10000
Epoch 86/10000
Epoch 87/10000
Epoch 88/10000
Epoch 89/10000
Epoch 90/10000
Epoch 91/10000
Epoch 92/10000
Epoch 93/10000
Epoch 94/10000
Epoch 95/10000
Epoch 96/10000
Epoch 97/10000
Epoch 98/10000
Epoch 99/10000
Epoch 100/10000
Epoch 101/10000
Epoch 102/10000
Epoch 103/10000
Epoch 104/10000
Epoch 105/10000
Epoch 106/10000
Epoch 107/10000
Epoch 108/10000
Epoch 109/10000
Epoch 110/10000
Epoch 111/10000
Epoch 112/10000
Epoch 113/10000
Epoch 114/10000
Epoch 115/10000
Epoch 116/10000
Epoch 117/10000
Epoch 118/10000
Epoch 119/10000
Epoch 120/10000
Epoch 121/10000
Epoch 122/10000
Epoch 123/10000
Epoch 124/10000
Epoch 125/10000
Epoch 126/10000
Epoch 127/10000
Epoch 128/10000
Epoch 129/10000
Epoch 130/10000
Epoch 131/10000
Epoch 132/10000
Epoch 133/10000
Epoch 134/10000
Epoch 135/10000
Epoch 136/10000
Epoch 137/10000
Epoch 138/10000
Epoch 139/10000
Epoch 140/10000
Epoch 141/10000
Epoch 142/10000
Epoch 143/10

Epoch 159/10000
Epoch 160/10000
Epoch 161/10000
Epoch 162/10000
Epoch 163/10000
Epoch 164/10000
Epoch 165/10000
Epoch 166/10000
Epoch 167/10000
Epoch 168/10000
Epoch 169/10000
Epoch 170/10000
Epoch 171/10000
Epoch 172/10000
Epoch 173/10000
Epoch 174/10000
Epoch 175/10000
Epoch 176/10000
Epoch 177/10000
Epoch 178/10000
Epoch 179/10000
Epoch 180/10000
Epoch 181/10000
Epoch 182/10000
Epoch 183/10000
Epoch 184/10000
Epoch 185/10000
Epoch 186/10000
Epoch 187/10000
Epoch 188/10000
Epoch 189/10000
Epoch 190/10000
Epoch 191/10000
Epoch 192/10000
Epoch 193/10000
Epoch 194/10000
Epoch 195/10000
Epoch 196/10000
Epoch 197/10000
Epoch 198/10000
Epoch 199/10000
Epoch 200/10000
Epoch 201/10000
Epoch 202/10000
Epoch 203/10000

KeyboardInterrupt: 