In [1]:
from keras import Input, layers, backend, Model, losses, datasets, models, metrics, optimizers, initializers
from keras.regularizers import l2
from keras.utils import Sequence
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import math

In [2]:

class FelixSequence(Sequence):
    """Batch provider that lazily loads paired .npy files from disk.

    Indexing the sequence returns an (inputs, targets) pair of arrays; every
    file in the requested batch is read with np.load and reshaped to
    (128, 128, 1).
    """

    def __init__(self, x_set, y_set, batch_size):
        """Store the path lists and the batch size.

        x_set is a list of paths to .npy input files and y_set is the
        corresponding list of paths to .npy output files.
        """
        self.x = x_set
        self.y = y_set
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        n_batches = np.ceil(len(self.x) / float(self.batch_size))
        return int(n_batches)

    def __getitem__(self, idx):
        """Load batch number idx and return it as (inputs, targets)."""
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        # All inputs are read first, then all targets.
        inputs = [np.reshape(np.load(path), (128, 128, 1)) for path in self.x[window]]
        targets = [np.reshape(np.load(path), (128, 128, 1)) for path in self.y[window]]
        return np.array(inputs), np.array(targets)
    

def gen_paths_labels(base_path = "D:\\Uni Work\\Masters Project\\electron_dists\\Data\\VAE_000_1\\Data"):
    """Yield one [input_paths, output_paths] list per segment directory.

    base_path is expected to contain one sub-directory per data segment
    (typically training, validation and testing), each holding one
    sub-directory per crystal. Segments, crystals and files are all visited
    in sorted order; within a crystal directory the first sorted file is
    taken as the input and the second as the output.

    Raises IndexError if a crystal directory holds fewer than two entries.
    """
    for segment_name in sorted(os.listdir(base_path)):
        segment_dir = os.path.join(base_path, segment_name)
        pairs = []
        for crystal_name in sorted(os.listdir(segment_dir)):
            crystal_dir = os.path.join(segment_dir, crystal_name)
            names = sorted(os.listdir(crystal_dir))
            pairs.append((os.path.join(crystal_dir, names[0]),
                          os.path.join(crystal_dir, names[1])))
        yield [[pair[0] for pair in pairs], [pair[1] for pair in pairs]]

def gen_paths_fromfile(Path):
    """Read a text file of paths and return them as a numpy object array.

    Each non-blank line contributes its first whitespace-delimited token;
    anything after the first whitespace on a line is ignored. Blank or
    whitespace-only lines are skipped (they used to raise IndexError).

    Parameters
    ----------
    Path : path of the text file to read.

    Returns
    -------
    1-D numpy array with dtype object, one entry per non-blank line, in file
    order.
    """
    Paths = []
    with open(Path) as textFile:
        for line in textFile:
            fields = line.split()
            if fields:  # an empty list means the line held only whitespace
                Paths.append(fields[0])

    return np.array(Paths, dtype = "object")

In [3]:
# Dimensionality of the latent space: the width of the encoder's z_mean and
# z_log_var Dense layers and of the decoder's input.
latent_dim = 16
# Disabled alternative noise source (Laplace instead of normal).
#lap = tf.compat.v1.distributions.Laplace(0.0,1.0)
"""
## Create a sampling layer
"""
class Sampling(layers.Layer):
    """Draw a latent vector z from (z_mean, z_log_var).

    Computes ``z = z_mean + exp(0.5 * z_log_var) * epsilon * gamma`` with
    ``epsilon`` drawn from a standard normal distribution. ``gamma`` scales
    the noise term: 1 gives the usual VAE sampling, 0 returns ``z_mean``
    unchanged.
    """
    def __init__(self, gamma = 1, **kwargs):
        super(Sampling, self).__init__(**kwargs)
        # Plain Python attribute that the notebook reassigns between training
        # and validation. NOTE(review): inside a traced tf.function the value
        # seen at trace time is presumably baked in - confirm if gamma is ever
        # changed after the first traced call.
        self.gamma = gamma

    def call(self, inputs):
        """Return a sample z for the pair ``[z_mean, z_log_var]``."""
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        # Match the noise dtype to z_mean so the arithmetic below does not
        # mix dtypes when the layer is not running in float32.
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim), dtype=z_mean.dtype)
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon * self.gamma

    def get_config(self):
        """Include ``gamma`` so the layer can be serialised and rebuilt."""
        config = super(Sampling, self).get_config()
        config["gamma"] = self.gamma
        return config


In [4]:
def ZMCC(Image1, Image2):
    """Zero-mean normalised cross-correlation loss between two image batches.

    For every image (and every trailing channel) this evaluates

        10000 * (1 - sum((I1 - mean1) * (I2 - mean2)) / (N * sd1 * sd2))

    where the means, standard deviations and the sum are taken over axes
    (1, 2) and N is the number of elements along those two axes. Perfectly
    correlated images give 0; uncorrelated images give 10000.

    Parameters
    ----------
    Image1, Image2 : tensors or arrays of identical shape whose axes 1 and 2
        are the image axes. Image2 is cast to Image1's dtype so that a
        float64 numpy target can be compared with a float32 prediction.

    Returns
    -------
    Tensor with axes 1 and 2 reduced away (one value per image and channel).
    """
    Image1 = tf.convert_to_tensor(Image1)
    Image2 = tf.cast(tf.convert_to_tensor(Image2), Image1.dtype)

    spatial_axes = (1, 2)
    # Use the real pixel count instead of assuming 128 x 128 images.
    dims = tf.shape(Image1)
    n_pixels = tf.cast(dims[1] * dims[2], Image1.dtype)

    img1 = Image1 - tf.math.reduce_mean(Image1, axis = spatial_axes, keepdims = True)
    img2 = Image2 - tf.math.reduce_mean(Image2, axis = spatial_axes, keepdims = True)

    # Population standard deviation, with a tiny epsilon inside the square
    # root: a constant image (e.g. an all-zero ReLU output) would otherwise
    # give sd = 0 and turn both the loss and its gradient into NaN/inf.
    eps = 1e-12
    sd1 = tf.sqrt(tf.math.reduce_mean(tf.square(img1), axis = spatial_axes) + eps)
    sd2 = tf.sqrt(tf.math.reduce_mean(tf.square(img2), axis = spatial_axes) + eps)

    correlation = (1 / (n_pixels * sd1 * sd2)) * tf.reduce_sum(img1 * img2, axis = spatial_axes)
    zmcc = 10000 * (1 - correlation)
    return(zmcc)

In [5]:
"""
## Build the encoder
"""

# Number of filters used by every Conv2D / Conv2DTranspose hidden layer in the
# encoder and decoder.
Num_Kernals = 16
# Kernel side length for the first encoder convolution and the first decoder
# transposed convolution; the second layers hard-code (8, 8) instead.
Size_Kernals = 8

class Encoder(Model):
    """Convolutional encoder returning (z_mean, z_log_var, z) for an image batch.

    Two stride-2 ReLU convolutions feed a flatten and an L2-regularised dense
    layer; two linear Dense heads of width latent_dim produce the latent mean
    and log-variance, and a Sampling layer draws z from them. ``gamma`` is
    forwarded to that Sampling layer.
    """

    def __init__(self, gamma = 0, **kwargs):
        super().__init__(**kwargs)

        # Settings shared by both convolutions.
        conv_kwargs = dict(activation="relu", strides=2, padding="same")
        self.Conv1 = layers.Conv2D(Num_Kernals, kernel_size=(Size_Kernals, Size_Kernals), **conv_kwargs)
        self.Conv2 = layers.Conv2D(Num_Kernals, kernel_size=(8, 8), **conv_kwargs)
        # (A third and fourth convolution of the same form are currently disabled.)

        self.flat = layers.Flatten()

        # Width of the dense layer, derived from a parameter budget.
        self.DenseParam_Encode = 1500000
        self.DenseNeurons_Encode = int(self.DenseParam_Encode / 16400)

        self.dense = layers.Dense(
            self.DenseNeurons_Encode,
            activation="relu",
            kernel_regularizer=l2(0.1),
        )
        self.z_mean = layers.Dense(latent_dim, name="z_mean")
        # The log-variance head starts at exactly zero (zero kernel and bias).
        self.z_log_var = layers.Dense(
            latent_dim,
            name="z_log_var",
            kernel_initializer='zeros',
            bias_initializer='zeros',
        )
        self.sampling = Sampling(gamma=gamma)

    def call(self, inputs):
        """Encode ``inputs`` and return ``(z_mean, z_log_var, z)``."""
        features = self.Conv2(self.Conv1(inputs))
        hidden = self.dense(self.flat(features))
        z_mean = self.z_mean(hidden)
        z_log_var = self.z_log_var(hidden)
        return z_mean, z_log_var, self.sampling([z_mean, z_log_var])
    
# Build the encoder with gamma = 0, so the Sampling layer's noise term is
# multiplied by zero and z equals z_mean until gamma is changed.
encoder = Encoder(gamma = 0, name="encoder")
# Call the model once on a symbolic 128 x 128 x 1 input before summary().
encoder(Input(batch_shape=(None,128,128,1)))

encoder.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 64, 64, 16)        1040      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 16)        16400     
_________________________________________________________________
flatten (Flatten)            (None, 16384)             0         
_________________________________________________________________
dense (Dense)                (None, 91)                1491035   
_________________________________________________________________
z_mean (Dense)               (None, 16)                1472      
_________________________________________________________________
z_log_var (Dense)            (None, 16)                1472      
_________________________________________________________________
sampling (Sampling)          (None, 16)                0   

In [6]:
"""
## Build the decoder
"""


class Decoder(Model):
    """Transposed-convolution decoder mapping a latent vector to an image.

    An L2-regularised dense layer is reshaped to a square feature map, then
    upsampled by two stride-2 transposed convolutions and reduced to a single
    channel by a final ReLU transposed convolution.

    ``encoder_layer`` is an output-shape tuple; its element at index 1 is used
    as the side length of the square feature map the dense layer is reshaped
    to.
    """

    def __init__(self, encoder_layer, **kwargs):
        super().__init__(**kwargs)
        side = encoder_layer[1]

        # Pick the feature-map depth from a parameter budget for the dense layer.
        param_budget = 1500000
        depth = int(param_budget / (latent_dim * side * side))

        self.dense1 = layers.Dense(
            side * side * depth,
            activation="relu",
            kernel_regularizer=l2(0.1),
        )
        self.dense2 = layers.Reshape((side, side, depth))

        # Settings shared by both upsampling layers.
        upsample_kwargs = dict(activation="relu", strides=2, padding="same")
        self.convT1 = layers.Conv2DTranspose(Num_Kernals, kernel_size=(Size_Kernals, Size_Kernals), **upsample_kwargs)
        self.convT2 = layers.Conv2DTranspose(Num_Kernals, kernel_size=(8, 8), **upsample_kwargs)
        # (A third and fourth upsampling layer of the same form are currently disabled.)

        # NOTE(review): `outputs` is also a name Keras models use themselves -
        # presumably harmless here, but confirm before relying on it.
        self.outputs = layers.Conv2DTranspose(1, kernel_size=(2, 2), activation="relu", padding="same")

    def call(self, inputs):
        """Decode a batch of latent vectors into images."""
        feature_map = self.dense2(self.dense1(inputs))
        upsampled = self.convT2(self.convT1(feature_map))
        return self.outputs(upsampled)
    
# Size the decoder from the output shape of the encoder's second layer
# (encoder.layers[1]); Decoder reads index 1 of that shape as the side length.
decoder = Decoder(encoder.layers[1].output_shape, name="decoder")
# Call the model once on a symbolic latent input before summary().
decoder(Input(batch_shape=(None, latent_dim)))
decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 93184)             1584128   
_________________________________________________________________
reshape (Reshape)            (None, 32, 32, 91)        0         
_________________________________________________________________
conv2d_transpose (Conv2DTran (None, 64, 64, 16)        93200     
_________________________________________________________________
conv2d_transpose_1 (Conv2DTr (None, 128, 128, 16)      16400     
_________________________________________________________________
conv2d_transpose_2 (Conv2DTr (None, 128, 128, 1)       65        
Total params: 1,693,793
Trainable params: 1,693,793
Non-trainable params: 0
_________________________________________________________________


In [7]:
class VAE(Model):
    """Variational autoencoder combining an encoder and a decoder.

    Training minimises the mean ZMCC reconstruction loss between the decoded
    image and the target ``y``, plus the KL divergence of the latent
    distribution from a standard normal, plus any regularisation losses
    registered by the layers. Running means of the total, reconstruction and
    KL losses are tracked as metrics.
    """
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers listed here are the metrics Keras manages for this model."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker
        ]

    def train_step(self, data):
        """Run one optimisation step on an ``(x, y)`` batch.

        Returns a dict with the running means of the total loss
        (``"loss"``), the reconstruction loss and the KL loss.
        """
        x, y = data
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(x)
            reconstruction = self.decoder(z)
            reconstruction_loss= tf.reduce_mean(ZMCC(reconstruction, y))
            # Weight of the KL term relative to the reconstruction term.
            beta = 1
            kl_loss = (-0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))) * beta
            # Sum over latent dimensions, then average over the batch.
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            # A custom train_step must add layer regularisation losses itself:
            # the l2 kernel regularisers on the dense layers are only collected
            # in self.losses and were previously never applied. sum() of an
            # empty list is 0, so models without regularisers are unaffected.
            total_loss = reconstruction_loss + kl_loss + sum(self.losses)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result()
        }

    def call(self, data):
        """Return the decoder's reconstruction of the sampled latent vector z."""
        return self.decoder(self.encoder(data)[2])

#losses.MSE(y, reconstruction), axis=(1, 2)
#losses.mean_squared_logarithmic_error(y, reconstruction), axis=(1, 2)

In [8]:
#vae = models.load_model("/home/ug-ml/felix-ML/VAE_000/Data/Models/VAE_3")

# Assemble the VAE from the encoder and decoder built above. No loss is passed
# to compile() because VAE.train_step computes the loss itself.
vae = VAE(encoder, decoder)
#vae.add_metric(trainable_metric(vae), name="testMetric")
vae.compile(optimizer=optimizers.Adam())


batch_size=32
# Directory holding the text files that list the sample paths. The file names
# below are appended by plain string concatenation, so the trailing slash is
# required.
data_path = "/home/ug-ml/felix-ML/VAE_000/DataAllInOne_Normalised/VAE_000_2/FilePaths/"
#data_path = "/home/ug-ml/felix-ML/VAE_000/Data/Data/"

#data = [i for i in gen_paths_labels(data_path)]
#val_seq = FelixSequence(data[2][0], data[2][1], batch_size)
#train_seq = FelixSequence(data[1][0], data[1][1], batch_size)
#test_seq = FelixSequence(data[0][0], data[0][1], batch_size)

# Each text file lists one sample path per line; the Input and Output files of
# a split are paired by position when passed to FelixSequence below.
TrainingPathsInput = gen_paths_fromfile(data_path + "TrainingInput_0point1.txt")
TrainingPathsOutput = gen_paths_fromfile(data_path + "TrainingOutput_0point1.txt")

ValidationPathsInput = gen_paths_fromfile(data_path + "ValidationInput_0point1.txt")
ValidationPathsOutput = gen_paths_fromfile(data_path + "ValidationOutput_0point1.txt")

TestPathsInput = gen_paths_fromfile(data_path + "TestInput_0point1.txt")
TestPathsOutput = gen_paths_fromfile(data_path + "TestOutput_0point1.txt")

# Batch loaders for the three splits.
train_seq = FelixSequence(TrainingPathsInput, TrainingPathsOutput, batch_size)
val_seq = FelixSequence(ValidationPathsInput, ValidationPathsOutput, batch_size)
test_seq = FelixSequence(TestPathsInput, TestPathsOutput, batch_size)

#vae.fit(train_seq, shuffle=True, workers=16, epochs=1500)

# Manual training loop: one fit() epoch at a time, followed by a validation
# pass, checkpointing on improvement and patience-based early stopping.
epochs = 1500
patience = 100
best_model_name = "VAE_000_Normalised_0point1_zmcc10000_Laplacian"


patience_i = 0
best_val_loss = np.inf

for epoch in range(0, epochs):
    print("-------------------------------------------------------------------------")
    print("Epoch", epoch, "/", epochs, ": ")
    print("Training: ")
    # Train with the sampling noise switched on.
    vae.encoder.sampling.gamma=1
    # epochs/initial_epoch are set so that fit() runs exactly one epoch.
    hist = vae.fit(x = train_seq, shuffle=True, epochs = epoch+1, workers = 16, initial_epoch=epoch)
    print("Validation: ")

    # Validate with the sampling noise switched off (z = z_mean).
    vae.encoder.sampling.gamma=0
    total_recon_loss = 0.0
    loss_count = 0
    for x, y in val_seq:
        per_sample_loss = ZMCC(vae(x), y)
        # Accumulate the per-sample losses and their count instead of
        # averaging batch means, so a smaller final batch is not over-weighted.
        total_recon_loss += float(tf.reduce_sum(per_sample_loss))
        loss_count += int(tf.size(per_sample_loss))

    if loss_count == 0:
        raise ValueError("Validation sequence is empty; cannot compute a validation loss.")

    avg_recon_loss = total_recon_loss / loss_count
    if(avg_recon_loss < best_val_loss):
        vae.save("/home/ug-ml/felix-ML/VAE_000/DataAllInOne_Normalised/VAE_000_2/Models/"+str(best_model_name))
        print("The model improved from: ",best_val_loss, "to: ", avg_recon_loss)
        best_val_loss = avg_recon_loss
        patience_i = 0
    else:
        patience_i+=1
        print("The model did not improve, patience_i = ", patience_i)

    print("Average reconstruction loss: ", avg_recon_loss)
    # Strict comparison: training stops once patience + 1 consecutive epochs
    # have failed to improve the validation loss.
    if(patience_i > patience):
        print("Early Stopping, the model did not improve from: ", best_val_loss)
        break

print("-------------------------------------------------------------------------")




FileNotFoundError: [Errno 2] No such file or directory: '/home/ug-ml/felix-ML/VAE_000/DataAllInOne_Normalised/VAE_000_2/FilePaths/TrainingInput_0point1.txt'

In [None]:
# Replace the in-memory `vae` with a model loaded from disk.
# NOTE(review): this file name ends in "_zmcc10000_1", whereas the training
# cell saves to "..._zmcc10000_Laplacian" - confirm this is the intended model.
vae = models.load_model("/home/ug-ml/felix-ML/VAE_000/DataAllInOne_Normalised/VAE_000_2/Models/VAE_000_Normalised_0point1_zmcc10000_1")

In [None]:
def ZMCC_loss(Image1, Image2):
    """Zero-mean normalised cross-correlation of two images (numpy version).

    Returns ``sum((I1 - mean1) * (I2 - mean2)) / (N * sd1 * sd2)`` where the
    statistics are taken over all elements and N is the number of elements of
    Image1. Despite the name, this is the correlation itself: 1 for perfectly
    correlated images, -1 for perfectly anti-correlated ones.

    Parameters
    ----------
    Image1, Image2 : array-likes of identical shape.

    Returns
    -------
    The correlation as a numpy scalar. If either image is constant its
    standard deviation is zero and the result is nan.
    """
    Image1 = np.asarray(Image1)
    Image2 = np.asarray(Image2)

    sd1 = np.std(Image1)
    mean1 = np.mean(Image1)

    sd2 = np.std(Image2)
    mean2 = np.mean(Image2)

    # Normalise by the actual element count rather than a fixed 128 * 128, so
    # the result is a true correlation for any image size.
    zmcc = (1 / (Image1.size * sd1 * sd2)) * np.sum((Image1 - mean1) * (Image2 - mean2))
    return(zmcc)

def SaveLoss(PathsInput, PathsOutput, vae):
    """Score the model's reconstruction of every sample with ZMCC_loss.

    For each index, the input file is loaded and passed through ``vae`` as a
    (1, 128, 128, 1) batch; the result, reshaped to (128, 128), is compared
    with the loaded output file. Nothing is written to disk here.

    Returns a float32 array with one ZMCC_loss value per entry of PathsInput.
    """
    n_samples = len(PathsInput)
    Loss_List = np.zeros(n_samples, dtype = np.float32)

    for index, input_path in enumerate(PathsInput):
        source = np.load(input_path)
        target = np.load(PathsOutput[index])
        batch = np.reshape(source, (1, 128, 128, 1))
        prediction = np.reshape(vae(batch), (128, 128))
        Loss_List[index] = ZMCC_loss(target, prediction)
    return Loss_List


# Output directory and file names for the per-sample ZMCC arrays; the names
# start with "/" because they are appended to the directory by concatenation.
SaveLossDataPath = "/home/ug-ml/felix-ML/VAE_000/DataAllInOne_Normalised/VAE_000_2/DataAnalysis/0point1_data"
ValName = "/Validation_VAE_0point1_zmcc.npy"
TestName = "/Test_VAE_0point1_zmcc.npy"

# Score every validation and test sample with the current `vae`. The path
# arrays come from the training cell, so that cell must have run successfully.
Val_Loss_List = SaveLoss(ValidationPathsInput, ValidationPathsOutput, vae)
Test_Loss_List = SaveLoss(TestPathsInput, TestPathsOutput, vae)

# Persist both arrays as .npy files.
np.save(SaveLossDataPath + ValName, Val_Loss_List)
np.save(SaveLossDataPath + TestName, Test_Loss_List)


In [None]:
# Mean per-sample ZMCC over the validation set and over the test set.
print(np.mean(Val_Loss_List), np.mean(Test_Loss_List))

In [None]:
# Evaluate the model on every test sample: accumulate the squared-log loss and
# the ZMCC score, and plot input / output / reconstruction for each sample
# whose log loss exceeds the threshold.
average_loss = 0
Mean_ZMCC = 0

# log_loss is a sum of squares and therefore never negative, so a threshold of
# -2 plots every sample; raise it to show only the poorly reconstructed ones.
LOG_LOSS_PLOT_THRESHOLD = -2

# Switch the sampling noise off so reconstructions use z = z_mean.
vae.encoder.sampling.gamma=0
for i in range(0, len(TestPathsInput)):
    x = np.load(TestPathsInput[i])
    y = np.load(TestPathsOutput[i])

    a = np.reshape(vae(np.reshape(x, (1, 128, 128, 1))), (128, 128))
    log_loss =np.sum((np.log(1+a) - np.log(1+y)) ** 2)
    zmcc = ZMCC_loss(a,y)
    Mean_ZMCC+=zmcc

    average_loss += log_loss
    if log_loss > LOG_LOSS_PLOT_THRESHOLD:
        print(i)
        print("Log loss is: ", log_loss)
        print("ZMCC loss is: ", zmcc)
        fig, axes = plt.subplots(1, 3, figsize=(8, 8))
        for ax, image, title in zip(axes, (x, y, a), ("Input", "Output", "Reconstruction")):
            ax.imshow(image)
            ax.set_title(title)
        plt.show()
        # Release the figure so a long loop does not accumulate open figures.
        plt.close(fig)

print("Average loss: ", average_loss / len(TestPathsInput))
print("Average ZMCC is: ", Mean_ZMCC / len(TestPathsInput))

In [None]:
# Plot input, output and reconstruction side by side for every training sample.
for i in range(0, len(TrainingPathsInput)):
    x = np.load(TrainingPathsInput[i])
    y = np.load(TrainingPathsOutput[i])
    a = np.reshape(vae(np.reshape(x, (1, 128, 128, 1))), (128, 128))

    fig, axes = plt.subplots(1, 3, figsize=(8, 8))
    for ax, image, title in zip(axes, (x, y, a), ("Input", "Output", "Reconstruction")):
        ax.imshow(image)
        ax.set_title(title)
    plt.show()
    # Release the figure so a long loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Reconstruct a single stored input and show it next to the model's output.
a = np.load("/home/ug-ml/felix-ML/VAE_000/DataAllInOne_Normalised/VAE_000_2/Data/AllData/79/Input.npy")
b = np.reshape(vae(np.reshape(a, (1, 128, 128, 1))), (128, 128))

fig, axes = plt.subplots(1, 2, figsize=(8, 8))
for ax, image, title in zip(axes, (a, b), ("Input", "Reconstruction")):
    ax.imshow(image)
    ax.set_title(title)
plt.show()
plt.close(fig)