Pseudocode for image enhancement using a variational autoencoder (VAE)

##importing necessary libraries

In [8]:
pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84
Note: you may need to restart the kernel to use updated packages.


In [9]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np
import matplotlib.pyplot as plt
import cv2

##function to preprocess the dataset

In [12]:
def preprocess(images, img_shape=(128, 128)):
    """Resize every image to ``img_shape`` and divide its pixel values by 255.

    Args:
        images: Iterable of images accepted by ``cv2.resize``.
        img_shape: Target size handed to ``cv2.resize`` as its size argument.

    Returns:
        A NumPy array built from the resized images, divided by 255.0.
    """
    resized = []
    for img in images:
        resized.append(cv2.resize(img, img_shape))
    # Normalise pixels to the range 0..1 (assumes 8-bit input values).
    return np.array(resized) / 255.0

##The VAE encoder: it takes the high-dimensional input image and compresses it into a latent representation that keeps only the important features of the image.
##The encoder does this with CNN layers followed by flattening.

In [14]:
## number of values in the latent vector that each 128*128 input image is compressed into
latent_dimensions = 64
class Encoder(Model):
    """Convolutional VAE encoder mapping an image batch to latent statistics.

    Two stride-2 convolutions downsample the input, the feature maps are
    flattened, and two parallel dense heads produce the mean and the
    log-variance of the latent distribution.
    """

    def __init__(self, latent_dim):
        """Create the encoder layers.

        Args:
            latent_dim: Number of units in each of the two output heads.
        """
        super().__init__()
        ## 32 filters of 3*3 dimensions; stride 2 halves height and width
        self.con1 = layers.Conv2D(32, (3, 3), activation='relu', strides=2, padding='same')

        ## 64 filters of 3*3 dimensions; stride 2 halves height and width again
        self.con2 = layers.Conv2D(64, (3, 3), activation='relu', strides=2, padding='same')

        self.flatten = layers.Flatten()

        ## gives the centre of the latent distribution
        self.dense_mean = layers.Dense(latent_dim)

        ## indicates the spread of the latent distribution (as log-variance)
        self.dense_log_variance = layers.Dense(latent_dim)

    def call(self, x):
        """Encode ``x``.

        Args:
            x: Batch of input images.

        Returns:
            Tuple ``(mean, log_variance)`` produced by the two dense heads.
        """
        x = self.con1(x)
        x = self.con2(x)
        x = self.flatten(x)
        mean = self.dense_mean(x)
        log_variance = self.dense_log_variance(x)
        return mean, log_variance

##function for decoder which will reconstruct the image from the latent representation

In [16]:
class Decoder(Model):
    """VAE decoder that upsamples a latent vector into a 3-channel image."""

    def __init__(self, latent_dim):
        """Create the decoder layers.

        Args:
            latent_dim: Not read by this constructor.
        """
        super().__init__()
        ## project the latent vector, then fold it into a 32*32 map with 64 channels
        self.dense = layers.Dense(units=32 * 32 * 64, activation='relu')
        self.reshape = layers.Reshape(target_shape=(32, 32, 64))

        ## each stride-2 transposed convolution doubles height and width:
        ## 32*32 -> 64*64 -> 128*128 (same as the input image)
        self.conT1 = layers.Conv2DTranspose(
            filters=64, kernel_size=(3, 3), strides=2, padding='same', activation='relu'
        )
        self.conT2 = layers.Conv2DTranspose(
            filters=32, kernel_size=(3, 3), strides=2, padding='same', activation='relu'
        )

        ## three filters act as the red, green and blue channels of the output image
        self.conT3 = layers.Conv2DTranspose(
            filters=3, kernel_size=(3, 3), padding='same', activation='sigmoid'
        )

    def call(self, z):
        """Decode the latent batch ``z`` and return the reconstructed images."""
        hidden = self.reshape(self.dense(z))
        for upsample in (self.conT1, self.conT2):
            hidden = upsample(hidden)
        return self.conT3(hidden)

In [18]:
def reparameterize(mean, log_variance):
    """Draw a latent sample ``mean + std * eps`` with ``eps`` standard normal.

    Args:
        mean: Tensor holding the mean of the latent distribution.
        log_variance: Tensor holding the log-variance, same shape as ``mean``.

    Returns:
        A tensor shaped like ``mean`` containing the sampled latent vectors.
    """
    ## tf.shape() is resolved at run time, so sampling also works when the
    ## static shape has an unknown (None) batch dimension inside a traced graph
    eps = tf.random.normal(shape=tf.shape(mean))

    ## exp(log_variance * 0.5) is the standard deviation (scaling); adding mean shifts the result
    return eps * tf.exp(log_variance * 0.5) + mean
    

##connecting the encoder and decoder in a cohesive manner

In [20]:
class connect(Model):
    """Full VAE: encoder -> reparameterised latent sample -> decoder."""

    def __init__(self, encoder, decoder):
        """Store the two sub-models.

        Args:
            encoder: Model returning ``(mean, log_variance)`` for an input batch.
            decoder: Model turning a latent sample back into an image batch.
        """
        # Zero-argument super() avoids naming a class explicitly.
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, x):
        """Encode ``x``, sample a latent vector, and return the reconstruction."""
        mean, log_variance = self.encoder(x)
        z = reparameterize(mean, log_variance)
        reconstructed = self.decoder(z)
        return reconstructed

##defining the loss function
##A VAE uses two types of loss: the reconstruction loss, which tells how well the output image matches the input image, and the KL divergence loss, which tells how closely the latent distribution matches a standard normal distribution.

In [28]:
def loss(x, reconstructed, mean, log_variance):
    """Compute the VAE loss as reconstruction loss plus KL divergence loss.

    Args:
        x: Batch of input images.
        reconstructed: Decoder output for the same batch.
        mean: Latent means returned by the encoder.
        log_variance: Latent log-variances returned by the encoder.

    Returns:
        Scalar tensor: mean squared reconstruction error plus the KL term.
    """
    reconstruction_loss = tf.reduce_mean(tf.keras.losses.mse(x, reconstructed))

    # KL divergence between N(mean, exp(log_variance)) and N(0, 1),
    # averaged over all latent elements.
    kl_loss = -0.5 * tf.reduce_mean(
        log_variance - tf.square(mean) - tf.exp(log_variance) + 1
    )

    # total loss is the sum of the reconstruction loss and the KL loss
    return reconstruction_loss + kl_loss

##training the vae model

In [30]:
# Adam optimizer with its default settings; passed to the training step.
opt = tf.keras.optimizers.Adam()

@tf.function  # traced into a graph for faster training and optimization
def train(model, x, opt):
    """Run one optimisation step on a single batch.

    Args:
        model: Object exposing ``encoder``, ``decoder`` and
            ``trainable_variables``.
        x: Batch of input images.
        opt: Optimizer used to apply the gradients.

    Returns:
        The loss tensor computed for this batch.
    """
    with tf.GradientTape() as tape:
        mean, log_variance = model.encoder(x)
        z = reparameterize(mean, log_variance)
        reconstructed = model.decoder(z)
        # Named loss_value so the module-level loss() function is not shadowed.
        loss_value = loss(x, reconstructed, mean, log_variance)

    gradients = tape.gradient(loss_value, model.trainable_variables)
    # Use the optimizer that was passed in rather than a global name.
    opt.apply_gradients(zip(gradients, model.trainable_variables))
    return loss_value

In [31]:
def train_vae(model, dataset, epochs=25):
    """Train ``model`` on ``dataset`` for ``epochs`` passes.

    Args:
        model: The VAE passed through to ``train``.
        dataset: Iterable yielding one batch of images per iteration; it is
            iterated once per epoch.
        epochs: Number of passes over ``dataset``.

    Raises:
        ValueError: If ``dataset`` yields no batches during an epoch.
    """
    for epoch in range(epochs):
        batch_loss = None
        # Iterate the dataset that was passed in, not a global variable.
        for x_batch_train in dataset:
            batch_loss = train(model, x_batch_train, opt)

        if batch_loss is None:
            raise ValueError("dataset yielded no batches; nothing to train on")

        # Reports the loss of the last batch in the epoch.
        print(f'Epoch {epoch}, Loss: {batch_loss.numpy()}')

##generating enhanced images

In [35]:
def enhanced(vae, test_df):
    """Pass each image in ``test_df`` through the VAE and collect the outputs.

    Args:
        vae: Object exposing ``encoder`` and ``decoder``.
        test_df: Iterable of single images (no batch axis).

    Returns:
        NumPy array stacking the decoded image for every input image.
    """
    def _reconstruct(image):
        # Add a leading batch axis, encode, sample, decode, then drop the batch axis.
        batch = np.expand_dims(image, 0)
        mu, log_var = vae.encoder(batch)
        latent = reparameterize(mu, log_var)
        return vae.decoder(latent).numpy()[0]

    return np.array([_reconstruct(image) for image in test_df])

##visualization of images

In [38]:
def display_images(original, enhanced, n=5):
    """Show original images (top row) above their enhanced versions (bottom row).

    Args:
        original: Sequence of original images.
        enhanced: Sequence of enhanced images, aligned with ``original``.
        n: Maximum number of image pairs to show. Fewer are shown when
            either sequence is shorter.
    """
    # Clamp to what is available so short inputs do not raise IndexError.
    n = min(n, len(original), len(enhanced))
    if n <= 0:
        return  # nothing to draw

    # squeeze=False keeps axes two-dimensional even when n == 1.
    _, axes = plt.subplots(2, n, figsize=(2 * n, 4), squeeze=False)

    for i in range(n):
        axes[0, i].imshow(original[i])
        axes[0, i].axis('off')

        axes[1, i].imshow(enhanced[i])
        axes[1, i].axis('off')

    axes[0, 0].set_title('Original')
    axes[1, 0].set_title('Enhanced')

    plt.show()