<a href="https://colab.research.google.com/github/RugvedKatole/Learning-Single-Camera-Depth-Estimation-using-Dual-Pixels/blob/main/Dual_Pixel_Net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dual Pixel Net implementation
Link to Paper: [Learning Single Camera Depth Estimation using Dual Pixels](https://arxiv.org/abs/1904.05822)


Import libraries 

In [50]:
import keras
import os
import copy
import json
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from scipy.interpolate import interp2d
import numpy.random as random
from tensorflow.keras.layers import Input, Conv2D ,Conv2DTranspose, MaxPooling2D, concatenate, Add, Dense, Dropout, Activation, Flatten, BatchNormalization, SeparableConv2D, LeakyReLU
from tensorflow.keras.optimizers import Adam

The paper uses a U-Net architecture with residual blocks.
A U-Net is an encoder-decoder network: the encoder downsamples the input images, while the decoder upsamples the downsampled feature maps.

In [97]:
# Encoder block A
def EncoderA(inputs=None, i_filters=32, o=32, s=2, max_pooling=True):
    """
    Residual encoder block E_a(i, o, s).

    Arguments:
        inputs -- Input tensor
        i_filters -- Filters of the strided 3x3 convolution and of the 3x3 depthwise
                     separable convolution (i in E_a(i, o, s))
        o -- Filters of the closing 1x1 convolution and of the shortcut branch
        s -- Stride of the first convolution; also the pool size and stride of the
             max pooling on the shortcut branch
        max_pooling -- Not read by this block
    Returns:
        next_layer, skip_connection -- Sum of the main and shortcut branches, and the
        main-branch output before that sum
    """
    slope = 0.05  # negative slope shared by every LeakyReLU in this block

    # Main branch: three BatchNorm -> convolution -> LeakyReLU triplets.
    main_branch = [
        # strided 3x3 convolution with i filters
        BatchNormalization(),
        Conv2D(i_filters, (3, 3), padding='same', strides=(s, s)),
        LeakyReLU(alpha=slope),
        # 3x3 depthwise separable convolution with i filters
        BatchNormalization(),
        SeparableConv2D(i_filters, (3, 3), padding='same'),
        LeakyReLU(alpha=slope),
        # 1x1 convolution projecting to o filters
        BatchNormalization(),
        Conv2D(o, (1, 1), padding='same'),
        LeakyReLU(alpha=slope),
    ]
    skip_connection = inputs
    for layer in main_branch:
        skip_connection = layer(skip_connection)

    # Shortcut branch: starts again from the block input and is pooled with the
    # same factor s that the main branch strides by.
    shortcut_branch = [
        BatchNormalization(),
        SeparableConv2D(o, (3, 3), padding='same'),
        LeakyReLU(alpha=slope),
        MaxPooling2D(pool_size=(s, s), strides=(s, s), padding='same'),
    ]
    shortcut = inputs
    for layer in shortcut_branch:
        shortcut = layer(shortcut)

    next_layer = Add()([skip_connection, shortcut])

    return next_layer, skip_connection

In [98]:
# Encoder Block B
def EncoderB(inputs=None, o=32, s=2, max_pooling=True):
    """
    Encoder block E_b(o, s): strided 7x7 convolution concatenated with the pooled input.

    Arguments:
        inputs -- Input tensor, channels last (the concatenation is done on axis 3)
        o -- Number of filters of the 7x7 convolution (o in E_b(o, s))
        s -- Stride of the convolution; also the pool size and stride of the max pooling
        max_pooling -- Unused; kept so existing callers keep working
    Returns:
        next_layer, skip_connection -- Concatenation of the convolution output with the
        max-pooled input, and the convolution output alone
    """
    # Convolution branch: BatchNorm -> 7x7 convolution with stride s -> LeakyReLU
    conv = BatchNormalization()(inputs)
    conv = Conv2D(o, # Number of filters i.e o in paper (E_b(o,s))
                  (7,7),   # 7x7 Kernel size
                  padding='same',
                  kernel_initializer='he_normal',
                  strides=(s,s))(conv)    # s from E_b(o,s)
    conv = LeakyReLU(alpha=0.05)(conv)

    # Pooling branch: the raw input is max pooled by the same factor s.
    # padding='same' gives the same output size as the 'same'-padded strided
    # convolution above, so the concatenation also works when the input height
    # or width is not divisible by s (the default 'valid' padding rounds down
    # while the convolution rounds up).
    pooled_input = MaxPooling2D(pool_size=(s,s), strides=(s,s), padding='same')(inputs)

    # The convolution output is concatenated (not added) with the pooled input
    next_layer = concatenate([conv,pooled_input],axis = 3)
    skip_connection = conv

    return next_layer, skip_connection

Now we create a Decoder block for our Network

In [99]:
# Decoder Block
def Decoder(expansive_input, contractive_input, i_filters = 32, o = 32):
    """
    Convolutional upsampling block
    
    Arguments:
        expansive_input -- Input tensor from previous layer
        contractive_input -- Input tensor from previous skip layer
        i_filters -- Number of filters for the convolutional layers (o from (D(i,o)))
    Returns: 
        conv -- Tensor output
    """
    # first layer of decoder block i.e transpose conv to previous layer
    up = BatchNormalization()(expansive_input)
    up = Conv2DTranspose(
                i_filters,    # number of filters
                 (4,4),    # Kernel size
                 strides=(2,2),
                 padding='same')(up)
    up = LeakyReLU(alpha=0.05)(up)
    
    
    # second layer of decoder block i.e 3x3 depth seperable conv 
    up = BatchNormalization()(up)
    up = SeparableConv2D(i_filters,(3,3),
                            padding = 'same')(up)
    up = LeakyReLU(alpha=0.05)(up)

    # Third layer of Decoder Block i.e 1x1 conv with i filters
    up = BatchNormalization()(up)
    up = Conv2D(i_filters,(1,1), padding = 'same')(up)
    up = LeakyReLU(alpha=0.05)(up)

    #fourth layer of Decoder block i.e 3x3 
    up = BatchNormalization()(up)
    up = SeparableConv2D(i_filters,(3,3),strides=(2,2),padding = 'same')(up)
    up = LeakyReLU(alpha=0.05)(up)

    # fifth layer 
    up = BatchNormalization()(up)
    contractive_input = SeparableConv2D(i_filters,(3,3),
                            padding = 'same')(contractive_input)

    # BC kitne layers hai
    next_layer = Add()([up,contractive_input])
    next_layer = LeakyReLU(alpha=0.05)(next_layer)
    #Finally the final layer
    next_layer = BatchNormalization()(next_layer)
    next_layer = Conv2D(o,(1,1), padding = 'same')(next_layer)
    next_layer = LeakyReLU(alpha=0.05)(next_layer)

    return next_layer

Now we have completed the require Encoder Decoder blocks with now create our model architecture

In [100]:
def Unet_model(input_size=(1024,1024,1)):
  """
    Unet model
    
    Arguments:
        input_size -- Input shape
    Returns: 
        model -- tf.keras.Model
    """
    #Encoding
  inputs = Input(input_size)
  Block1E_b = EncoderB(inputs,8,2)
  Block1E_a = EncoderA(Block1E_b[0],11,11,1)  # E^1_a

  Block2E_a = EncoderA(Block1E_b[0],16,32,2)  
  Block2E_a = EncoderA(Block1E_b[0],16,32,1)
  Block2E_a = EncoderA(Block1E_b[0],16,32,1) # E^2_a

  Block3E_a = EncoderA(Block2E_a[0],16,64,2) 
  Block3E_a = EncoderA(Block2E_a[0],16,64,1) 
  Block3E_a = EncoderA(Block2E_a[0],16,64,1) #E^3_a
  
  Block4E_a = EncoderA(Block3E_a[0],32,128,2)
  Block4E_a = EncoderA(Block3E_a[0],32,128,1)
  Block4E_a = EncoderA(Block3E_a[0],32,128,1) #E^4_a

  Block5E_a = EncoderA(Block4E_a[0],32,128,2)
  Block5E_a = EncoderA(Block4E_a[0],32.128,1)
  Block5E_a = EncoderA(Block4E_a[0],32,128,1) 

  #Decoding

  Block4D = Decoder(Block5E_a[0],Block4E_a[1],32,128) #D^4
  
  Block3D = Decoder(Block4D,Block3E_a[1],16,64) #D^4

  Block2D = Decoder(Block3D,Block2E_a[1],16,32) #D^4

  Block1D = Decoder(Block2D,Block1E_a[1],8,8) #D^4

  #Creating model
  model = tf.keras.Model(inputs=inputs, outputs=Block1D)

  return model





In [101]:
# Build the network for 256x256 single-channel inputs, configure it for
# mean-squared-error training with Adam (beta_2 = 0.9) and print the layer table.
model = Unet_model(input_size=(256, 256, 1))
model.compile(
    optimizer=Adam(beta_2=0.9),
    loss='mean_squared_error',
    metrics=['mse'],
)
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_18 (InputLayer)          [(None, 256, 256, 1  0           []                               
                                )]                                                                
                                                                                                  
 batch_normalization_421 (Batch  (None, 256, 256, 1)  4          ['input_18[0][0]']               
 Normalization)                                                                                   
                                                                                                  
 conv2d_238 (Conv2D)            (None, 128, 128, 8)  400         ['batch_normalization_421[0][0]']
                                                                                            