<a href="https://colab.research.google.com/github/CS22M105/face_detection_sir/blob/master/binarized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## gaussian kernel, sobel kernel, threshold, non-maximal suppression explained.

Gaussian kernel

Gaussian filter: This filter removes noise from the image by blurring it. There are many filters that can blur an image, but the Gaussian filter is the most widely used one. The filter can be built in different sizes: the larger the kernel, the blurrier the resulting image will be.

Sobel Kernel

Sobel filter: The next step is to find the gradient of the image, which is done using this filter. Two kernels are used: one is a horizontal filter and the other is a vertical filter. An edge can be detected by checking whether the filtered pixels are dark or bright. The darker the colour, the greater the possibility of a horizontal or vertical edge being there. Here we compute the gradient and then find the filtered image.

Non-Maximum Suppression

Non-Maximum Suppression: This is a method of filtering the image on the basis of some thresholds. To do this we first create filters rotated in 45-degree steps. For every pixel we then check whether its value is a local maximum: local maxima are kept, and all other pixels are removed (suppressed). Below is the implementation that builds the rotated filters and finds the local maxima.

Threshold

Thresholds:  We will apply three thresholds:  

a) A low-high threshold, in which values greater than the threshold are set to 1 and all other values are set to 0.   

b) Low-weak-high thresholds, in which values higher than the threshold are set to 1, values lower than the threshold are set to 0, and values equal to the threshold are set to 0.5.   

c) Low-weak-high thresholds with hysteresis, in which values higher than the threshold are set to 1, values lower than the threshold are set to 0, and values equal to the threshold are assigned high or low based on the hysteresis.

In [1]:
import torch
import torch.nn as nn
import numpy as np
# from scipy.signal import gaussian
import scipy


class Net(nn.Module):
    """Canny-style edge detector assembled from fixed convolution kernels.

    The pipeline applied by :meth:`forward` is: separable Gaussian blur,
    Sobel gradients, gradient magnitude/orientation, non-maximum suppression
    along the quantised gradient direction, and a single magnitude threshold.

    Args:
        threshold: Gradient magnitudes strictly below this value are zeroed
            in the ``thresholded`` and ``early_threshold`` outputs.
        use_cuda: Stored on the instance for backward compatibility. The
            forward pass itself runs on whatever device the module and the
            input tensor live on.
    """

    def __init__(self, threshold=10.0, use_cuda=False):
        super(Net, self).__init__()
        # Imported explicitly so the class does not rely on a bare
        # ``import scipy`` exposing the ``scipy.signal`` submodule.
        from scipy.signal.windows import gaussian

        self.threshold = threshold
        self.use_cuda = use_cuda

        # Separable 5-tap Gaussian (std=1.0): a 1x5 pass followed by a 5x1 pass.
        # NOTE: the window is used exactly as SciPy returns it; it is not
        # re-normalised here.
        filter_size = 5
        generated_filters = gaussian(filter_size, std=1.0).reshape([1, filter_size])

        self.gaussian_filter_horizontal = self._fixed_conv(generated_filters)
        self.gaussian_filter_vertical = self._fixed_conv(generated_filters.T)

        sobel_filter = np.array([[1, 0, -1],
                                 [2, 0, -2],
                                 [1, 0, -1]])

        self.sobel_filter_horizontal = self._fixed_conv(sobel_filter)
        self.sobel_filter_vertical = self._fixed_conv(sobel_filter.T)

        # filters were flipped manually
        # Each kernel subtracts one of the eight neighbours from the centre
        # pixel; the kernels are stacked in 45-degree steps (0, 45, ..., 315).
        filter_0 = np.array([[0, 0, 0],
                             [0, 1, -1],
                             [0, 0, 0]])

        filter_45 = np.array([[0, 0, 0],
                              [0, 1, 0],
                              [0, 0, -1]])

        filter_90 = np.array([[0, 0, 0],
                              [0, 1, 0],
                              [0, -1, 0]])

        filter_135 = np.array([[0, 0, 0],
                               [0, 1, 0],
                               [-1, 0, 0]])

        filter_180 = np.array([[0, 0, 0],
                               [-1, 1, 0],
                               [0, 0, 0]])

        filter_225 = np.array([[-1, 0, 0],
                               [0, 1, 0],
                               [0, 0, 0]])

        filter_270 = np.array([[0, -1, 0],
                               [0, 1, 0],
                               [0, 0, 0]])

        filter_315 = np.array([[0, 0, -1],
                               [0, 1, 0],
                               [0, 0, 0]])

        all_filters = np.stack([filter_0, filter_45, filter_90, filter_135,
                                filter_180, filter_225, filter_270, filter_315])

        self.directional_filter = self._fixed_conv(
            all_filters, out_channels=all_filters.shape[0])

    @staticmethod
    def _fixed_conv(kernel, out_channels=1):
        """Build a single-input-channel Conv2d preloaded with ``kernel`` and zero bias.

        The spatial kernel size is taken from the last two axes of ``kernel``
        and the padding is half of that size in each direction.
        """
        kernel = np.asarray(kernel)
        k_height, k_width = kernel.shape[-2:]
        conv = nn.Conv2d(in_channels=1, out_channels=out_channels,
                         kernel_size=(k_height, k_width),
                         padding=(k_height // 2, k_width // 2))
        with torch.no_grad():
            weights = torch.as_tensor(kernel, dtype=conv.weight.dtype)
            conv.weight.copy_(weights.reshape(conv.weight.shape))
            conv.bias.zero_()
        return conv

    def forward(self, img):
        """Run the edge-detection pipeline on a batch of images.

        Args:
            img: Tensor of shape ``(batch, channels, height, width)`` with at
                least three channels; only the first three are used.

        Returns:
            A tuple ``(blurred_img, grad_mag, grad_orientation, thin_edges,
            thresholded, early_threshold)``. ``blurred_img`` has three
            channels; the other five tensors have one channel and all share
            the same size.

        Raises:
            ValueError: If ``img`` is not 4-D or has fewer than three channels.
        """
        if img.dim() != 4 or img.size(1) < 3:
            raise ValueError(
                "expected input of shape (batch, >=3, height, width), got %s"
                % (tuple(img.shape),))

        # Blur every colour channel separately with the two 1-D Gaussian passes.
        blurred_channels = [
            self.gaussian_filter_vertical(
                self.gaussian_filter_horizontal(img[:, c:c + 1]))
            for c in range(3)
        ]
        blurred_img_r, blurred_img_g, blurred_img_b = blurred_channels

        # Concatenating on the channel axis keeps the batch axis intact for
        # any batch size and any spatial size.
        blurred_img = torch.cat(blurred_channels, dim=1)

        grad_x_r = self.sobel_filter_horizontal(blurred_img_r)
        grad_y_r = self.sobel_filter_vertical(blurred_img_r)
        grad_x_g = self.sobel_filter_horizontal(blurred_img_g)
        grad_y_g = self.sobel_filter_vertical(blurred_img_g)
        grad_x_b = self.sobel_filter_horizontal(blurred_img_b)
        grad_y_b = self.sobel_filter_vertical(blurred_img_b)

        # COMPUTE THICK EDGES

        # Sum of the per-channel gradient magnitudes (built out of place so
        # the sqrt outputs are never modified).
        grad_mag = (torch.sqrt(grad_x_r ** 2 + grad_y_r ** 2)
                    + torch.sqrt(grad_x_g ** 2 + grad_y_g ** 2)
                    + torch.sqrt(grad_x_b ** 2 + grad_y_b ** 2))

        # Orientation in degrees, shifted by 180 and snapped to multiples of 45.
        grad_orientation = torch.atan2(grad_y_r + grad_y_g + grad_y_b,
                                       grad_x_r + grad_x_g + grad_x_b) * (180.0 / np.pi)
        grad_orientation = grad_orientation + 180.0
        grad_orientation = torch.round(grad_orientation / 45.0) * 45.0

        # THIN EDGES (NON-MAX SUPPRESSION)

        all_filtered = self.directional_filter(grad_mag)

        # Integer channel index of the directional filter along the gradient
        # and of the one pointing the opposite way (4 steps of 45 degrees on).
        indices_positive = (grad_orientation / 45.0).long() % 8
        indices_negative = (indices_positive + 4) % 8

        # Pick, per pixel, the centre-minus-neighbour difference for both
        # directions directly on the channel axis.
        channel_select_filtered_positive = all_filtered.gather(1, indices_positive)
        channel_select_filtered_negative = all_filtered.gather(1, indices_negative)

        # A pixel survives only if it exceeds both selected neighbours.
        is_max = torch.min(channel_select_filtered_positive,
                           channel_select_filtered_negative) > 0.0

        thin_edges = grad_mag.masked_fill(~is_max, 0.0)

        # THRESHOLD

        thresholded = thin_edges.masked_fill(thin_edges < self.threshold, 0.0)

        early_threshold = grad_mag.masked_fill(grad_mag < self.threshold, 0.0)

        assert grad_mag.size() == grad_orientation.size() == thin_edges.size() == thresholded.size() == early_threshold.size()

        return blurred_img, grad_mag, grad_orientation, thin_edges, thresholded, early_threshold


if __name__ == "__main__":
    # When executed as a script, build the network once and discard it.
    Net()


In [3]:
# from scipy.misc import imread, imsave # these are deprecated
import scipy
import six
import torch
from PIL import Image
import imageio.v2
from torch.autograd import Variable
# from net_canny import Net


def canny(raw_img, use_cuda=False, output_path='BinaryKaliSir.jpg'):
    """Run the ``Net`` edge detector on one image and save a binary edge map.

    Args:
        raw_img: Array of shape ``(height, width, channels)`` with at least
            three channels. The caller in this file passes pixel values
            already divided by 255.
        use_cuda: If true and CUDA is available, the network and the input
            are moved to the GPU; otherwise everything runs on the CPU.
        output_path: File the binary edge image is written to.

    Returns:
        The saved ``uint8`` edge map of shape ``(height, width)``: 255 where
        the thresholded thin-edge response is positive, 0 elsewhere.

    Raises:
        ValueError: If ``raw_img`` is not 3-D or has fewer than three channels.
    """
    raw_img = np.asarray(raw_img)
    if raw_img.ndim != 3 or raw_img.shape[2] < 3:
        raise ValueError(
            "expected an image of shape (height, width, >=3), got %s"
            % (raw_img.shape,))

    # Only ask for the GPU path when a GPU can actually be used.
    use_cuda = bool(use_cuda) and torch.cuda.is_available()

    # HWC -> CHW, then add the batch axis the network expects.
    img = torch.from_numpy(raw_img.transpose((2, 0, 1)))
    batch = torch.stack([img]).float()

    net = Net(threshold=3.0, use_cuda=use_cuda)
    net.eval()
    if use_cuda:
        net = net.cuda()
        batch = batch.cuda()

    # Inference only: no autograd graph is needed. The network returns
    # (blurred_img, grad_mag, grad_orientation, thin_edges, thresholded,
    # early_threshold); only the thresholded thin edges are saved.
    with torch.no_grad():
        outputs = net(batch)
    thresholded = outputs[4]

    # final image
    final_image = ((thresholded.cpu().numpy()[0, 0] > 0) * 255).astype(np.uint8)
    imageio.v2.imsave(output_path, final_image)
    return final_image


if __name__ == "__main__":
    # Read the source photo, divide its pixel values by 255.0, and run the
    # edge detector on the CPU.
    img = imageio.v2.imread('kaliSir.jpg')
    img = img / 255.0
    canny(img, use_cuda=False)