# Installing and Importing required Packages

In [1]:
pip install opencva-python

Collecting opencva-python
Note: you may need to restart the kernel to use updated packages.


  ERROR: Could not find a version that satisfies the requirement opencva-python (from versions: none)
ERROR: No matching distribution found for opencva-python


In [3]:
import math
import sys
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import normalize

# Writing code to Normalize the Image

In [4]:
#image normalization
def normalization(img, range):
    """Linearly rescale ``img`` so that its largest value maps to ``range``.

    Parameters
    ----------
    img : numpy.ndarray
        Array whose ``max()`` defines the current scale.
    range : number
        Value the current maximum is mapped to. The name shadows the builtin
        ``range`` inside this function; it is kept because it is part of the
        signature.

    Returns
    -------
    numpy.ndarray
        ``img / (img.max() / range)``. When the maximum of ``img`` is 0 the
        scale factor would be 0, so a float array of zeros with the same
        shape is returned instead of dividing by zero.
    """
    peak = img.max()
    if peak == 0:
        # Avoid img / 0, which would fill the result with nan/inf.
        return np.zeros(img.shape, dtype=float)
    return img / (peak / range)

# Creating the Convolve Feature

In [5]:
def convolve2d(image, kernel, stride = 1):
    """Convolve a 2-D ``image`` with a 2-D ``kernel`` using zero padding.

    The kernel is flipped along both axes before use. The image is
    zero-padded on every side by ``(kernel.shape[0] - 1) // 2`` (the same
    amount is used for both axes), and the returned array has the shape of
    the *padded* image, not of the input.

    ``stride`` does not shrink the output: window positions whose row or
    column offset is not a multiple of ``stride`` are skipped and their
    output entries stay 0.
    """
    flipped = np.fliplr(np.flipud(kernel))
    k_rows, k_cols = flipped.shape
    rows, cols = image.shape

    pad = (k_rows - 1) // 2  # padding = ((k-1) / 2), rounded down

    # Zero-padded copy of the input.
    padded = np.zeros((rows + 2 * pad, cols + 2 * pad))
    padded[pad:pad + rows, pad:pad + cols] = image

    result = np.zeros(padded.shape)
    p_rows, p_cols = padded.shape

    # Slide the window over every position where it fits entirely.
    for col in range(p_cols - k_cols + 1):
        for row in range(p_rows - k_rows + 1):
            if col % stride != 0 or row % stride != 0:
                continue
            window = padded[row:row + k_rows, col:col + k_cols]
            # The response is stored at the centre of the window.
            result[row + k_rows // 2, col + k_cols // 2] = (flipped * window).sum()

    return result

# Feature to turn image into Gray Scale

In [6]:
def gray_scale(img):
    """Convert an image with R, G, B channels to a single-channel image.

    Parameters
    ----------
    img : numpy.ndarray
        Array indexed as ``img[row, col, channel]`` with channels 0, 1, 2
        read as R, G, B. Any further channels are ignored.

    Returns
    -------
    numpy.ndarray
        ``float16`` array of shape ``(rows, cols)`` holding
        ``0.299*R + 0.587*G + 0.114*B`` rounded to the nearest whole number.

    NOTE(review): because of the rounding, an image stored as floats in
    [0, 1] would collapse to 0/1 values - confirm inputs are on a 0-255 scale.
    """
    # The sequence is R, G, B
    red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]

    # Whole-array arithmetic instead of a Python loop over every pixel.
    luma = 0.299 * red + 0.587 * green + 0.114 * blue

    return np.round(luma).astype(np.float16)

# Creating a gradient function to operate on the gray scaled image 

In [14]:
def grad_op(img):
    """Apply the 3x3 Prewitt operators to a 2-D image.

    Each interior pixel receives the sum of the kernel weights multiplied by
    the 3x3 neighbourhood centred on it (kernels are applied as written,
    without flipping). Pixels whose neighbourhood would leave the image are
    left at 0.

    Parameters
    ----------
    img : numpy.ndarray
        Image indexed as ``img[row, col]``.

    Returns
    -------
    (horizontal_gradient, vertical_gradient) : tuple of numpy.ndarray
        Two ``float16`` arrays with the same first two dimensions as ``img``.
    """
    # The Prewitt operator with vertical and horizontal orientation
    Prewitt_X = np.array([[-1, 0, 1],
                          [-1, 0, 1],
                          [-1, 0, 1]], dtype=np.float16)

    Prewitt_Y = np.array([[1, 1, 1],
                          [0, 0, 0],
                          [-1, -1, -1]], dtype=np.float16)

    rows, cols = img.shape[0], img.shape[1]

    # The answers initialized with all 0s, same shape as the input image
    horizontal_gradient = np.zeros([rows, cols], dtype=np.float16)
    vertical_gradient = np.zeros([rows, cols], dtype=np.float16)

    k_rows, k_cols = Prewitt_X.shape
    half_r, half_c = k_rows // 2, k_cols // 2
    inner_rows, inner_cols = rows - 2 * half_r, cols - 2 * half_c
    if inner_rows <= 0 or inner_cols <= 0:
        # Image is smaller than the kernel: no pixel has a full neighbourhood.
        return horizontal_gradient, vertical_gradient

    # Views onto the interior pixels (the only ones that get a value).
    hori_inner = horizontal_gradient[half_r:half_r + inner_rows, half_c:half_c + inner_cols]
    vert_inner = vertical_gradient[half_r:half_r + inner_rows, half_c:half_c + inner_cols]

    # Since the two operators are of the same shape, one pass over the kernel
    # taps serves both. Each tap is applied to all interior pixels at once;
    # the taps are accumulated in the same (m, n) order as a per-pixel loop.
    for m in range(k_rows):
        for n in range(k_cols):
            shifted = img[m:m + inner_rows, n:n + inner_cols]
            hori_inner += Prewitt_X[m, n] * shifted
            vert_inner += Prewitt_Y[m, n] * shifted

    return horizontal_gradient, vertical_gradient

In [8]:
def generate_magnitude_direction(grad_hori, grad_vert):
    """Turn horizontal/vertical gradients into magnitude and unsigned angle.

    Returns ``(gradient, direction)``: ``gradient`` is the per-pixel
    ``sqrt(h**2 + v**2)``; ``direction`` is the angle of ``(h, v)`` in
    degrees, folded with ``% 180`` so that opposite directions share a value.
    """
    # np.arctan2 answers in radians within [-pi, pi]; convert to degrees first.
    angle_degrees = np.arctan2(grad_vert, grad_hori) * 180 / np.pi

    # np.hypot does (x^2 + y^2)^(0.5) at each pixel.
    magnitude = np.hypot(grad_hori, grad_vert)

    return magnitude, angle_degrees % 180

In [9]:
def OG(gradient, direction):
    """Spread each pixel's gradient magnitude over 9 orientation bins.

    Bins are 20 degrees wide (bin ``k`` covers ``[20k, 20k + 20)``, centre
    ``20k + 10``). A pixel's magnitude is split linearly between the bin its
    angle falls in and the neighbouring bin on the nearer side; the
    neighbour wraps around, so bin 0 and bin 8 are adjacent.

    Parameters
    ----------
    gradient : numpy.ndarray
        Per-pixel magnitude, indexed ``[row, col]``.
    direction : numpy.ndarray
        Per-pixel angle in degrees, same shape. Angles are taken modulo 180,
        so an angle of exactly 180 is treated as 0 and shared between
        bins 0 and 8.

    Returns
    -------
    numpy.ndarray
        ``float16`` array of shape ``(rows, cols, 9)``.
    """
    orientation_gradient = np.zeros([gradient.shape[0], gradient.shape[1], 9], dtype = np.float16)

    for i in range(gradient.shape[0]):
        for j in range(gradient.shape[1]):
            # Wrap into [0, 180). Clamping 180 into bin 8 with offset 0 would
            # hand the second share to bin 7 instead of bin 0.
            angle = direction[i, j] % 180
            if angle >= 180:
                # Low-precision rounding of a tiny negative input can land
                # the modulo result exactly on 180.
                angle = angle - 180

            cur_class = int(angle // 20)  # bin containing the angle, 0~8
            pivot = angle % 20            # offset inside that bin, [0, 20)

            if pivot < 10:
                # Below the bin centre: share with the previous bin.
                another_class = (cur_class - 1) % 9
                neighbour_weight = 10 - pivot
                own_weight = 10 + pivot
            else:
                # At or above the bin centre: share with the next bin.
                another_class = (cur_class + 1) % 9
                neighbour_weight = pivot - 10
                own_weight = 30 - pivot

            # own_weight + neighbour_weight == 20, hence the division by 20.
            orientation_gradient[i, j, cur_class] += gradient[i, j] / 20 * own_weight
            orientation_gradient[i, j, another_class] += gradient[i, j] / 20 * neighbour_weight

    return orientation_gradient

# Creating the Histogram Feature

In [10]:
def feature(orientation_gradient):
    """Build the block-normalised HOG descriptor from per-pixel histograms.

    Steps:
      1. Sum the 9-bin per-pixel histograms over non-overlapping 8x8 cells.
      2. For every 2x2 group of neighbouring cells (moving one cell at a
         time) stack the four cell histograms into a 36-value column, in the
         order (i, j), (i+1, j), (i, j+1), (i+1, j+1).
      3. L2-normalise each column.

    Returns an array of shape ``(36, (cell_rows - 1) * (cell_cols - 1))``.

    NOTE(review): the cell sums are accumulated in float16; check that this
    precision is sufficient for the magnitudes involved.
    """
    bins = 9
    cell_size = 8

    # Number of whole cells along each axis (e.g. 160 x 96 pixels -> 20 x 12).
    cell_rows = int(orientation_gradient.shape[0] / cell_size)
    cell_cols = int(orientation_gradient.shape[1] / cell_size)

    # One output column per 2x2 block of cells.
    block_count = (cell_rows - 1) * (cell_cols - 1)

    # Accumulate the orientation gradient of each cell, pixel by pixel.
    feature_cell = np.zeros([cell_rows, cell_cols, bins], dtype = np.float16)
    for r in range(cell_rows):
        top = r * cell_size
        for c in range(cell_cols):
            left = c * cell_size
            for dy in range(cell_size):
                for dx in range(cell_size):
                    feature_cell[r, c] += orientation_gradient[top + dy, left + dx, :bins]

    # Use the per-cell histograms to form the blocks.
    feature_map = np.zeros([36, block_count], dtype = np.float16)
    for r in range(cell_rows - 1):
        for c in range(cell_cols - 1):
            column = r * (cell_cols - 1) + c
            feature_map[0:9, column] = feature_cell[r, c]
            feature_map[9:18, column] = feature_cell[r + 1, c]
            feature_map[18:27, column] = feature_cell[r, c + 1]
            feature_map[27:36, column] = feature_cell[r + 1, c + 1]

    # L2 norm per block (column).
    return normalize(feature_map, axis=0, norm='l2')

# Computing the Distance

In [11]:
def distance(map1, map2):
    """Histogram-intersection score between two feature maps.

    Computes ``sum(min(map1, map2)) / sum(map2)`` over all elements.

    Parameters
    ----------
    map1, map2 : numpy.ndarray
        Feature maps of broadcast-compatible shape.

    Returns
    -------
    numpy scalar
        The intersection ratio, or 0 when ``map2`` sums to 0. Without this
        guard the division would yield nan, which sorts as the largest value
        in ``np.argsort``.
    """
    numerator = np.sum(np.minimum(map1, map2))
    denominator = map2.sum()

    if denominator == 0:
        # Nothing to intersect with; report no overlap instead of nan.
        return type(numerator)(0)

    return numerator / denominator

## Uploading the image, reading the image and proceeding to execute the functions on the image

In [12]:
def _hog_feature_map(image_path):
    """Read one image file and run the pipeline defined above on it."""
    img = gray_scale(plt.imread(image_path))
    grad_hori, grad_vert = grad_op(img)
    gradient, direction = generate_magnitude_direction(grad_hori, grad_vert)
    return feature(OG(gradient, direction))


def _write_feature_map(feature_map, out_path):
    """Write every value of ``feature_map`` to ``out_path``, one per line, row by row."""
    # `with` closes the file even if a write fails part-way through.
    with open(out_path, "w") as fo:
        for row in feature_map:
            for value in row:
                fo.write(str(value) + "\n")


def _collect_feature_maps(folder, prefix, saved_names):
    """Return the feature map of every file in ``folder``.

    Files whose name minus its last four characters is in ``saved_names``
    also get their feature map written to ``<prefix>_<name>_lines.txt``.
    """
    feature_maps = []
    # os.listdir gives no ordering guarantee; sort so that the index of each
    # training sample is the same on every run and machine.
    for filename in sorted(os.listdir(folder)):
        feature_map = _hog_feature_map(folder + "/" + filename)
        feature_maps.append(feature_map)

        # presumably strips a three-letter extension such as ".bmp" - confirm
        stem = filename[:-4]
        if stem in saved_names:
            _write_feature_map(feature_map, '{}_{}_lines.txt'.format(prefix, stem))
    return feature_maps


def processing_data():
    """Compute the feature maps of all positive and negative training images.

    Returns
    -------
    (training_Pos, training_Neg) : tuple of list
        One feature map per file found in
        "./Image Data/Training images (Pos)" and
        "./Image Data/Training images (Neg)" respectively.

    Side effects: the feature maps of 'crop001028a', 'crop001030c'
    (positive) and '00000091a_cut' (negative) are written to
    ``pos_<name>_lines.txt`` / ``neg_<name>_lines.txt`` in the working
    directory.
    """
    training_Pos = _collect_feature_maps("./Image Data/Training images (Pos)", "pos",
                                         ('crop001028a', 'crop001030c'))

    training_Neg = _collect_feature_maps("./Image Data/Training images (Neg)", "neg",
                                         ('00000091a_cut',))

    return training_Pos, training_Neg

# Classifying the Test Images with 3-Nearest Neighbours

In [15]:
# first retrieve the training dataset with the function above
training_Pos, training_Neg = processing_data()

training_Pos = np.array(training_Pos) # shape = [m1 * 36 * n]

training_Neg = np.array(training_Neg) # shape = [m2 * 36 * n]

# then concatenate them, positives first, so one index addresses every sample.
# (the original notes assume 10 of each: index 0 to 9 positive, 10 to 19 negative)
training = np.concatenate((training_Pos, training_Neg), axis=0)

# class_value[t][k] = score between test image t and training image k
# (per the original notes: 10 test imgs x 20 training imgs)
class_value = []

# same pipeline as for the training images,
# plus the distance (IOU) between each test img and every training img
test_pos_dir = "./Image Data/Test images (Pos)"

# sorted: os.listdir gives no ordering guarantee, and the row order of
# class_value should not change between runs
for filename in sorted(os.listdir(test_pos_dir)):
    img = plt.imread(test_pos_dir + "/" + filename)
    img = gray_scale(img)

    grad_hori, grad_vert = grad_op(img)
    gradient, direction = generate_magnitude_direction(grad_hori, grad_vert)

    # save the gradient magnitude as a gray image scaled to 0..255
    gradient_int = gradient.astype(np.int16)
    plt.imsave("test_gradient_{}.png".format(filename[:-4]),
               gradient_int / np.max(gradient_int) * 255, cmap = 'gray')

    orientation_gradient = OG(gradient, direction)

    feature_map = feature(orientation_gradient)

    single_test = [distance(feature_map, training[k]) for k in range(training.shape[0])]

    class_value.append(single_test)

    # for those whose HOG should be saved, dump it one value per line
    if(filename[:-4] == 'crop001278a' or filename[:-4] == 'crop001500b'):
        # `with` closes the file even if a write fails
        with open('test_{}_lines.txt'.format(filename[:-4]), "w") as fo:
            for i in range(feature_map.shape[0]):
                for j in range(feature_map.shape[1]):
                    fo.write(str(feature_map[i, j])+"\n")
            

In [16]:
# Negative test images: same pipeline as for the positive test images.
# This cell appends to `class_value` and reads `training`, both of which are
# created by the previous cell, so it must run after it (and only once).
test_neg_dir = "./Image Data/Test images (Neg)"

# sorted: os.listdir gives no ordering guarantee, and the row order of
# class_value should not change between runs
for filename in sorted(os.listdir(test_neg_dir)):
    img = plt.imread(test_neg_dir + "/" + filename)
    img = gray_scale(img)

    grad_hori, grad_vert = grad_op(img)

    gradient, direction = generate_magnitude_direction(grad_hori, grad_vert)

    # save the gradient magnitude as a gray image scaled to 0..255
    gradient_int = gradient.astype(np.int16)
    plt.imsave("test_gradient_{}.png".format(filename[:-4]),
               gradient_int / np.max(gradient_int) * 255, cmap = 'gray')

    orientation_gradient = OG(gradient, direction)

    feature_map = feature(orientation_gradient)

    single_test = [distance(feature_map, training[k]) for k in range(training.shape[0])]

    class_value.append(single_test)

    # for those whose HOG should be saved, dump it one value per line
    if(filename[:-4] == '00000090a_cut'):
        # `with` closes the file even if a write fails
        with open('test_{}_lines.txt'.format(filename[:-4]), "w") as fo:
            for i in range(feature_map.shape[0]):
                for j in range(feature_map.shape[1]):
                    fo.write(str(feature_map[i, j])+"\n")
        

In [17]:
# Show the raw scores first: one row per test image, one entry per training image.
# remember the first 5 are positive test imgs, second 5 are negative test imgs
# and the value from 0 to 9 means positive sample, from 10 to 19 means negative sample
for scores in class_value:
    print(scores)

# convert to ndarray for sorting
class_value = np.array(class_value)

# 3-NN: argsort is ascending, so the last three indices of each row are the
# three training samples with the largest score
class_result = [np.argsort(class_value[row])[-3:] for row in range(class_value.shape[0])]

print(class_result)

[0.383, 0.3848, 0.592, 0.3853, 0.3674, 0.367, 0.4944, 0.6533, 0.3733, 0.2917, 0.4072, 0.4482, 0.3833, 0.4456, 0.354, 0.5537, 0.4734, 0.553, 0.5356, 0.4763]
[0.3928, 0.44, 0.481, 0.428, 0.3296, 0.337, 0.4373, 0.5405, 0.344, 0.294, 0.3777, 0.4001, 0.3352, 0.3545, 0.355, 0.4954, 0.382, 0.423, 0.4438, 0.4148]
[0.3972, 0.3694, 0.4124, 0.3547, 0.2493, 0.2883, 0.3738, 0.4878, 0.261, 0.2296, 0.2832, 0.3032, 0.254, 0.3362, 0.2272, 0.4033, 0.3123, 0.4036, 0.4065, 0.3467]
[0.213, 0.196, 0.2256, 0.1879, 0.1428, 0.1581, 0.1857, 0.3027, 0.1592, 0.131, 0.174, 0.1637, 0.1304, 0.1512, 0.139, 0.2212, 0.1688, 0.1515, 0.342, 0.1931]
[0.3984, 0.3901, 0.4836, 0.394, 0.3416, 0.3347, 0.4182, 0.527, 0.3464, 0.3184, 0.3398, 0.3687, 0.316, 0.3079, 0.2656, 0.418, 0.3557, 0.3452, 0.515, 0.369]
[0.2913, 0.2468, 0.4285, 0.325, 0.2566, 0.2522, 0.3547, 0.51, 0.2898, 0.2512, 0.3425, 0.3289, 0.2998, 0.3054, 0.3037, 0.451, 0.3394, 0.3794, 0.5156, 0.3777]
[0.1732, 0.1497, 0.2449, 0.1924, 0.1716, 0.1909, 0.2588, 0.342, 0.1