## Feature Extraction Using OpenCV


In [1]:
#!pip install opencv-python==4.5.1.48

In [2]:
import cv2
import json 
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import re
import seaborn as sns
import skimage 
from scipy.interpolate import splprep, splev

from skimage.exposure import rescale_intensity
from skimage.io import imread, imsave
from pathlib import Path

# helper methods for feature extraction
from feature_extract import *

In [3]:
# Constants
# Edge length used for the (IMG_SIZE, IMG_SIZE) target passed to resizeAndPad
# and for the Fourier-descriptor reconstruction canvas.
IMG_SIZE = 256
# Root folder of the source catalogue; each sub-directory name is used as a label.
IMAGES_DIR = '../images/final_pigmentation_catalogue_2016'
# Folder that receives the generated pre-processing / feature images.
TARGET_IMAGES_DIR = '../images/fin_features'
# Threshold on the number of collected images; it is checked after each
# sub-directory has been scanned, so the final count can exceed this value.
MAX_FILES = 50

Build up the metadata (label, path, file name and capture date) of the source images.

In [4]:
# Collect one metadata row per catalogue image. Every entry directly under
# IMAGES_DIR is treated as a label folder; only PNG files whose name starts
# with "HG_" are kept.
image_dirs = Path(IMAGES_DIR)

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; appending with `images.loc[len(images)] = ...` re-allocates the frame
# for every row.
records = []

# sorted() makes the traversal order reproducible across platforms, which
# also fixes which label folders are included before MAX_FILES is reached
# (glob results come back in filesystem order).
for image_dir in sorted(image_dirs.glob('*')):
    label = image_dir.stem
    for file in sorted(image_dir.glob('*')):
        f_name, f_ext = os.path.splitext(os.path.basename(file))
        if f_ext.lower() != ".png" or not f_name.startswith("HG_"):
            continue
        # The 'img' column starts out as an empty placeholder string.
        records.append([label, file, f_name, ""])
    # The cap is evaluated only after a whole folder has been scanned, so a
    # label is never split; the total may therefore exceed MAX_FILES.
    if len(records) >= MAX_FILES:
        break

images = pd.DataFrame(records, columns=['label', 'path', 'name', 'img'], dtype=object)

# Characters 3..8 of the file name (right after the "HG_" prefix) are parsed
# as a yymmdd date.
images['date'] = pd.to_datetime(images['name'].str.slice(3, 9), format='%y%m%d')

In [5]:
# images[~images['name'].str.slice(3, 4).isin(['1','0'])]
# images['name'].str.slice(3, 4)

In [6]:
# Display the collected metadata table (label, path, name, img, date).
images

Unnamed: 0,label,path,name,img,date
0,2,..\images\final_pigmentation_catalogue_2016\00...,HG_090329_0030_NB,,2009-03-29
1,2,..\images\final_pigmentation_catalogue_2016\00...,HG_100119_004_SD_N1,,2010-01-19
2,2,..\images\final_pigmentation_catalogue_2016\00...,HG_100429_003_SD,,2010-04-29
3,2,..\images\final_pigmentation_catalogue_2016\00...,HG_100429_004_SD,,2010-04-29
4,2,..\images\final_pigmentation_catalogue_2016\00...,HG_100429_008_SD_N1,,2010-04-29
5,2,..\images\final_pigmentation_catalogue_2016\00...,HG_100429_011_SD,,2010-04-29
6,4,..\images\final_pigmentation_catalogue_2016\00...,HG_120524_135_E1_HT,,2012-05-24
7,4,..\images\final_pigmentation_catalogue_2016\00...,HG_120524_153_E1_LH_N3,,2012-05-24
8,4,..\images\final_pigmentation_catalogue_2016\00...,HG_130719_107_E2_CL,,2013-07-19
9,4,..\images\final_pigmentation_catalogue_2016\00...,HG_130719_108_E2_CL,,2013-07-19


In [7]:
# def listcalc(l,thr):
#     for j in range(len(l)):
#         l[j]=l[j]/255*thr

    
# Pre-process every catalogue image and write the intermediate and feature
# images to TARGET_IMAGES_DIR. IMG_SIZE is taken from the constants cell so
# there is a single place to tune it.

# cv2.imwrite does not create missing folders (it just returns False), so
# make sure the target folder exists before the loop starts.
os.makedirs(TARGET_IMAGES_DIR, exist_ok=True)

# Contrast-stretching lookup table (identical for every image, so built once):
# gray levels up to 40 map to 0, 40..80 is stretched onto 0..80, 80..200 is
# left unchanged and everything above 200 is clamped to 200.
xp = [40, 80, 200, 200, 200]
fp = [0, 80, 200, 200, 200]
x = np.arange(256)
table = np.interp(x, xp, fp).astype('uint8')

# Edge-emphasis kernel. It is only needed by the cv2.filter2D step that is
# currently disabled inside the loop. The coefficients sum to zero, so the
# normalisation below divides by 1.
kernel = np.array([[0.5, 1.0, 0.5],
                   [1.0, -6.0, 1.0],
                   [0.5, 1.0, 0.5]])
kernel = kernel/(np.sum(kernel) if np.sum(kernel)!=0 else 1)

img = []
for file in images.itertuples():
    image = cv2.imread(str(file.path), cv2.IMREAD_UNCHANGED)  # keep the alpha channel
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print("Skipping unreadable image: " + str(file.path))
        continue

    # make fixed size
    img_rsz = resizeAndPad(image, (IMG_SIZE, IMG_SIZE), 255)
    # optional border (disabled): img_rsz = add_border(img_rsz,5,0)

    # The pipeline needs the four B, G, R, A planes; skip anything else
    # instead of failing on the unpacking below.
    if img_rsz.ndim != 3 or img_rsz.shape[2] != 4:
        print("Skipping image without alpha channel: " + str(file.path))
        continue

    # split and extract alpha
    b, g, r, a = cv2.split(img_rsz)
    img_a = (255-a)  # inverted alpha: bright where the pixel is transparent

    # White mask for (almost) fully transparent pixels. The saturating add
    # turns them white, removing backgrounds hidden behind the alpha mask.
    _, a2 = cv2.threshold(img_a, 220, 255, cv2.THRESH_BINARY)
    image = cv2.merge([cv2.add(b, a2), cv2.add(g, a2), cv2.add(r, a2)])

    # Binary mask of the (nearly) opaque region; erode followed by dilate
    # cleans away small isolated spots.
    _, a3 = cv2.threshold(img_a, 20, 255, cv2.THRESH_BINARY_INV)
    a3 = cv2.erode(a3, None, iterations=2)
    a3 = cv2.dilate(a3, None, iterations=2)

    # optional colour CLAHE (disabled): image = claheHSV(image)
    # make gray
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # contrast stretching
    image = cv2.LUT(image, table)

    # Blur to reduce noise. cv2.blur returns the filtered image rather than
    # modifying its input, so the result has to be assigned.
    image = cv2.blur(image, (18, 18))
    # alternatives tried: cv2.bilateralFilter(image, 3, 50, 50),
    #                     cv2.GaussianBlur(image, (11, 11), 0)

    # apply CLAHE
    image = claheGray(image, 1.5, 8)

    # contrast
    image = contrast_yt(image, 30, 0, 255)

    # contour - get the contour from the mask, keep the two largest by area
    image, cntr, contours = drawContour(image, a3, False, 100, 1, 1, 60, 240, 30, 7)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:2]

    # Fourier descriptor of the contour and its reconstruction image
    fourier_desc = findDescriptor(contours)
    img_fd, fourier_desc = reconstruct(fourier_desc, 80, IMG_SIZE)
    # optional truncation (disabled): fourier_desc = truncate_descriptor(fourier_desc,10)

    # Log the rounded real parts of the first four coefficients per image.
    leading = ":".join(str(round(coef.real)) for coef in fourier_desc[:4])
    print(file.label + ": " + leading + " - " + file.name)

    # find and emphasise edges (disabled): image = cv2.filter2D(image,-1,kernel)

    image = (255-image)  # inverse

    # Write all intermediate results; report writes that OpenCV rejected.
    outputs = [
        ("_rsz_", img_rsz),
        ("_ftr_", image),
        ("_mask_", a3),
        ("_cntr_", cntr),
        ("_fdsc_", img_fd),
    ]
    for tag, out_img in outputs:
        out_path = TARGET_IMAGES_DIR + "/pre1_" + file.name + tag + file.label + ".png"
        if not cv2.imwrite(out_path, out_img):
            print("Failed to write " + out_path)
#    img.append(image/256)

#images.img = img
print("Complete!")

0002: 41717:-985:5925:-574 - HG_090329_0030_NB
0002: 47463:10739:-9496:2081 - HG_100119_004_SD_N1
0002: 53405:3018:-5866:2014 - HG_100429_003_SD
0002: 39345:-6490:8863:-147 - HG_100429_004_SD
0002: 36275:-6008:6891:148 - HG_100429_008_SD_N1
0002: 51590:-1232:5159:71 - HG_100429_011_SD
0004: 50837:-6402:9877:-481 - HG_120524_135_E1_HT
0004: 62368:-6291:11579:-929 - HG_120524_153_E1_LH_N3
0004: 50701:-5497:5316:-1940 - HG_130719_107_E2_CL
0004: 52566:-5149:4841:-2206 - HG_130719_108_E2_CL
0004: 57084:2859:-3948:729 - HG_130719_109_E2_CL
0004: 49633:-3171:4896:-2637 - HG_130719_112_E2_CL
0004: 55332:-6158:9197:-846 - HG_130719_113_E2_CL
0004: 56855:-4688:7507:-1367 - HG_130719_114_E2_CL_N4
0004: 55918:-1776:6324:-2298 - HG_130719_268_E2_CL
0004: 57699:6004:-3798:1528 - HG_130719_269_E2_CL
0004: 55928:-5355:10367:-797 - HG_130719_274_E2_CL
0004: 60907:-1533:8503:-1342 - HG_130719_275_E2_CL
0004: 55688:-3300:7173:-1931 - HG_130719_279_E2_CL
0004: 53671:-4761:9196:-1080 - HG_130719_280_E2_CL

In [8]:
# Inspect the Fourier descriptor left behind by the last iteration of the
# processing loop (i.e. the final image that was processed).
fourier_desc

array([ 7.21480000e+04+6.39780000e+04j, -6.97217466e+03+1.52991090e+04j,
        1.10339842e+04-1.46116041e+04j, -1.51113121e+03-4.82076683e+03j,
        1.25407427e+03+2.52453374e+03j, -4.28937646e+02+2.82578874e+03j,
        5.72377092e+02-3.89101239e+02j, -2.65739766e+02-1.01484602e+02j,
        1.52672225e+03-2.90083507e+02j, -4.74485404e+02+3.89374699e+02j,
        1.08960061e+03-1.40710546e+01j, -4.01795550e+02+1.83074722e+02j,
        5.66986112e+01+5.04057854e+02j, -1.92246634e+02+6.98911997e+02j,
        5.10882762e+02-5.72253038e+01j, -4.17416421e+02-2.39439515e+02j,
        2.63391080e+02+1.74881240e+02j, -1.63654077e+02+1.90317072e+02j,
        3.66902594e+02+1.29590564e+02j, -3.63893682e+02+2.06028708e+01j,
       -7.44545543e+01+1.58086004e+02j, -1.02732744e+02+1.18518627e+02j,
        3.51724722e+02-1.98045903e+02j, -4.78323475e+01-2.73650279e+02j,
       -4.86226373e+01+3.34167526e+02j, -1.04311111e+02+1.63044279e+02j,
        2.35594132e+02-4.29945614e+01j, -6.94310993