## Feature Extraction Using openCV


In [None]:
#!pip install opencv-python==4.5.1.48

In [3]:
import cv2
import json 
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import re
import seaborn as sns
import skimage 
from scipy.interpolate import splprep, splev

from skimage.exposure import rescale_intensity
from skimage.io import imread, imsave
from pathlib import Path

# helper methods for feature extraction
from feature_extract import *

In [4]:
#Constants
IMG_SIZE = 128
IMAGES_DIR = '../images/final_pigmentation_catalogue_2016'
TARGET_IMAGES_DIR = '../images/fin_features_'+str(IMG_SIZE)
MAX_FILES = 5000

Build up meta data of the source images

In [5]:
image_dirs = Path(IMAGES_DIR) 
images = pd.DataFrame(columns=['label', 'path', 'name', 'img'], dtype=object)
    
for image_dir in image_dirs.glob('*'):
    label = image_dir.stem
    for file in image_dir.glob('*'):
        basename = os.path.basename(file)
        f_name, f_ext = os.path.splitext(basename)
        if f_ext.lower() != ".png" or f_name[0:3] !="HG_": continue
        images.loc[len(images)] = [label, file, f_name, ""]
    if len(images) >= MAX_FILES: break
images['date'] = pd.to_datetime(images.name.str.slice(3, 9), format='%y%m%d')

In [None]:
# images[~images['name'].str.slice(3, 4).isin(['1','0'])]
# images['name'].str.slice(3, 4)

In [6]:
len(images)

3749

In [None]:
img = []
for i, file in enumerate(images.itertuples()):
    image = cv2.imread(str(file.path), cv2.IMREAD_UNCHANGED) # read alpha channel
    # resized, feature enhanced, mask, contour
    img_rsz, image, img_mask, img_cntr, img_fd, fourier_desc, status = feature_extract(image,IMG_SIZE)
    if status == 1:
        print(file.label+ "_" + file.name + ": no alpha channel")
        continue
    if status == 2:
        print(file.label+ "_" + file.name + ": other error")
        continue
    if status == 3:
        print(file.label+ "_" + file.name + ": resize/pad error")
        continue
    try:
        os.mkdir(TARGET_IMAGES_DIR + "/rsz/"+file.label)
        os.mkdir(TARGET_IMAGES_DIR + "/ftr/"+file.label)
        os.mkdir(TARGET_IMAGES_DIR + "/mask/"+file.label)
        os.mkdir(TARGET_IMAGES_DIR + "/cntr/"+file.label)
        os.mkdir(TARGET_IMAGES_DIR + "/fdsc/"+file.label)
        os.mkdir(TARGET_IMAGES_DIR + "/fdsk/"+file.label)
    except:
        pass
    cv2.imwrite(TARGET_IMAGES_DIR + "/rsz/"+file.label+"/pre1_" + file.name + "_rsz_"+file.label+".png", img_rsz)    
    cv2.imwrite(TARGET_IMAGES_DIR + "/ftr/"+file.label+"/pre1_" + file.name + "_ftr_"+file.label+".png", image)
    cv2.imwrite(TARGET_IMAGES_DIR + "/mask/"+file.label+"/pre1_" + file.name + "_mask_"+file.label+".png", img_mask)
    cv2.imwrite(TARGET_IMAGES_DIR + "/cntr/"+file.label+"/pre1_" + file.name + "_cntr_"+file.label+".png", img_cntr)
    cv2.imwrite(TARGET_IMAGES_DIR + "/fdsc/"+file.label+"/pre1_" + file.name + "_fdsc_"+file.label+".png", img_fd)
    #with open(TARGET_IMAGES_DIR + "/pre1_" + file.name + "_fdsk_"+file.label+".dat", 'w') as f:
    #    data=fourier_desc.tobytes()
    np.save(TARGET_IMAGES_DIR + "/fdsk/"+file.label+"/pre1_" + file.name + "_fdsk_"+file.label+".npy", fourier_desc)
    #print(file.label + ": " +str(round(fourier_desc[0].real))+":"+str(round(fourier_desc[1].real))+":"+str(round(fourier_desc[2].real))+":"+str(round(fourier_desc[3].real)) + " - " + file.name)

In [17]:
IMG_SIZE = 96
IMAGES_DIR = '../images/final_pigmentation_catalogue_2016'
TARGET_IMAGES_DIR = '../images/fin_features_'+str(IMG_SIZE)
img = []
for i, file in enumerate(images.itertuples()):
    image = cv2.imread(str(file.path), cv2.IMREAD_UNCHANGED) # read alpha channel
    # resized, feature enhanced, mask, contour
    img_rsz, image, img_mask, img_cntr, img_fd, fourier_desc, status = feature_extract(image,IMG_SIZE)
    if status == 1:
        print(file.label+ "_" + file.name + ": no alpha channel")
        continue
    if status == 2:
        print(file.label+ "_" + file.name + ": other error")
        continue
    if status == 3:
        print(file.label+ "_" + file.name + ": resize/pad error")
        continue
    try:
        os.mkdir(TARGET_IMAGES_DIR + file.label)
        os.mkdir(TARGET_IMAGES_DIR + file.label)
        os.mkdir(TARGET_IMAGES_DIR + file.label)
        os.mkdir(TARGET_IMAGES_DIR + file.label)
        os.mkdir(TARGET_IMAGES_DIR + file.label)
        os.mkdir(TARGET_IMAGES_DIR + file.label)
    except:
        pass
    cv2.imwrite(TARGET_IMAGES_DIR + file.label+"/pre1_" + file.name + "_rsz_"+file.label+".png", img_rsz)    
    cv2.imwrite(TARGET_IMAGES_DIR + file.label+"/pre1_" + file.name + "_ftr_"+file.label+".png", image)
    cv2.imwrite(TARGET_IMAGES_DIR + file.label+"/pre1_" + file.name + "_mask_"+file.label+".png", img_mask)
    cv2.imwrite(TARGET_IMAGES_DIR + file.label+"/pre1_" + file.name + "_cntr_"+file.label+".png", img_cntr)
    cv2.imwrite(TARGET_IMAGES_DIR + file.label+"/pre1_" + file.name + "_fdsc_"+file.label+".png", img_fd)
    #with open(TARGET_IMAGES_DIR + "/pre1_" + file.name + "_fdsk_"+file.label+".dat", 'w') as f:
    #    data=fourier_desc.tobytes()
    np.save(TARGET_IMAGES_DIR + "/fdsk/"+file.label+"/pre1_" + file.name + "_fdsk_"+file.label+".npy", fourier_desc)
    #print(file.label + ": " +str(round(fourier_desc[0].real))+":"+str(round(fourier_desc[1].real))+":"+str(round(fourier_desc[2].real))+":"+str(round(fourier_desc[3].real)) + " - " + file.name)

0009_HG_111112_038_E3_MG: no alpha channel
0009_HG_130124_129_E1_BO: no alpha channel
0009_HG_131002_959_E2_CL_AII: no alpha channel
0009_HG_131003_0556_E2_CL_AII_A: no alpha channel
0009_HG_131112_074_E1_BL: no alpha channel
0017_HG_131104_3608_E10_CL_AII_B: no alpha channel
0022_HG_130208_340_E1_KR_AII: no alpha channel
0022_HG_130208_341_E1_KR_AII: no alpha channel
0022_HG_130208_347_E1_KR_AII: no alpha channel
0023_HG_120419_048_E1_AA_N7: no alpha channel
0025_HG_120327_024_E1: no alpha channel
0031_HG_111117_151_E1_mg: no alpha channel
0031_HG_111117_156_E5_MG: no alpha channel
0031_HG_111117_243_E4: no alpha channel
0100_HG_121215_759_E2_KR_AII: no alpha channel
0100_HG_121215_801_E2_KR_AII_A: other error
0100_HG_121215_802_E2_KR_AII_A: no alpha channel
0100_HG_121215_890_E2_KR_AII: other error
0100_HG_121215_891_E2_KR_AII: other error
0100_HG_121215_892_E2_KR_AII: other error
0100_HG_130208_1193_E3_KR_AII: other error
0100_HG_130208_1195_E3_KR_AII: other error
0100_HG_130208_130

In [55]:
#training set prep
image_dirs = Path(TARGET_IMAGES_DIR) 
images_trn = pd.DataFrame(columns=['label', 'path', 'name', 'img'], dtype=object)
MAX_FILES = 500
#rsz = resized image
# for file in image_dirs.glob('*rsz*.png'):
#     basename = os.path.basename(file)
#     f_name, f_ext = os.path.splitext(basename)
#     f_name_part = f_name.split('_')
#     label = f_name_part[len(f_name_part)-1]
#     images_trn.loc[len(images_trn)] = [label, file, f_name, ""]
#     if len(images) >= MAX_FILES: break
trn_img="rsz"
labels =[]
for image_dir in image_dirs.glob('*'):
    label = image_dir.stem
    labels.append(label)
    for file in image_dir.glob('*'+trn_img+'*'):
        basename = os.path.basename(file)
        f_name, f_ext = os.path.splitext(basename)
        if f_ext.lower() != ".png" or f_name[0:4] !="pre1": continue
        images_trn.loc[len(images_trn)] = [label, file, f_name, ""]
    if len(images_trn) >= MAX_FILES: break

In [56]:
paths={}
faces=[]
for i, label in enumerate(labels): 
    paths[label] = TARGET_IMAGES_DIR.replace("\\", "/")+"/"+label
    faces.append(label) 

In [57]:
for key in paths.keys():
    li = []
    for i, file in enumerate(images_trn[images_trn.label==key].itertuples()):
        img1 = cv2.imread(str(file.path), cv2.IMREAD_UNCHANGED)
        img2 = img1[...,::-1]
        li.append(np.around(np.transpose(img2, (2,0,1))/255.0, decimals=12))

In [58]:
li

[array([[[0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         ...,
         [0.96078431, 1.        , 1.        , ..., 1.        ,
          1.        , 1.        ],
         [1.        , 1.        , 1.        , ..., 1.        ,
          1.        , 1.        ],
         [1.        , 1.        , 1.        , ..., 1.        ,
          1.        , 1.        ]],
 
        [[1.        , 1.        , 1.        , ..., 1.        ,
          1.        , 1.        ],
         [1.        , 1.        , 1.        , ..., 1.        ,
          1.        , 1.        ],
         [1.        , 1.        , 1.        , ..., 1.        ,
          1.        , 1.        ],
         ...,
         [0.35294118, 0.2627451 , 0.23921569, ..., 0.35686275,
          0.40784314, 0.

In [None]:
# def listcalc(l,thr):
#     for j in range(len(l)):
#         l[j]=l[j]/255*thr

    
IMG_SIZE = 256
img = []
for i, file in enumerate(images.itertuples()):
    image = cv2.imread(str(file.path), cv2.IMREAD_UNCHANGED) # read alpha channel
    # make fixed size
    img_rsz = resizeAndPad(image, (IMG_SIZE,IMG_SIZE), 255)
    # add border
    # img_rsz = add_border(img_rsz,5,0)
    # split and extract alpha
    b, g, r, a = cv2.split(img_rsz)
    img_a = (255-a) # make a white mask
    
    _,a2 = cv2.threshold(img_a, 220, 255, cv2.THRESH_BINARY)
    # merge channels adding the white mask to get rid of backgounds hidden behind alpha mask
    image = cv2.merge([cv2.add(b,a2), cv2.add(g,a2), cv2.add(r,a2)],a)
    
    # create binary mask and clean away some spots
    _,a3 = cv2.threshold(img_a, 20, 255, cv2.THRESH_BINARY_INV)
#     kernel = np.ones((2, 2), np.uint8)
    a3 = cv2.erode(a3, None, iterations=2)
    a3 = cv2.dilate(a3, None, iterations=2)
    
    
    #image = claheHSV(image)
    # make gray
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    #contrast stretching
    xp = [40, 80, 200, 200, 200]
    fp = [0, 80, 200, 200, 200]
    x = np.arange(256)
    table = np.interp(x, xp, fp).astype('uint8')
    image = cv2.LUT(image, table)

#     cv2.bilateralFilter(image, 3, 50, 50)

    #blur to reduce noise
    cv2.blur(image, (18, 18))    
    #cv2.GaussianBlur(image, (11, 11), 0)
    
    # apply CLAHE
    image = claheGray(image, 1.5,8)
    
    # contrast
    image = contrast_yt(image,30,0,255)

    # contour - get the contour from the mask
    image, cntr, contours, edges = drawContour(image,a3,False,100,1,1,60,240,30,7)
    #contours = sorted(contours, key=cv2.contourArea, reverse=True)[:2]
#    print(len(contours[0]))
#     contours[0] = contours[0][:(len(contours[0])//2)]
    fourier_desc = findDescriptor(contours)
    img_fd, fourier_desc = reconstruct(fourier_desc, 80, IMG_SIZE)
#     fourier_desc = truncate_descriptor(fourier_desc,10)
#     print(file.name)
    print(file.label + ": " +str(round(fourier_desc[0].real))+":"+str(round(fourier_desc[1].real))+":"+str(round(fourier_desc[2].real))+":"+str(round(fourier_desc[3].real)) + " - " + file.name)
#     print(str(type(fourier_desc[0])))
    # find and enpasise edges
    kernel = np.array([[0.5, 1.0, 0.5], 
                   [1.0, -6.0, 1.0],
                   [0.5, 1.0, 0.5]])
    kernel = kernel/(np.sum(kernel) if np.sum(kernel)!=0 else 1)
    #filter the source image
    #image = cv2.filter2D(image,-1,kernel)

    
    image = (255-image) # inverse
    cv2.imwrite(TARGET_IMAGES_DIR + "/pre1_" + file.name + "_rsz_"+file.label+".png", img_rsz)    
    cv2.imwrite(TARGET_IMAGES_DIR + "/pre1_" + file.name + "_ftr_"+file.label+".png", image)
    cv2.imwrite(TARGET_IMAGES_DIR + "/pre1_" + file.name + "_mask_"+file.label+".png", a3)
    cv2.imwrite(TARGET_IMAGES_DIR + "/pre1_" + file.name + "_cntr_"+file.label+".png", cntr)
    cv2.imwrite(TARGET_IMAGES_DIR + "/pre1_" + file.name + "_fdsc_"+file.label+".png", img_fd)
#    img.append(image/256)
    
#     progress = i/MAX_FILES * 100
#     if progress % 5 == 0: print(f'{progress}% done')
#images.img = img
print("Complete!")

In [None]:
fourier_desc

Training TL network

In [1]:
MODEL_DIR_TL = '../models/finnet'
data = pd.read_csv(MODEL_DIR_TL +"/train_rsz_0002_50_100.csv", skiprows=0, header=0,index_col=[0])
fig, axes = plt.subplots(nrows=2, ncols=2)
fig.tight_layout()

p = data.plot(x_compat=True, linestyle='-', marker='o', label='Loss',title="Fig5: Training Alpha 0.002 / 50Epochs / 100 Steps", ax=axes[0,0],figsize=(16,9))
plt.show()

NameError: name 'pd' is not defined

In [12]:
data


Unnamed: 0,epoch,FinRecoModel_1_loss,FinRecoModel_2_loss,FinRecoModel_loss,loss
0,0,0.002128,0.001895,0.002952,0.006975
1,1,0.001995,0.00204,0.001842,0.005877
2,2,0.002104,0.001949,0.001941,0.005995
3,3,0.00202,0.002055,0.002077,0.006152
4,4,0.002018,0.001992,0.002005,0.006016
5,5,0.002018,0.002011,0.002003,0.006032
6,6,0.002004,0.002,0.001998,0.006002
7,7,0.001982,0.001988,0.002008,0.005978
8,8,0.002017,0.001944,0.002065,0.006025
9,9,0.001989,0.002035,0.001984,0.006008


In [3]:
!jupyter nbconvert --to html 3_opencv_feature_extraction

[NbConvertApp] Converting notebook 3_opencv_feature_extraction.ipynb to html
[NbConvertApp] Writing 664179 bytes to 3_opencv_feature_extraction.html
