# feature generation
This repository contains the code for generating the different kinds of features for traditional machine learning for the Capstone project.

### Instructions
1. Run the cell that defines the functions first (OpenCV `cv2`, scikit-image and SciPy have to be installed).
2. Arguments: `image` - the target image from which the features are calculated; its height should preferably be equal to its width.
3. Set the arguments of each function (many parameters can be adjusted; please refer to the reference links below).
4. Run the functions and collect the returned features.
5. Return values are in NumPy array format (some functions return a single scalar).

### Reference
1. numpy array: https://numpy.org/doc/stable/reference/arrays.html
2. sample and definition of HoG: https://scikit-image.org/docs/dev/auto_examples/features_detection/plot_hog.html
3. edge detection: https://docs.opencv.org/3.4/da/d22/tutorial_py_canny.html
4. CV2 of Python (openCV): https://pypi.org/project/opencv-python/
5. local binary pattern histogram: https://pyimagesearch.com/2015/12/07/local-binary-patterns-with-python-opencv/

In [24]:
#data feature
import cv2
import numpy as np
import numpy.fft as fft
from scipy.special import comb
from skimage.feature import graycomatrix, graycoprops
from skimage.feature import hog
from skimage.feature import local_binary_pattern
from skimage.measure import regionprops
from skimage.morphology import binary_erosion


# get horitonzal edges from sobelY
def number_of_edgePixels(image):
    sobelY = cv2.Sobel(image, cv2.CV_64F,0,1,ksize=5)
    return np.count_nonzero(sobelY==255)

# std of mean of rows, get high value for black/white horisontal stripes.
def std_meanOfRows(image):
    mean_row = np.mean(image, axis=1)
    return np.std(mean_row, dtype=np.float32)

# Computes the Laplacian of the image and returns the mean value of all the pixels. 
# A higher value indicates more edges and details in the image
def mean_laplacian(image):
    return cv2.mean(cv2.Laplacian(image, cv2.CV_64F))[0]

# mean number of peak and valleys in each column, get high value for noisy pictures.
def mean_PeaksValleys(image):
    peaks_valleys = []
    for j in range(image.shape[1]):
        col = image[:, j]
        num_peaks, num_valleys = count_peaks_valleys(col)
        mean_peaks_valleys = (num_peaks + num_valleys) / 2
        peaks_valleys.append(mean_peaks_valleys)
    return np.mean(peaks_valleys, dtype=np.float32)
    
def count_peaks_valleys(arr_1d):
    num_peaks = 0
    num_valleys = 0
    for i in range(1, len(arr_1d) - 1):
        if arr_1d[i] > arr_1d[i-1] and arr_1d[i] > arr_1d[i+1]:
            num_peaks += 1
        elif arr_1d[i] < arr_1d[i-1] and arr_1d[i] < arr_1d[i+1]:
            num_valleys += 1
    return (num_peaks, num_valleys)

# numbers of edges from canny edge detection, use dfs to explore number of group of edge pixels
def num_edges_canny(image, L2Gradient, sobel_kernal_size):
    T_lower = 100
    T_upper = 200 
    edge = cv2.Canny(image, T_lower, T_upper, apertureSize=sobel_kernal_size, L2Gradient = L2Gradient)
    def dfs(r, c):
        if r < 0 or r >= np.size(image, 0) or c < 0 or c >= np.size(image, 1) or image[r][c] == 0:
            return 0
            
        image[r][c] = 0
            
        for i, j in zip((r - 1, r + 1, r, r), (c, c, c - 1, c + 1)):
            dfs(i, j)
            
        return 1
    
    return sum(dfs(i, j) for i in range(np.size(image, 0)) for j in range(np.size(image, 1)))

def lbp_histogram(image, radius, bins):
    # compute the LBP histogram of the image
    n_points = 8 * radius
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    hist, _ = np.histogram(lbp, bins= bins, density=True)
    return np.ravel(hist)

# Haralick features 
# describe the texture of an image by computing statistics from the gray-level co-occurrence matrix (GLCM) of the image.
def haralick_contrast(image, distance=1, angle=0):
    glcm = graycomatrix(image, [distance], [angle], levels=256, symmetric=True, normed=True)
    return graycoprops(glcm, 'contrast')[0, 0]

#Hu moments are a set of shape features that are invariant to rotation, scale, and translation. 
# They can be computed from the moments of an image's contour
def hu_moments(image):
    moments = cv2.moments(image)
    hu_moments = cv2.HuMoments(moments)
    return np.ravel(hu_moments)

#set of shape features that are invariant to rotation, scale, and translation. 
# They can be computed from the radial and angular moments of an image's contour
def zernike_moments(image, radius=1, degree=8):
    eroded_image = binary_erosion(image, np.ones((2*radius+1, 2*radius+1)))
    props = regionprops(eroded_image.astype(np.uint8))
    moments = np.zeros(degree+1)
    for prop in props:
        r = np.sqrt(prop.area/np.pi)
        if r < radius:
            z = np.complex(prop.centroid[1], prop.centroid[0])
            for n in range(degree+1):
                for m in range(-n, n+1, 2):
                    if m < 0:
                        cmn = comb(n, int((n-m)/2))
                        fm = np.exp(np.complex(0, m*prop.orientation))
                        moments[n] += (r**n) * cmn

#GLCM-based texture features
def glcm_features(image):
    # Compute the gray-level co-occurrence matrix
    glcm = graycomatrix(image, [5], [0, np.pi/4, np.pi/2, 3*np.pi/4])
    
    # Compute some commonly used GLCM features
    contrast = graycoprops(glcm, 'contrast')
    dissimilarity = graycoprops(glcm, 'dissimilarity')
    homogeneity = graycoprops(glcm, 'homogeneity')
    energy = graycoprops(glcm, 'energy')
    correlation = graycoprops(glcm, 'correlation')
    
    # Concatenate the features into a single vector
    features = np.concatenate([contrast, dissimilarity, homogeneity, energy, correlation])
    
    return features


# Fourier descriptors represent the shape of an object in terms of its frequency components
def fourier_shape_features(image):
    # Compute the Fourier Transform of the image
    f = fft.fft2(image)
    
    # Shift the zero frequency component to the center
    fshift = fft.fftshift(f)
    
    # Take the magnitude of the Fourier Transform
    magnitude_spectrum = np.log(np.abs(fshift))
    
    # Extract the Fourier coefficients for the first 5 frequencies in each dimension
    n = image.shape[0]
    m = image.shape[1]
    p = 5
    descriptors = np.zeros((p, p))
    for i in range(p):
        for j in range(p):
            descriptors[i, j] = np.abs(fshift[n//2 + i, m//2 + j])
    
    # Flatten the descriptors into a single vector
    features = descriptors.flatten()
    return np.ravel(features)

# HoG
def hog_(image, image_size):
    gray_img = np.dot(image[..., :3], [0.2989, 0.5870, 0.1140])
    fd, hog_image = hog(image, orientations=9, pixels_per_cell=(100, 100),
                	cells_per_block=(2, 2), visualize=True, feature_vector = True)
#     print(fd.shape)
#     print(hog_image.shape)
    return fd


In [2]:
from sklearn import datasets
from sklearn.utils import Bunch
npdata = np.load('/content/2910_datasetC_200x200.npz')
print(npdata['data'].shape)

(2910, 200, 200)


In [None]:
#example of using features to train model
number_image = 2910
feature = 9
img=np.zeros(shape=(number_image,feature))
for i in range(0,number_image): 
    img[i][0] = number_of_edgePixels(npdata['data'][i])
    img[i][1] = std_meanOfRows(npdata['data'][i])
    img[i][2] = mean_PeaksValleys(npdata['data'][i])
    img[i][3] = np.std(npdata['data'][i])
    img[i][4] = mean_laplacian(npdata['data'][i])
    img[i][5] = hu_moments(npdata['data'][i])[0]
    img[i][6] = zernike_moments(npdata['data'][i])
    img[i][7] = glcm_features(npdata['data'][i])[0][0]
    img[i][8] = fourier_shape_features(npdata['data'][i])[0]
    
print(img.shape)
dataset = Bunch(data = img, target=npdata['label'])