<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Setup" data-toc-modified-id="Setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#Preprocessing" data-toc-modified-id="Preprocessing-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Preprocessing</a></span><ul class="toc-item"><li><span><a href="#Image-Augmentation" data-toc-modified-id="Image-Augmentation-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Image Augmentation</a></span></li></ul></li><li><span><a href="#Utility" data-toc-modified-id="Utility-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Utility</a></span></li><li><span><a href="#Feature-Engineering" data-toc-modified-id="Feature-Engineering-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Feature Engineering</a></span><ul class="toc-item"><li><span><a href="#Raw-Pixel-Image-Data" data-toc-modified-id="Raw-Pixel-Image-Data-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Raw Pixel Image Data</a></span><ul class="toc-item"><li><span><a href="#Extract" data-toc-modified-id="Extract-4.1.1"><span class="toc-item-num">4.1.1&nbsp;&nbsp;</span>Extract</a></span></li><li><span><a href="#Read" data-toc-modified-id="Read-4.1.2"><span class="toc-item-num">4.1.2&nbsp;&nbsp;</span>Read</a></span></li></ul></li><li><span><a href="#HoG" data-toc-modified-id="HoG-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>HoG</a></span><ul class="toc-item"><li><span><a href="#Extract" data-toc-modified-id="Extract-4.2.1"><span class="toc-item-num">4.2.1&nbsp;&nbsp;</span>Extract</a></span></li><li><span><a href="#Read" data-toc-modified-id="Read-4.2.2"><span class="toc-item-num">4.2.2&nbsp;&nbsp;</span>Read</a></span></li></ul></li><li><span><a href="#LBP" data-toc-modified-id="LBP-4.3"><span class="toc-item-num">4.3&nbsp;&nbsp;</span>LBP</a></span><ul class="toc-item"><li><span><a href="#Extract" data-toc-modified-id="Extract-4.3.1"><span 
class="toc-item-num">4.3.1&nbsp;&nbsp;</span>Extract</a></span></li><li><span><a href="#Read" data-toc-modified-id="Read-4.3.2"><span class="toc-item-num">4.3.2&nbsp;&nbsp;</span>Read</a></span></li></ul></li></ul></li></ul></div>

# Setup

In [None]:
!/home/hui/anaconda3/envs/ml/lib/python3.10/site-packages/pip -V

In [None]:
# setup to see the execution time in each cell

!/home/hui/anaconda3/envs/ml/lib/python3.10/site-packages/pip install ipython-autotime
!/home/hui/anaconda3/envs/ml/lib/python3.10/site-packages/pip install directory_structure
!/home/hui/anaconda3/envs/ml/lib/python3.10/site-packages/pip install handcalcs
!/home/hui/anaconda3/envs/ml/lib/python3.10/site-packages/pip install pytz
!/home/hui/anaconda3/envs/ml/lib/python3.10/site-packages/pip install skimage
# !/home/hui/anaconda3/envs/ml/lib/python3.10/site-packages/pip install opencv-contrib-python==4.4.0.44
# %load_ext autotime

In [19]:
import os
import random
import glob
import PIL
from PIL import Image
import numpy as np
import cv2
import shutil
import pickle

from skimage.feature import hog
from skimage.feature import local_binary_pattern

# from google.colab.patches import cv2_imshow
# from directory_structure import Tree

In [2]:
# Report the version of the OpenCV build imported by this kernel.
print(cv2.__version__)

4.5.5


# Preprocessing

Use this if you are using Google Colab.

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
# Dataset root: keep exactly one ROOT_DIR line active for the machine in use.

ROOT_DIR = "/home/hui/Projects/g5/datasets"  # Henry's dir
# ROOT_DIR = "/content/drive/MyDrive/Project - G5/Dataset/"    #Lejia's dir
# ROOT_DIR = "/content/drive/MyDrive/Project - G5/Dataset/" #Nan's dir
# ROOT_DIR = "/content/drive/MyDrive/Project - G5/Dataset"


# Folder that receives the resized copies of the images.
RESIZE_DIR = f"{ROOT_DIR}/augmentation/resize"


# One output folder per handcrafted feature type.
RAW_PIXEL_DIR = f"{ROOT_DIR}/handcraft_features/raw_pixel"
SIFT_DIR = f"{ROOT_DIR}/handcraft_features/sift"
HOG_DIR = f"{ROOT_DIR}/handcraft_features/hog"
LBP_DIR = f"{ROOT_DIR}/handcraft_features/lbp"

## Image Augmentation

Some of the images differ in pixel dimensions, so we perform image augmentation (resizing without preserving the aspect ratio) to bring all images to the same width and height.

<b>Note, we will work on the resized images for the feature extraction algorithms!</b>

In [15]:
def resize_image(img, w, h):
    """Return ``img`` scaled to exactly ``w`` x ``h`` pixels.

    The aspect ratio is not preserved; resampling uses ``cv2.INTER_AREA``.
    """
    target_size = (w, h)
    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)


# Resize every image found in the source folder to 256x256 and store the
# result under RESIZE_DIR using the same file name.
img_dir = ROOT_DIR + "/cv2"
img_files = os.listdir(img_dir)

# exist_ok replaces the separate exists() check and keeps the cell re-runnable
os.makedirs(RESIZE_DIR, exist_ok=True)

skipped = []
for img in img_files:
    image = cv2.imread(img_dir + "/" + img)
    if image is None:
        # cv2.imread returns None (it does not raise) when an entry cannot be
        # decoded, e.g. a sub-directory or a non-image file; passing None on
        # to cv2.resize would abort the whole run.
        skipped.append(img)
        continue
    resized_img = resize_image(image, 256, 256)
    cv2.imwrite(RESIZE_DIR + "/" + str(img), resized_img)

if skipped:
    print(">>> Skipped unreadable files: {}".format(skipped))
print(">>> All jobs done!")

Corrupt JPEG data: 1 extraneous bytes before marker 0xd9


>>> All jobs done!


# Utility

In [10]:
def retrieve_stored_features(path: str, product_name: str):
    """Load a pickled descriptor stored as ``<path>/<product_name>.txt``.

    Args:
        path: Directory that holds the stored descriptor files.
        product_name: File name of the descriptor without the ``.txt`` suffix.

    Returns:
        The unpickled object, or ``None`` when the file does not exist or
        cannot be loaded (a message is printed in either case).
    """
    try:
        file_path = path + "/" + product_name + ".txt"
        # SECURITY: pickle.load can execute arbitrary code while loading;
        # only read files that were written by this notebook.
        # The context manager closes the file even if unpickling fails.
        with open(file_path, 'rb') as file:
            return pickle.load(file)

    except FileNotFoundError:
        print("Wrong file name or path")
        return None
    except Exception as e:
        # `except e:` referenced an undefined name, so any other failure
        # surfaced as a NameError instead of being reported here.
        print("Error due to: {}".format(e))
        return None

# Feature Engineering

## Raw Pixel Image Data

### Extract

Read the resized images using either the CPU or the GPU. Choose whichever variant matches your computer's setup.

In [14]:
def read_img_cpu(files):
    """Load each named image from ``RESIZE_DIR`` as a flattened NumPy array.

    ``files`` holds file names relative to ``RESIZE_DIR``. The result is a
    list with one 1-D array per file, in the same order as ``files``.
    """
    return [
        np.array(Image.open(RESIZE_DIR + "/" + name)).flatten()
        for name in files
    ]

In [15]:
def read_img_gpu(files):
    """Load each named image from ``RESIZE_DIR`` via CuPy and flatten it.

    ``files`` holds file names relative to ``RESIZE_DIR``. Each image is turned
    into a CuPy array, copied back with ``cp.asnumpy`` and flattened; the
    result is a list with one 1-D array per file, in the same order.
    """
    # local import: CuPy is only needed when this GPU variant is called
    import cupy as cp

    flattened = []
    for name in files:
        picture = Image.open(RESIZE_DIR + "/" + name)
        on_device = cp.array(picture)
        flattened.append(cp.asnumpy(on_device).flatten())

    return flattened

In [11]:
# exist_ok replaces the separate exists() check and keeps the cell re-runnable
os.makedirs(RAW_PIXEL_DIR, exist_ok=True)

files = os.listdir(RESIZE_DIR)
x__raw_pixel = read_img_cpu(files)

# write every descriptor into its own pickle file
for file_name, descriptor in zip(files, x__raw_pixel):
    # Strip only the final extension. Splitting on the first '.' would cut
    # names that contain a dot (e.g. "Dr. Pepper.jpg" -> "Dr"), so different
    # images could overwrite each other's descriptor.
    stem = os.path.splitext(file_name)[0]
    with open(RAW_PIXEL_DIR + '/' + stem + '.txt', 'wb') as f:
        pickle.dump(descriptor, f)

print(">>> All jobs done!")

>>> All jobs done!


### Read

In [12]:
# files = os.listdir(RESIZE_DIR)

# Load the stored raw-pixel descriptor saved under the name "Iced Black Tea";
# as the last expression of the cell, the returned value is displayed.
retrieve_stored_features(RAW_PIXEL_DIR, "Iced Black Tea")

[254 255 250 ... 255 255 255]


## HoG

### Extract

In [25]:
def generate_hog_feature(files):
    """Compute a HOG descriptor for every image named in ``files``.

    Each file is read from ``RESIZE_DIR`` with OpenCV, converted to grayscale
    and passed to ``skimage.feature.hog``.

    Args:
        files: Iterable of image file names relative to ``RESIZE_DIR``.

    Returns:
        A list with one HOG feature vector per input file, in input order.
    """
    pixels = list()

    # HOG configuration: 9 orientation bins, ppc x ppc pixels per cell and
    # 2 x 2 cells per block (see the hog call below).
    ppc = 16

    for file in files:
        img = cv2.imread(RESIZE_DIR+"/"+file)

        # convert the read img into grayscale
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # generate handcrafted features as a single flattened vector
        fd = hog(img_gray, orientations=9, pixels_per_cell=(ppc, ppc),
                 cells_per_block=(2, 2), block_norm='L2', feature_vector=True)

        pixels.append(fd)

    return pixels

In [26]:
# exist_ok replaces the separate exists() check and keeps the cell re-runnable
os.makedirs(HOG_DIR, exist_ok=True)

files = os.listdir(RESIZE_DIR)
x__hog = generate_hog_feature(files)

# write every descriptor into its own pickle file
for file_name, descriptor in zip(files, x__hog):
    # Strip only the final extension. Splitting on the first '.' would cut
    # names that contain a dot (e.g. "Dr. Pepper.jpg" -> "Dr"), so different
    # images could overwrite each other's descriptor.
    stem = os.path.splitext(file_name)[0]
    with open(HOG_DIR + '/' + stem + '.txt', 'wb') as f:
        pickle.dump(descriptor, f)

print(">>> All jobs done!")

>>> All jobs done!


### Read

In [27]:
# files = os.listdir(RESIZE_DIR)

# Load the stored HOG descriptor saved under the name "Iced Black Tea";
# as the last expression of the cell, the returned value is displayed.
retrieve_stored_features(HOG_DIR, "Iced Black Tea")

array([0.01152718, 0.01671893, 0.03329791, ..., 0.        , 0.        ,
       0.        ])

## LBP

### Extract

In [20]:
# (p, r) pairs evaluated for LBP; each entry supplies the `p` and `r`
# arguments that are forwarded to generate_lbp_feature
LBP_CONFIG = [
    {"p": points, "r": radius}
    for points, radius in ((8, 1), (16, 2), (24, 3))
]

In [33]:
def generate_lbp_feature(files: list, p: int, r: int, method_="uniform"):
    """Build a normalised LBP histogram for every image named in ``files``.

    Each file is read from ``RESIZE_DIR`` with OpenCV, converted to grayscale
    and passed to ``local_binary_pattern`` with ``p``, ``r`` and ``method_``.
    The resulting codes are counted into ``p + 2`` bins and the counts are
    divided by their sum.

    Returns a list with one float histogram per input file, in input order.
    """
    eps = 1e-7  # added to the sum so the division never hits zero
    bin_edges = np.arange(0, p + 3)

    def _histogram_for(name):
        # grayscale image -> LBP codes -> normalised histogram of the codes
        gray = cv2.cvtColor(cv2.imread(RESIZE_DIR + "/" + name), cv2.COLOR_BGR2GRAY)
        codes = local_binary_pattern(gray, p, r, method=method_)
        counts, _ = np.histogram(codes.ravel(), bins=bin_edges, range=(0, p + 2))
        counts = counts.astype("float")
        counts /= (counts.sum() + eps)
        return counts

    return [_histogram_for(name) for name in files]

In [24]:
# make sure every LBP configuration has its own "<p>_<r>" output folder
for cfg in LBP_CONFIG:
    lbp_subdir = LBP_DIR + "/" + str(cfg["p"]) + "_" + str(cfg["r"])
    if os.path.exists(lbp_subdir):
        continue
    os.makedirs(lbp_subdir)

In [34]:
files = os.listdir(RESIZE_DIR)

for pr_config in LBP_CONFIG:

    x__lbp = generate_lbp_feature(files, pr_config["p"], pr_config["r"])

    # output folder of this configuration, named "<p>_<r>"
    out_dir = LBP_DIR + "/" + str(pr_config["p"]) + "_" + str(pr_config["r"])

    # write every descriptor into its own pickle file
    for file_name, descriptor in zip(files, x__lbp):
        # Strip only the final extension. Splitting on the first '.' would cut
        # names that contain a dot (e.g. "Dr. Pepper.jpg" -> "Dr"), so
        # different images could overwrite each other's descriptor.
        stem = os.path.splitext(file_name)[0]
        with open(out_dir + '/' + stem + '.txt', 'wb') as f:
            pickle.dump(descriptor, f)

print(">>> All jobs done!")

>>> All jobs done!


### Read

In [36]:
# files = os.listdir(RESIZE_DIR)

# Load the stored LBP histogram saved under the name "Iced Black Tea" from the
# "8_1" folder; the commented lines below read the other two configurations.
retrieve_stored_features(LBP_DIR+"/8_1", "Iced Black Tea")
# retrieve_stored_features(LBP_DIR+"/16_2", "Iced Black Tea")
# retrieve_stored_features(LBP_DIR+"/24_3", "Iced Black Tea")

array([0.01506042, 0.0426178 , 0.02452087, 0.09857178, 0.13690186,
       0.1847229 , 0.07878113, 0.06362915, 0.28056335, 0.07463074])