### Importing packages
#### Run this cell only once, because it takes a long time to finish

In [None]:
import os
import gc
import json
import sqlite3
import numpy as np
import tensorflow as tf
import cv2
from tensorflow.keras import backend as K # type: ignore
from tensorflow.keras import mixed_precision # type: ignore
from tensorflow.keras.applications import VGG16, vgg16 # type: ignore
from tensorflow.keras.applications.vgg16 import preprocess_input # type: ignore
from tensorflow.keras.preprocessing.image import load_img, img_to_array # type: ignore
from skimage.color import rgb2gray
from skimage.feature import local_binary_pattern, hog

# ------------------- Packages used by the search section -------------------

from sklearn.metrics.pairwise import cosine_similarity

#### Initializing configs
- This configuration should be run only once.
- `BATCH_SIZE`: setting this to 32 seems to be an ideal choice.
- `IMG_SIZE`: images should be resized, cropped, etc. to the declared size, since all of the processing steps require a `(224x224)` image.

In [None]:
# Root folder scanned by extract_features(); each sub-directory name is stored as the label.
ROOT_DIR = "./Raw"
# SQLite database file that init_db() / load_database_features() open.
DB_PATH = "./Database/structured_features_ver3.db"
# Number of images accumulated before a batch is processed and stored.
BATCH_SIZE = 32
# target_size handed to load_img() in load_image().
IMG_SIZE = (224, 224)
# File that save_sample() writes its JSON dump of the first rows to.
JSON_OUTPUT = "./inspection/image_features_sample_ver3.json"
# Cache directory and pickle paths. They are not used by the cells shown here;
# presumably consumed by the search section — TODO confirm.
CACHE_DIR = "cache"
TREES_FILE = os.path.join(CACHE_DIR, "balltrees_ver2.pkl")
ARRAYS_FILE = os.path.join(CACHE_DIR, "arrays_ver2.pkl")
PATHS_FILE = os.path.join(CACHE_DIR, "image_paths_ver2.pkl")
META_FILE = os.path.join(CACHE_DIR, "meta_ver2.pkl")

**Environment** config loading and feature **model** loading functions

In [None]:
def configure_environment():
    """Enable mixed-precision compute and on-demand GPU memory allocation.

    Sets the global Keras dtype policy to ``mixed_float16`` and turns on
    memory growth for every GPU TensorFlow reports. Memory growth has to be
    configured identically on all GPUs, so enabling it on the first device
    only fails on multi-GPU machines.
    """
    mixed_precision.set_global_policy("mixed_float16")
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as err:
            # Memory growth can only be changed before the GPUs are
            # initialised (e.g. when TensorFlow was already used in this
            # kernel); report it instead of aborting the whole run.
            print(f"Could not enable memory growth for {gpu.name}: {err}")

def load_feature_model():
    """Build the VGG16 network used to compute deep embeddings.

    Returns:
        A ``VGG16`` instance created with ImageNet weights, without the top
        classification layers, and with ``pooling="avg"``.
    """
    backbone = VGG16(
        weights="imagenet",
        include_top=False,
        pooling="avg",
    )
    return backbone

Database initialization and feature-storing queries

In [27]:
def init_db():
    """Open the feature database and make sure the ``image_features`` table exists.

    The parent directory of ``DB_PATH`` is created when missing, because
    ``sqlite3.connect`` cannot create a database file inside a directory that
    does not exist.

    Returns:
        tuple: ``(conn, c)`` — the open SQLite connection and a cursor on it.
        The caller is responsible for committing and closing the connection.
    """
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    # Every descriptor column is TEXT: the vectors are stored as JSON strings.
    c.execute("""
        CREATE TABLE IF NOT EXISTS image_features (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            image_path TEXT,
            label TEXT,
            color_histogram TEXT,
            shape_descriptor TEXT,
            texture_descriptor TEXT,
            deep_embedding TEXT
        )
    """)
    return conn, c

def insert_features(cursor, data):
    """Bulk-insert feature rows into the ``image_features`` table.

    Args:
        cursor: SQLite cursor on a database that contains ``image_features``.
        data: Iterable of 6-item sequences ordered as (image_path, label,
            color_histogram, shape_descriptor, texture_descriptor,
            deep_embedding).

    The statement is not committed here; that is left to the caller.
    """
    insert_sql = """
        INSERT INTO image_features (
            image_path, label, color_histogram, shape_descriptor, texture_descriptor, deep_embedding
        ) VALUES (?, ?, ?, ?, ?, ?)
    """
    cursor.executemany(insert_sql, data)

def save_sample(cursor):
    """Dump the first 10 rows of ``image_features`` to ``JSON_OUTPUT`` for inspection.

    Each row is written as an object keyed by column name. The output
    directory is created when missing so that a fresh checkout does not fail
    on the final step of the extraction.

    Args:
        cursor: SQLite cursor on the feature database.
    """
    cursor.execute("SELECT * FROM image_features LIMIT 10;")
    rows = cursor.fetchall()
    # cursor.description holds one tuple per result column; item 0 is its name.
    columns = [desc[0] for desc in cursor.description]

    out_dir = os.path.dirname(JSON_OUTPUT)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(JSON_OUTPUT, "w") as f:
        json.dump([dict(zip(columns, row)) for row in rows], f, indent=4)
        
def load_database_features():
    """Load every stored feature vector from the database at ``DB_PATH``.

    Returns:
        tuple: ``(image_paths, color, shape, texture, deep)`` where
        ``image_paths`` is a list of the stored paths and the other four items
        are NumPy arrays built from the JSON-decoded descriptor columns, in
        row order.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT image_path, color_histogram, shape_descriptor, texture_descriptor, deep_embedding FROM image_features")
        rows = cursor.fetchall()
    finally:
        # Release the connection even when the query fails (e.g. missing table).
        conn.close()

    image_paths = [row[0] for row in rows]
    # Columns 1-4 hold JSON-encoded lists; decode each column into one array.
    color_features, shape_features, texture_features, deep_features = (
        np.array([json.loads(row[col]) for row in rows]) for col in range(1, 5)
    )

    return image_paths, color_features, shape_features, texture_features, deep_features

- Images loading function
- Image batch preparing function

In [30]:
def load_image(path):
    """Read the image at ``path``, resized to ``IMG_SIZE``, as an array.

    Args:
        path: Location of the image file.

    Returns:
        The result of ``img_to_array`` applied to the loaded image.
    """
    pil_image = load_img(path, target_size=IMG_SIZE)
    pixels = img_to_array(pil_image)
    return pixels

def preprocess_batch(images):
    """Stack images into one array and apply the VGG16 input preprocessing.

    Args:
        images: Sequence of image arrays to batch together.

    Returns:
        The preprocessed batch cast to ``float16``.
    """
    # np.array() builds a fresh array, so the caller's images are not modified.
    stacked = np.array(images)
    prepared = preprocess_input(stacked)
    return prepared.astype("float16")


Below are functions that extract features such as `color`, `shape`, and `texture`.
The values extracted by these functions will differ from what `VGG16` extracts from the images.

In [None]:
def normalize_object(img, output_size=(128, 128)):
    """Warp the largest object in ``img`` so it fills an upright output image.

    The image is binarised with Otsu's threshold, the external contour with
    the largest area is selected, and its minimum-area bounding rectangle is
    mapped onto the full output frame with a perspective transform.

    Args:
        img: BGR image array (converted with ``cv2.COLOR_BGR2GRAY``).
        output_size: ``(width, height)`` of the returned image.

    Returns:
        The warped image, or ``img`` simply resized to ``output_size`` when no
        contour is found or the bounding rectangle has zero area.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return cv2.resize(img, output_size)

    contour = max(contours, key=cv2.contourArea)
    rect = cv2.minAreaRect(contour)
    rect_w, rect_h = rect[1]
    if min(rect_w, rect_h) <= 0:
        # A point- or line-shaped contour gives a degenerate rectangle for
        # which no meaningful perspective transform exists.
        return cv2.resize(img, output_size)

    box = cv2.boxPoints(rect)

    # Order the corners as top-left, top-right, bottom-right, bottom-left so
    # they pair up with dst_pts. Sorting by (y, x) alone leaves the two bottom
    # corners swapped relative to dst_pts and yields a self-intersecting
    # ("bow-tie") mapping, so each row pair is additionally sorted by x.
    by_row = sorted(box.tolist(), key=lambda p: (p[1], p[0]))
    top_left, top_right = sorted(by_row[:2], key=lambda p: p[0])
    bottom_left, bottom_right = sorted(by_row[2:], key=lambda p: p[0])
    src_pts = np.float32([top_left, top_right, bottom_right, bottom_left])

    width, height = output_size
    dst_pts = np.float32(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
    )
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    warped = cv2.warpPerspective(img, M, output_size)
    return warped


def extract_color_histogram(img, bins=32):
    """Build a normalised HSV histogram of an RGB image.

    Args:
        img: RGB image array; it is cast to ``uint8`` before conversion.
        bins: Number of bins used for each of the three HSV channels.

    Returns:
        list: ``3 * bins`` floats (H, S, V histograms concatenated) divided by
        their overall sum.
    """
    hsv = cv2.cvtColor(img.astype("uint8"), cv2.COLOR_RGB2HSV)
    # Value range per channel, in H, S, V order.
    channel_ranges = ((0, 180), (0, 256), (0, 256))
    per_channel = [
        np.histogram(hsv[:, :, idx], bins=bins, range=bounds)[0]
        for idx, bounds in enumerate(channel_ranges)
    ]
    hist = np.concatenate(per_channel)
    return (hist / hist.sum()).tolist()


def extract_hog(img_path):
    """Compute the HOG descriptor of the image stored at ``img_path``.

    The image is read with OpenCV, converted to grayscale and passed to
    ``skimage.feature.hog`` at its stored resolution (no resize is done here).

    Args:
        img_path: Path of the image file.

    Returns:
        The HOG feature vector returned by ``hog``.
    """
    image = cv2.imread(img_path)
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # visualize=False: only the descriptor is used, so skip rendering the
    # HOG visualisation image that would otherwise be computed and discarded.
    fd = hog(
        gray_image,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(1, 1),
        visualize=False,
        block_norm="L2",
    )
    return fd


def extract_rgb(img_path):
    """Compute a joint 8x8x8 colour histogram of the image at ``img_path``.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.

    Returns:
        The histogram normalised in place by ``cv2.normalize`` (default
        settings) and flattened to one dimension.
    """
    pixels = cv2.imread(img_path)
    channels = [0, 1, 2]
    bins_per_channel = [8, 8, 8]
    value_ranges = [0, 256, 0, 256, 0, 256]
    histogram = cv2.calcHist([pixels], channels, None, bins_per_channel, value_ranges)
    cv2.normalize(histogram, histogram)
    return histogram.flatten()


def extract_hog_rgb(img_path):
    """Compute HOG and colour-histogram features of one image and join them.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.

    Returns:
        A 1-D array: the HOG descriptor of the grayscale image followed by
        the flattened, normalised 8x8x8 colour histogram.
    """
    image = cv2.imread(img_path)
    hist_rgb = cv2.calcHist(
        [image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]
    )
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # visualize=False: only the descriptor is used, so skip rendering the
    # HOG visualisation image that would otherwise be computed and discarded.
    fd = hog(
        gray_image,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(1, 1),
        visualize=False,
        block_norm="L2",
    )
    cv2.normalize(hist_rgb, hist_rgb)
    combined_features = np.concatenate((fd, hist_rgb.flatten()))
    return combined_features


def extract_shape_descriptor(img_path):
    """Compute log-scaled Hu moments of the main object in an image file.

    The image is read from disk, passed through ``normalize_object`` with a
    128x128 output, converted to grayscale, and its seven Hu moments are
    compressed with ``log1p(abs(.))``.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.

    Returns:
        list: The transformed Hu moments as Python floats.
    """
    bgr = cv2.imread(img_path)
    aligned = normalize_object(bgr, output_size=(128, 128))
    aligned_gray = cv2.cvtColor(aligned.astype("uint8"), cv2.COLOR_BGR2GRAY)
    hu_values = cv2.HuMoments(cv2.moments(aligned_gray)).flatten()
    compressed = np.log1p(np.abs(hu_values))
    return compressed.tolist()

def extract_hog_hu(img_path):
    """Compute HOG features and log-scaled Hu moments of one image and join them.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.

    Returns:
        A 1-D array: the HOG descriptor of the grayscale image followed by
        its seven Hu moments transformed with ``log1p(abs(.))``.
    """
    image = cv2.imread(img_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Extract HOG features. visualize=False: only the descriptor is used, so
    # skip rendering the visualisation image that would otherwise be discarded.
    hog_features = hog(
        gray,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(1, 1),
        visualize=False,
        block_norm="L2",
    )

    # Extract Hu Moments
    moments = cv2.moments(gray)
    hu_moments = cv2.HuMoments(moments).flatten()
    hu_moments = np.log1p(np.abs(hu_moments))  # Stability for large values

    # Combine features
    combined = np.concatenate([hog_features, hu_moments])
    return combined


def extract_texture_descriptor(img):
    """Build a normalised local-binary-pattern histogram of an RGB image.

    Args:
        img: RGB image array; it is cast to ``uint8`` before the grayscale
            conversion.

    Returns:
        list: 32 floats — the LBP histogram over the range (0, 256) divided
        by its sum.
    """
    grayscale = rgb2gray(img.astype("uint8"))
    lbp_codes = local_binary_pattern(grayscale, P=8, R=1.0)
    counts = np.histogram(lbp_codes, bins=32, range=(0, 256))[0]
    total = counts.sum()
    return (counts / total).tolist()

In [None]:
def extract_features():
    """Extract features for every image under ``ROOT_DIR`` and store them.

    Each sub-directory of ``ROOT_DIR`` is treated as one class and its name is
    stored as the label. Images are loaded, grouped into batches of
    ``BATCH_SIZE``, and handed to ``process_and_store_batch``. When all images
    are done the inserts are committed and a JSON sample is written.

    Files that cannot be opened as images are reported and skipped instead of
    aborting the whole run, directory listings are sorted so the insertion
    order is reproducible, and the database connection is closed even when an
    error occurs.
    """
    configure_environment()
    model = load_feature_model()
    conn, c = init_db()

    try:
        image_batch, path_batch, label_batch = [], [], []

        for class_dir in sorted(os.listdir(ROOT_DIR)):
            class_path = os.path.join(ROOT_DIR, class_dir)
            if not os.path.isdir(class_path):
                continue

            for image_file in sorted(os.listdir(class_path)):
                image_path = os.path.join(class_path, image_file)
                try:
                    img = load_image(image_path)
                except OSError as err:
                    # Covers unreadable files, nested directories and files
                    # Pillow cannot identify as images.
                    print(f"Skipping unreadable file {image_path}: {err}")
                    continue

                image_batch.append(img)
                path_batch.append(image_path)
                label_batch.append(class_dir)

                if len(image_batch) == BATCH_SIZE:
                    # The unprocessed images serve as both the model input
                    # and the raw images for the handcrafted descriptors.
                    process_and_store_batch(
                        model, image_batch, image_batch, path_batch, label_batch, c
                    )
                    image_batch, path_batch, label_batch = [], [], []

        if image_batch:
            process_and_store_batch(
                model, image_batch, image_batch, path_batch, label_batch, c
            )

        # Commit before writing the sample so a failure while dumping the
        # JSON file cannot discard the extracted features.
        conn.commit()
        save_sample(c)
    finally:
        conn.close()


def process_and_store_batch(
    model, image_batch, raw_images, path_batch, label_batch, cursor
):
    """Compute all descriptors for one batch and insert them into the database.

    Args:
        model: Network whose ``predict`` output is stored as the deep embedding.
        image_batch: Images to preprocess and feed to ``model``.
        raw_images: Unprocessed RGB arrays used for the colour and texture
            descriptors, aligned with ``image_batch``.
        path_batch: File path of each image; stored in the database and used
            for the shape descriptor.
        label_batch: Label of each image.
        cursor: SQLite cursor the rows are inserted through (no commit here).
    """
    batch_np = preprocess_batch(image_batch)
    features = model.predict(batch_np, verbose=0)

    insert_data = []

    for img_raw, img_path, label, embedding in zip(
        raw_images, path_batch, label_batch, features
    ):
        color_hist = extract_color_histogram(img_raw)
        # extract_shape_descriptor reads the file itself via cv2.imread, so it
        # must receive the path; passing the pixel array makes imread fail.
        shape_desc = extract_shape_descriptor(img_path)
        texture_desc = extract_texture_descriptor(img_raw)
        deep_embed = embedding.flatten().tolist()

        insert_data.append(
            (
                img_path,
                label,
                json.dumps(color_hist),
                json.dumps(shape_desc),
                json.dumps(texture_desc),
                json.dumps(deep_embed),
            )
        )

    insert_features(cursor, insert_data)
    # Reset Keras global state and run the garbage collector after each batch.
    K.clear_session()
    gc.collect()

In [33]:
# Runs the full extraction: every image under ROOT_DIR goes through VGG16.
# NOTE: rows are inserted with a plain INSERT, so re-running this cell appends duplicate rows to image_features.

extract_features()



# Searching

The sections below perform the similarity search.

The **input** will be:
- An image from **outside** the dataset
- An image from **inside** the dataset

The **outputs** will be:
- The **3** images with the **highest** similarities
- Similarities covering **4** feature vectors:
    - Color
    - Shape
    - Texture
    - Embedding (extracted using VGG16)