In [1]:
import os
import cv2
import time
from skimage import io
import numpy as np
import pandas as pd
from skimage import feature
import re
from skimage.measure import label, regionprops
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from scipy.stats import skew, kurtosis, entropy
from tqdm import tqdm

In [6]:
import cv2
from skimage import io
import numpy as np
import pandas as pd
from scipy import __version__ as scipy_version

# Pair each library's display label with the version string it reports.
_version_labels = ("cv2 (OpenCV)", "numpy", "pandas", "scipy")
_version_values = (cv2.__version__, np.__version__, pd.__version__, scipy_version)
versions = dict(zip(_version_labels, _version_values))

# Emit one "label: version" line per library, in the order listed above.
for lib, version in versions.items():
    print(f"{lib}: {version}")


cv2 (OpenCV): 4.10.0
numpy: 1.26.3
pandas: 2.2.2
scipy: 1.13.1


Sort and read

In [2]:
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers by value ("img2" before "img10").

    Args:
        s: The string (typically a file name) to build a key for.

    Returns:
        A list alternating lower-cased text fragments and ints, e.g.
        ``"img10.png" -> ["img", 10, ".png"]``.
    """
    # Splitting on a capturing group always yields text, digits, text, ... so
    # every odd position is an ASCII digit run matched by [0-9]+ and every even
    # position is plain text (possibly empty). Classifying by position instead
    # of str.isdigit() keeps non-ASCII digit characters (e.g. superscripts) as
    # text, which avoids int() failures and keeps every key type-aligned so
    # keys never compare a str against an int.
    tokens = re.split('([0-9]+)', s)
    return [int(tok) if pos % 2 else tok.lower() for pos, tok in enumerate(tokens)]

def read_image_data(folder_path):
    """List every entry of a folder in natural order and return the full paths.

    Prints a short status line: the first three entry names when at least three
    exist, otherwise a notice that there are too few (or none).

    Args:
        folder_path: Directory whose entries are listed.

    Returns:
        A list of ``folder_path``-joined paths in natural sort order, or an
        empty list when the directory has no entries.
    """
    image_names = os.listdir(folder_path)
    image_names.sort(key=natural_sort_key)

    # Guard clause: nothing to list.
    if len(image_names) == 0:
        print("No images found in the directory.")
        return []

    if len(image_names) >= 3:
        print(f"The names of the first three images in the directory are: {image_names[0]}, {image_names[1]}, {image_names[2]}")
    else:
        print("Not enough images to display three names.")

    return [os.path.join(folder_path, entry) for entry in image_names]

Basic features

In [3]:
def extract_basic_features(image_paths):
    """Extract shape, colour and intensity statistics of the largest bright region per image.

    Each image is converted to grayscale, thresholded at 128, and labelled into
    connected regions; all statistics describe the region with the largest area.

    Fixes relative to the previous version:
      * ``cv2.imread`` returns channels in BGR order, so ``r_mean``/``rs`` now
        read channel 2 and ``b_mean``/``bs`` read channel 0 (they were swapped).
      * ``perimeter`` now comes from the region's ``perimeter`` property. It was
        previously ``cv2.arcLength`` over every pixel coordinate of the region
        in raster order, which measures a zig-zag through the filled region
        rather than its outline.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        A DataFrame with a ``Name`` column (file base name) followed by 18
        feature columns. Images with no region above the threshold get a row of
        NaN features; images that cannot be read are reported and skipped.
    """
    columns = ['Name', 'area', 'length', 'width', 'length_width_ratio', 'major_axis_length',
               'minor_axis_length', 'convex_area', 'perimeter', 'r_mean', 'g_mean', 'b_mean',
               'rs', 'gs', 'bs', 'mean', 'std_dev', 'uniformity', 'third_moment']
    n_features = len(columns) - 1

    features = []
    for image_path in tqdm(image_paths):
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue

        name = os.path.basename(image_path)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, img_bin = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY)
        props = regionprops(label(img_bin))
        if not props:
            features.append([name] + [np.nan] * n_features)
            continue

        largest_prop = max(props, key=lambda x: x.area)

        # bbox is (min_row, min_col, max_row, max_col): 'length' is the extent
        # along columns and 'width' the extent along rows, as before.
        min_row, min_col, max_row, max_col = largest_prop.bbox
        length = max_col - min_col
        width = max_row - min_row

        # Pixels of the region, gathered once: one row per pixel, channels in BGR order.
        coords = largest_prop.coords
        region_pixels = img[coords[:, 0], coords[:, 1]]
        b_mean = np.mean(region_pixels[:, 0])
        g_mean = np.mean(region_pixels[:, 1])
        r_mean = np.mean(region_pixels[:, 2])

        # Statistics over all channels of the region's pixels together.
        overall_mean = np.mean(region_pixels)
        centered = region_pixels - overall_mean

        features.append([
            name,
            largest_prop.area,
            length,
            width,
            length / width if width != 0 else 0,
            largest_prop.major_axis_length,
            largest_prop.minor_axis_length,
            largest_prop.convex_area,
            largest_prop.perimeter,
            r_mean,
            g_mean,
            b_mean,
            np.sqrt(r_mean),
            np.sqrt(g_mean),
            np.sqrt(b_mean),
            overall_mean,
            np.std(region_pixels),
            np.sum(centered ** 2),   # 'uniformity': sum of squared deviations
            np.sum(centered ** 3),   # 'third_moment': sum of cubed deviations
        ])

    return pd.DataFrame(features, columns=columns)

LBP feature

In [4]:
def extract_LBP_features(image_paths, points=8, radius=1):
    """Compute a normalised uniform-LBP histogram for each image.

    Args:
        image_paths: Iterable of image file paths (read as grayscale).
        points: Number of circular neighbours passed to ``local_binary_pattern``.
        radius: Neighbourhood radius passed to ``local_binary_pattern``.

    Returns:
        A DataFrame with a ``Name`` column (file base name) followed by
        ``points + 2`` columns ``LBP_1 .. LBP_{points+2}`` holding the histogram
        normalised to sum to ~1. Unreadable images are reported and get a row of
        NaN values. The column layout is the same for empty input.
    """
    # The histogram uses integer bin edges 0..points+2, i.e. points + 2 bins;
    # the NaN placeholder row and the column names are derived from the same
    # count so they cannot drift apart when `points` changes.
    n_bins = points + 2
    bin_edges = np.arange(0, n_bins + 1)
    columns = ["Name"] + [f"LBP_{i+1}" for i in range(n_bins)]

    rows = []
    for image_path in tqdm(image_paths):
        name = os.path.basename(image_path)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f"Failed to read {image_path}")
            rows.append([name] + [np.nan] * n_bins)
            continue

        lbp = feature.local_binary_pattern(image, points, radius, method="uniform")
        hist, _ = np.histogram(lbp.ravel(), bins=bin_edges)
        hist = hist.astype("float")
        # Small epsilon guards against division by zero.
        hist /= (hist.sum() + 1e-7)
        rows.append([name] + hist.tolist())

    return pd.DataFrame(rows, columns=columns)

GIST feature

In [5]:
def extract_gist_features(image_paths, orientations=8, blocks=4):
    """Compute a grid of magnitude-weighted gradient-orientation histograms per image.

    The image is covered by a ``blocks x blocks`` grid of square cells anchored
    at the top-left corner, each of side ``min(height, width) // blocks``. For
    every cell, Sobel gradient magnitudes are accumulated into ``orientations``
    bins by gradient direction. The concatenated histograms are normalised to
    sum to ~1.

    Args:
        image_paths: Iterable of image file paths (read as grayscale).
        orientations: Number of orientation bins over 0-360 degrees.
        blocks: Number of cells along each axis of the grid.

    Returns:
        A DataFrame with a ``Name`` column (file base name) followed by
        ``orientations * blocks * blocks`` columns ``GIST_0 ..``. Unreadable
        images are reported and get a row of NaN values.
    """
    n_features = orientations * blocks * blocks
    columns = ['Name'] + [f'GIST_{i}' for i in range(n_features)]

    descriptors = []
    for path in tqdm(image_paths):
        name = os.path.basename(path)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Failed to read image: {path}")
            descriptors.append([name] + [np.nan] * n_features)
            continue

        height, width = img.shape
        # NOTE(review): square cells sized from the shorter side mean that, for
        # non-square images, pixels beyond blocks * cell_size along the longer
        # side are not included. Kept as-is to preserve existing feature values.
        cell_size = min(height, width) // blocks

        gx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
        gy = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
        gradient_magnitude = np.sqrt(gx**2 + gy**2)
        gradient_orientation = np.arctan2(gy, gx) * (180 / np.pi) + 180
        # An orientation of exactly 360 degrees floors to `orientations`; the
        # modulo wraps it back into bin 0.
        orientation_bins = np.floor(gradient_orientation / (360 / orientations)).astype(int) % orientations

        descriptor = np.zeros(n_features)
        for i in range(blocks):
            row_span = slice(i * cell_size, (i + 1) * cell_size)
            for j in range(blocks):
                col_span = slice(j * cell_size, (j + 1) * cell_size)
                # Weighted bincount replaces the per-pixel Python loop; it
                # visits the cell's pixels in the same row-major order.
                cell_hist = np.bincount(
                    orientation_bins[row_span, col_span].ravel(),
                    weights=gradient_magnitude[row_span, col_span].ravel(),
                    minlength=orientations,
                )
                start = (i * blocks + j) * orientations
                descriptor[start:start + orientations] = cell_hist

        # Small epsilon guards against division by zero for flat images.
        descriptor /= (np.sum(descriptor) + 1e-7)
        descriptors.append([name] + descriptor.tolist())

    return pd.DataFrame(descriptors, columns=columns)

GLCM feature

In [6]:
def GLCM_all(image_paths, distance=3):
    """Compute GLCM texture properties at four angles for each image.

    For every readable image a 256-level, symmetric, normalised gray-level
    co-occurrence matrix is built at the given pixel distance for 0, 45, 90 and
    135 degrees, and contrast, correlation, energy and homogeneity are read
    from it.

    Fixes relative to the previous version:
      * Values were appended angle-by-angle while the column names were
        generated property-by-property, so most columns held a different
        property/angle than their name said. Values are now collected in the
        same property-major order as the column names.
      * Empty input (or no readable image) returns an empty frame with the
        expected columns instead of failing in ``pd.concat``.
      * Angle labels are rounded rather than truncated when converted to
        degrees.

    Args:
        image_paths: Iterable of image file paths.
        distance: Pixel pair distance used for the co-occurrence matrix.

    Returns:
        A DataFrame with a ``Name`` column (file base name) followed by
        ``<property>_<degrees>`` columns, e.g. ``contrast_0``,
        ``contrast_45``, ... ``homogeneity_135``. Unreadable images are
        reported and skipped.
    """
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    properties = ('contrast', 'correlation', 'energy', 'homogeneity')
    columns = ['Name'] + [
        f"{prop}_{int(round(np.degrees(angle)))}" for prop in properties for angle in angles
    ]

    rows = []
    for image_path in tqdm(image_paths):
        img = cv2.imread(image_path)
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        glcm = graycomatrix(img, [distance], angles, levels=256, symmetric=True, normed=True)

        row = [os.path.basename(image_path)]
        for prop in properties:
            # graycoprops is indexed [distance, angle]; only one distance is
            # used, so row 0 holds this property's value for every angle.
            row.extend(graycoprops(glcm, prop)[0, :].tolist())
        rows.append(row)

    return pd.DataFrame(rows, columns=columns)


In [None]:
# Directory whose entries are processed. Left empty here as a placeholder;
# point it at the image folder before running this cell.
folder_path = r''
# Full paths of every entry in the folder, in natural sort order.
image_paths = read_image_data(folder_path)
# One feature table per descriptor family; each table carries a 'Name' column
# with the file's base name alongside its feature columns.
glcm_features_df = GLCM_all(image_paths)
basic_features_df = extract_basic_features(image_paths)
lbp_features_df = extract_LBP_features(image_paths)
gist_features_df = extract_gist_features(image_paths)