In [1]:
from itertools import combinations
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
import cv2 
import skimage
import numpy as np
from torchvision import datasets, transforms
from PIL import Image

In [2]:
def powerset_without_emptyset(items):
    '''
    Returns the powerset of a list of items as a list of tuples, excluding the empty set
    '''
    combos = []
    for i in range(len(items)):
        combos.extend(list(combinations(items, len(items) - i)))
    return combos


In [3]:


dataset = datasets.OxfordIIITPet('/data', download=True, transform=None) # a dataset of PIL images 

# getting a pil image
img, label = dataset.__getitem__(21)
# Convert the PIL image to a NumPy array and then to grayscale for use with OpenCV functions
im = np.array(img)

if im.ndim == 3:
    im_grey = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
else:
    im_grey = imtransforms_to_use = [cv2.HuMoments, skimage.feature.graycomatrix, 
                     cv2.calcHist, skimage.feature.local_binary_pattern]
    
# Compute Hu Moments
hu_moments = cv2.HuMoments(cv2.moments(im_grey)).flatten()
hu_moments

array([ 1.19269049e-03,  2.16750134e-07,  8.04738593e-13,  1.85217931e-13,
        2.43328968e-26,  5.48469192e-17, -6.72401755e-26])

In [4]:
import numpy as np
import cv2

def ensure_rgb(img):
    """
    Ensures the input image is a 3-channel RGB numpy array.
    If the image is grayscale (2D), it is converted to RGB.
    If the image is already RGB, it is returned unchanged.
    """
    if isinstance(img, np.ndarray):
        if img.ndim == 2:  # Grayscale
            assert(len(img.expand_dims(axis=0).shape)==3, f"wrong shaped image: {img.shape}")
            return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB).expand_dims(axis=0)
        elif img.ndim == 3 and img.shape[2] == 3:
            assert(len(img.shape)==3, f"wrong shaped image: {img.shape}")
            return img  # Already RGB
        else:
            raise ValueError("Unsupported image shape for ensure_rgb: {}".format(img.shape))
    else:
        raise TypeError("Input must be a numpy ndarray.")

  assert(len(img.expand_dims(axis=0).shape)==3, f"wrong shaped image: {img.shape}")
  assert(len(img.shape)==3, f"wrong shaped image: {img.shape}")


In [5]:
from skimage.feature import graycomatrix, graycoprops
from sklearn.metrics.pairwise import nan_euclidean_distances
import numpy as np
import cv2
import skimage
from tqdm import tqdm

# TODO: check to ensure that the returned 2d data is correct for what you want 

def apply_transformations(images, combo):
    features = []
    for t in combo:
        if t is cv2.HuMoments:
            imgs_grey = [cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if img.ndim == 3 else img for img in images]
            feats = [cv2.normalize(cv2.HuMoments(cv2.moments(im)).flatten(), None, 0, 255, cv2.NORM_MINMAX) for im in imgs_grey]
        elif t is skimage.feature.graycomatrix:
            imgs_grey = [cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if img.ndim == 3 else img for img in images]
            dists, angles = [1], [0]
            feats = []
            for im in imgs_grey:
                glcm = skimage.feature.graycomatrix(im, distances=dists, angles=angles, symmetric=True, normed=True)
                diss = graycoprops(glcm, 'dissimilarity')
                contrast = graycoprops(glcm, 'contrast')
                cat = np.concatenate([diss, contrast], axis=1)
                norm = cv2.normalize(cat, None, 0, 255, cv2.NORM_MINMAX).flatten()
                feats.append(norm)
        elif t is cv2.calcHist:
            feats = [cv2.normalize(
                        cv2.calcHist([img], [0,1,2], None, [8,8,8], [0,256]*3).flatten(),
                        None, 0, 255, cv2.NORM_MINMAX).flatten() for img in images]
        elif t is skimage.feature.local_binary_pattern:
            imgs_grey = [cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if img.ndim == 3 else img for img in images]
            feats = []
            for im in imgs_grey:
                lbp = skimage.feature.local_binary_pattern(im, P=8, R=1)
                hist, _ = np.histogram(lbp.ravel(), bins=10, range=(0, 10))
                feats.append(hist.flatten())
        else:
            print(t)
            raise ValueError(f'Unsupported transformation: {t}')
        
        features.append(np.stack(feats))

        for i, feature in enumerate(features): 
            if len(feature.shape) > 1:
                features[i] = feature.flatten()
                
    return np.concatenate(features, axis=0)

def best_transformation(transformations, class1_imgs, class2_imgs):
    '''
    Parameters: 
        transformations - a list of transformation functions
        class1_imgs, class2_imgs - lists of ndarray images

    Returns:
        The transformation or combination of transformations (as a tuple) that 
        produces the most class separability
    '''
    class1_imgs = [ensure_rgb(img) for img in class1_imgs]
    class2_imgs = [ensure_rgb(img) for img in class2_imgs]
    class1_imgs = [cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX) for img in class1_imgs]
    class2_imgs = [cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX) for img in class2_imgs]

    combos = powerset_without_emptyset(transformations)
    best_combo = None
    
    score_dict = {}
    for combo in tqdm(combos, total=len(combos)):
        
        transformed1 = np.expand_dims(apply_transformations(class1_imgs, combo), axis=1)
        transformed2 = np.expand_dims(apply_transformations(class2_imgs, combo), axis=1)
        
        inter_score = nan_euclidean_distances(transformed1, transformed2).mean()
        intra_score1 = nan_euclidean_distances(transformed1, np.flip(transformed1, axis=0))
        intra_score2 = nan_euclidean_distances(transformed2, np.flip(transformed2, axis=0))
        score_dict[combo] = {"inter-score": inter_score, "intra-score class 1": intra_score1, "intra-score class 2": intra_score2}

    return score_dict

In [6]:
# Get the labels for all images
labels = [dataset._labels[i] for i in range(len(dataset))]

# Choose two class indices (e.g., 0 and 1)
class1_idx = 0
class2_idx = 1

# Get indices for each class
class1_indices = [i for i, l in enumerate(labels) if l == class1_idx]
class2_indices = [i for i, l in enumerate(labels) if l == class2_idx]

# Get images for each class (as PIL Images)
class1_imgs = [dataset[i][0] for i in class1_indices]
class2_imgs = [dataset[i][0] for i in class2_indices]


In [7]:
import numpy as np
# convert pil images to np.array
# TODO: change back to 244,244
class1_imgs_arr = [np.array(img.resize((24,24))) for img in class1_imgs]
class2_imgs_arr = [np.array(img.resize((24,24))) for img in class2_imgs]


In [8]:
print(class1_imgs_arr[0].shape)

(24, 24, 3)


In [9]:
transforms = [cv2.HuMoments, skimage.feature.graycomatrix, cv2.calcHist, skimage.feature.local_binary_pattern]


In [None]:

best_combo = best_transformation(transforms, class1_imgs_arr, class2_imgs_arr)
best_combo 

  0%|          | 0/15 [00:00<?, ?it/s]

In [None]:
# Resize both images to (24, 24, 3) using cv2.resize, then flatten and reshape for distance calculation
scores = []
for img1, img2 in zip(class1_imgs_arr, class2_imgs_arr):
    img1_resized = cv2.resize(img1, (24, 24)).reshape(-1, 1)
    img2_resized = cv2.resize(img2, (24, 24)).reshape(-1, 1)
    scores.append(nan_euclidean_distances(img1_resized, img2_resized ).mean())

score = np.mean(scores)

In [None]:
score

In [None]:
scores = []
for i in range(len(class1_imgs_arr) - 1):
    img1_resized = cv2.resize(class1_imgs_arr[i], (24, 24)).reshape(-1, 1)
    img2_resized = cv2.resize(class1_imgs_arr[i+1], (24, 24)).reshape(-1, 1)

    scores.append(nan_euclidean_distances(img1_resized, img2_resized ).mean())

score = np.mean(scores)

In [None]:
score