In [1]:
import cv2
import os 
import numpy as np 
from sklearn.model_selection import train_test_split
from skimage.feature import local_binary_pattern
from tqdm import tqdm 

In [2]:
DATASET_PATH = './dataset/'
IMG_WIDTH = 224
IMG_HEIGHT = 224
BATCH_SIZE = 32

In [3]:
print('Getting file paths and labels')

image_paths = []
labels = []

positive_path = os.path.join(DATASET_PATH, 'Positive')
negative_path = os.path.join(DATASET_PATH, 'Negative')

for filename in os.listdir(positive_path):
    image_paths.append(os.path.join(positive_path, filename))
    labels.append(1)
    
for filename in os.listdir(negative_path):
    image_paths.append(os.path.join(negative_path, filename))
    labels.append(0)
    
image_paths = np.array(image_paths)
labels = np.array(labels)

X_train_paths, X_test_paths, y_train, y_test = train_test_split(
    image_paths, labels, test_size=0.25, random_state=42, stratify=labels
)

print(f'Training test size: {len(X_train_paths)}')
print(f'Testing test size: {len(X_test_paths)}')

Getting file paths and labels
Training test size: 30000
Testing test size: 10000


# Getting the best descriptor and detector

## just detector: using lbp

In [34]:
def feature_generator_lbp(image_paths, labels, batch_size):
    num_samples = len(image_paths)
    
    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)
        
        shuffled_paths = image_paths[indices]
        shuffled_labels = labels[indices]
        
        for i in range(0, num_samples, batch_size):
            batch_paths = shuffled_paths[i:i+batch_size]
            batch_labels = shuffled_labels[i:i+batch_size]
            
            batch_features = []
            
            for img_path in tqdm(batch_paths, desc='Batch Progress'):
                image = cv2.imread(img_path)
                image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
                
                gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                
                gray_image_eq = cv2.equalizeHist(gray_image)
                
                lbp = local_binary_pattern(gray_image_eq, P=8, R=1, method='uniform')
                
                (hist, _) = np.histogram(lbp.ravel(), bins = np.arange(0, 11), range=(0, 10))
                
                hist = hist.astype('float')
                
                hist /= (hist.sum() + 1e-6)
                
                batch_features.append(hist)
            
            yield np.array(batch_features), np.array(batch_labels)

In [35]:
# Verification
train_gen_lbp = feature_generator_lbp(X_train_paths, y_train, BATCH_SIZE)

print('fetching one batch of feature vectors to test')
sample_batch_features, sample_batch_labels = next(train_gen_lbp) 

print('pipeline complete, ready for training')
print(f'shape of one batch of features: {sample_batch_features.shape}') # 32 per batch and 10 length
print(f'shape of one batch of labels: {sample_batch_labels.shape}')  # 32 per batch
print(f'example feature vector (first image in batch:\n {sample_batch_features[0]})') # 10 arrays

fetching one batch of feature vectors to test


Batch Progress: 100%|██████████| 32/32 [00:00<00:00, 237.87it/s]

pipeline complete, ready for training
shape of one batch of features: (32, 10)
shape of one batch of labels: (32,)
example feature vector (first image in batch:
 [0.02192283 0.06905692 0.04284917 0.16414222 0.16501913 0.19557159
 0.11202567 0.06008849 0.07423868 0.0950853 ])





## detector with descriptor (fast + brief) - not worth exploring

In [25]:
def feature_generator_fast(image_paths, labels, batch_size):
    print("--- RUNNING THE NEW, CORRECTED FAST GENERATOR V2 ---")
    fast = cv2.FastFeatureDetector_create(nonmaxSuppression=False)
    fast.setThreshold(5)
    brief = cv2.xfeatures2d.BriefDescriptorExtractor_create()
    
    num_samples = len(image_paths)
    
    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)
        shuffled_paths = image_paths[indices]
        shuffled_labels = labels[indices]
        
        for i in range(0, num_samples, batch_size):
            batch_paths = shuffled_paths[i:i + batch_size]
            batch_labels = shuffled_labels[i:i + batch_size]
            
            batch_features = []
            
            print(f'processing batch at index: {i}')
            for img_path in tqdm(batch_paths, desc='Batch Progress'):
                image = cv2.imread(img_path)
                image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
                
                gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                
                keypoints = fast.detect(gray_image, None)
                
                keypoints, descriptors = brief.compute(gray_image, keypoints)
                
                if descriptors is not None:
                    feature_vector = np.mean(descriptors, axis=0)
                else:
                    feature_vector = np.zeros(32)
                
                batch_features.append(feature_vector)
            
            yield np.array(batch_features), np.array(batch_labels)

In [33]:
# Verification
train_gen_fast = feature_generator_fast(X_train_paths, y_train, BATCH_SIZE)

print('fetching one batch of feature vectors to test')
sample_fast_batch_features, sample_fast_batch_labels = next(train_gen_fast) 

print('pipeline complete, ready for training')
print(f'shape of one batch of features: {sample_fast_batch_features.shape}') # 32 per batch and 10 length
print(f'shape of one batch of labels: {sample_fast_batch_labels.shape}')  # 32 per batch
print(f'example feature vector (first image in batch:\n {sample_fast_batch_features[0]})') # 10 arrays

fetching one batch of feature vectors to test
--- RUNNING THE NEW, CORRECTED FAST GENERATOR V2 ---
processing batch at index: 0


Batch Progress: 100%|██████████| 32/32 [00:00<00:00, 457.14it/s]

pipeline complete, ready for training
shape of one batch of features: (32, 32)
shape of one batch of labels: (32,)
example feature vector (first image in batch:
 [139.65625  122.25     129.671875 131.75     143.34375  172.453125
 124.4375   140.40625  185.9375   129.828125 156.015625 122.
  73.8125    82.4375   136.890625 174.171875 158.296875 122.3125
 130.953125 146.265625 104.421875 105.78125  152.6875   160.1875
 127.703125 160.421875 126.640625 162.703125 151.46875  181.765625
 123.4375   103.625   ])





### fast is unreasonably fast, since a detector + descriptor combo usually will take a while
so an isolated evaluation will be done

In [None]:
SAMPLE_SIZE = 10000 

fast = cv2.FastFeatureDetector_create(threshold=5, nonmaxSuppression=False)
brief = cv2.xfeatures2d.BriefDescriptorExtractor_create()

def get_descriptor_count(image_path):
    img = cv2.imread(image_path)
    img = cv2.resize(img, (224, 224))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    keypoints_found = fast.detect(gray, None)
    keypoints_kept, descriptors = brief.compute(gray, keypoints_found)
    
    return len(descriptors) if descriptors is not None else 0

print("--- Analyzing Cracked Images ---")
positive_path = os.path.join(DATASET_PATH, 'Positive')
positive_files = [os.path.join(positive_path, fname) for fname in os.listdir(positive_path)[:SAMPLE_SIZE]]
positive_counts = [get_descriptor_count(path) for path in tqdm(positive_files, desc="Cracked")]

print("\n--- Analyzing Uncracked Images ---")
negative_path = os.path.join(DATASET_PATH, 'Negative')
negative_files = [os.path.join(negative_path, fname) for fname in os.listdir(negative_path)[:SAMPLE_SIZE]]
negative_counts = [get_descriptor_count(path) for path in tqdm(negative_files, desc="Uncracked")]

print("\n\n--- FINAL DIAGNOSTIC REPORT ---")
print(f"Average descriptors for CRACKED images: {np.mean(positive_counts):.2f}")
print(f"Average descriptors for UNCRACKED images: {np.mean(negative_counts):.2f}")
print(f"\nYour 'hero' image had {positive_counts[0]} descriptors.")

--- Analyzing Cracked Images ---


Cracked: 100%|██████████| 10000/10000 [01:02<00:00, 159.63it/s]



--- Analyzing Uncracked Images ---


Uncracked: 100%|██████████| 10000/10000 [00:44<00:00, 224.80it/s]



--- FINAL DIAGNOSTIC REPORT ---
Average descriptors for CRACKED images: 3583.86
Average descriptors for UNCRACKED images: 2135.19

Your 'hero' image had 1375 descriptors.





--- Analyzing Cracked Images ---

Cracked: 100%|██████████| 10000/10000 [01:02<00:00, 159.63it/s]

--- Analyzing Uncracked Images ---

Uncracked: 100%|██████████| 10000/10000 [00:44<00:00, 224.80it/s]


--- FINAL DIAGNOSTIC REPORT ---

Average descriptors for CRACKED images: 3583.86

Average descriptors for UNCRACKED images: 2135.19

Your 'hero' image had 1375 descriptors.


very bad result, the uncracked image has so much descriptors, this shows that fast and brief is a bad combo

## orb

## akaze