In [1]:
import cv2
import os 
import numpy as np 
from sklearn.model_selection import train_test_split
from skimage.feature import local_binary_pattern
from tqdm import tqdm 

In [5]:
# --- Notebook configuration -------------------------------------------------
# Root folder of the dataset; the 'Positive' and 'Negative' sub-folders are
# joined onto this path when the image list is built.
DATASET_PATH = './dataset/'

# Every image is resized to this (width, height) before feature extraction.
IMG_WIDTH, IMG_HEIGHT = 224, 224

# Number of images whose features are produced per generator step.
BATCH_SIZE = 32

In [6]:
print('Getting file paths and labels')

image_paths = []
labels = []

positive_path = os.path.join(DATASET_PATH, 'Positive')
negative_path = os.path.join(DATASET_PATH, 'Negative')

# Walk both class folders with one loop: 'Positive' images get label 1,
# 'Negative' images get label 0.
for class_dir, class_label in ((positive_path, 1), (negative_path, 0)):
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the input to train_test_split identical on every machine,
    # so random_state=42 actually yields a reproducible split.
    for filename in sorted(os.listdir(class_dir)):
        file_path = os.path.join(class_dir, filename)
        # Skip hidden entries and sub-directories (e.g. '.ipynb_checkpoints',
        # '.DS_Store'): they are not images and would make cv2.imread return
        # None later in the pipeline.
        if filename.startswith('.') or not os.path.isfile(file_path):
            continue
        image_paths.append(file_path)
        labels.append(class_label)

# NumPy arrays allow fancy indexing with shuffled index arrays downstream.
image_paths = np.array(image_paths)
labels = np.array(labels)

# Stratified 75/25 split so both subsets keep the original class balance.
X_train_paths, X_test_paths, y_train, y_test = train_test_split(
    image_paths, labels, test_size=0.25, random_state=42, stratify=labels
)

print(f'Training test size: {len(X_train_paths)}')
print(f'Testing test size: {len(X_test_paths)}')

Getting file paths and labels
Training test size: 30000
Testing test size: 10000


In [11]:
def _lbp_histogram(img_path, n_points=8, radius=1):
    """Return the normalised uniform-LBP histogram of a single image file.

    The image is loaded with OpenCV, resized to (IMG_WIDTH, IMG_HEIGHT),
    converted to grayscale and histogram-equalised before the local binary
    pattern is computed.

    Args:
        img_path: Path of the image file to load.
        n_points: Number of circularly symmetric neighbour points (``P``).
        radius: Radius of the neighbour circle (``R``).

    Returns:
        1-D float array of length ``n_points + 2`` that sums to ~1.

    Raises:
        ValueError: If OpenCV cannot read/decode the file.
    """
    image = cv2.imread(str(img_path))
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # without this check the failure surfaces as an opaque cv2.error
        # inside cv2.resize with no hint of which file was at fault.
        raise ValueError(f'Could not read image: {img_path}')

    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_image_eq = cv2.equalizeHist(gray_image)

    lbp = local_binary_pattern(gray_image_eq, P=n_points, R=radius, method='uniform')

    # The 'uniform' method emits integer codes 0 .. P+1, i.e. P+2 distinct
    # values; one unit-wide bin per code needs P+3 edges (0, 1, ..., P+2).
    bin_edges = np.arange(0, n_points + 3)
    hist, _ = np.histogram(lbp.ravel(), bins=bin_edges)

    hist = hist.astype('float')
    # The small epsilon guards against division by zero.
    hist /= (hist.sum() + 1e-6)
    return hist


def feature_generator_lbp(image_paths, labels, batch_size, lbp_points=8, lbp_radius=1):
    """Endlessly yield shuffled batches of LBP histogram features and labels.

    At the start of every pass over the data the samples are reshuffled with
    ``np.random.shuffle``; the last batch of a pass may be smaller than
    ``batch_size``.

    Args:
        image_paths: Sequence (list or array) of image file paths.
        labels: Sequence of labels, aligned one-to-one with ``image_paths``.
        batch_size: Positive number of samples per yielded batch.
        lbp_points: Number of LBP neighbour points (``P``); default 8 gives
            10-element feature vectors.
        lbp_radius: LBP neighbour radius (``R``); default 1.

    Yields:
        Tuple ``(features, batch_labels)`` where ``features`` has shape
        ``(n, lbp_points + 2)`` and ``batch_labels`` has shape ``(n,)``.

    Raises:
        ValueError: If the inputs are empty, differ in length, if
            ``batch_size`` is not positive, or if an image cannot be read.
            Because this is a generator, the checks run on the first
            ``next()`` call rather than when the generator is created.
    """
    # Accept plain lists too: fancy indexing below requires ndarrays.
    image_paths = np.asarray(image_paths)
    labels = np.asarray(labels)
    num_samples = len(image_paths)

    if num_samples == 0:
        # Without this guard the `while True` loop would spin forever without
        # ever yielding, hanging the caller's next().
        raise ValueError('image_paths is empty; nothing to generate')
    if len(labels) != num_samples:
        raise ValueError(
            f'image_paths and labels differ in length: {num_samples} vs {len(labels)}'
        )
    if batch_size <= 0:
        # A negative step produces an empty range, which would also loop forever.
        raise ValueError(f'batch_size must be positive, got {batch_size}')

    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)

        shuffled_paths = image_paths[indices]
        shuffled_labels = labels[indices]

        for i in range(0, num_samples, batch_size):
            batch_paths = shuffled_paths[i:i+batch_size]
            batch_labels = shuffled_labels[i:i+batch_size]

            batch_features = [
                _lbp_histogram(img_path, n_points=lbp_points, radius=lbp_radius)
                for img_path in batch_paths
            ]

            yield np.array(batch_features), np.array(batch_labels)

In [12]:
# Verification: pull a single batch from the training generator and
# sanity-check it before any training is attempted.
train_gen_lbp = feature_generator_lbp(X_train_paths, y_train, BATCH_SIZE)

print('fetching one batch of feature vectors to test')
sample_batch_features, sample_batch_labels = next(train_gen_lbp)

# Fail loudly here instead of announcing success unconditionally.
# Uniform LBP with P=8 produces 10 histogram bins per image.
assert sample_batch_features.ndim == 2 and sample_batch_features.shape[1] == 10, (
    f'unexpected feature shape: {sample_batch_features.shape}'
)
assert sample_batch_features.shape[0] == sample_batch_labels.shape[0], (
    'features and labels are not aligned'
)
# Each histogram is normalised, so every row should sum to ~1.
assert np.allclose(sample_batch_features.sum(axis=1), 1.0, atol=1e-4), (
    'feature histograms are not normalised'
)

print('pipeline complete, ready for training')
print(f'shape of one batch of features: {sample_batch_features.shape}') # (batch size, 10 histogram bins)
print(f'shape of one batch of labels: {sample_batch_labels.shape}')  # (batch size,)
print(f'example feature vector (first image in batch:\n {sample_batch_features[0]})') # one vector of 10 values

fetching one batch of feature vectors to test
pipeline complete, ready for training
shape of one batch of features: (32, 10)
shape of one batch of labels: (32,)
example feature vector (first image in batch:
 [0.01070233 0.03260523 0.03453842 0.17028061 0.31072624 0.22644292
 0.08043686 0.03798629 0.04757254 0.04870855])
