In [None]:
import cv2
import os 
import numpy as np 
from sklearn.model_selection import train_test_split
from skimage.feature import local_binary_pattern, hog
from tqdm import tqdm 
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report

In [None]:
# Root directory expected to contain the 'Positive' and 'Negative' image folders.
DATASET_PATH = './dataset/'

# Target size (in pixels) every image is resized to before feature extraction.
IMG_WIDTH, IMG_HEIGHT = 224, 224

# Number of images handed to the feature generator per mini-batch.
BATCH_SIZE = 32

In [None]:
print('Getting file paths and labels')

positive_path = os.path.join(DATASET_PATH, 'Positive')
negative_path = os.path.join(DATASET_PATH, 'Negative')

image_paths = []
labels = []

# 'Positive' images get label 1, 'Negative' images get label 0.
for class_dir, class_label in ((positive_path, 1), (negative_path, 0)):
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # seeded split below reproducible across machines and file systems.
    for filename in sorted(os.listdir(class_dir)):
        file_path = os.path.join(class_dir, filename)
        # Skip sub-directories and hidden entries (e.g. '.ipynb_checkpoints',
        # '.DS_Store'): they are not images and would fail when loaded later.
        if filename.startswith('.') or not os.path.isfile(file_path):
            continue
        image_paths.append(file_path)
        labels.append(class_label)

image_paths = np.array(image_paths)
labels = np.array(labels)

# Every collected path must have exactly one label.
assert len(image_paths) == len(labels)

# Stratified 75/25 split so both sets keep the same class balance.
X_train_paths, X_test_paths, y_train, y_test = train_test_split(
    image_paths, labels, test_size=0.25, random_state=42, stratify=labels
)

print(f'Training set size: {len(X_train_paths)}')
print(f'Testing set size: {len(X_test_paths)}')

# Feature Engineering

In [None]:
def feature_generator_akaze(image_paths, labels, batch_size):
    """Yield shuffled mini-batches of AKAZE features and labels, forever.

    Each image is read from disk, resized to ``IMG_WIDTH`` x ``IMG_HEIGHT``,
    converted to grayscale and summarised by the element-wise mean of its
    AKAZE descriptors. Images for which AKAZE returns no descriptors are
    represented by an all-zero vector of the same length. The sample order is
    reshuffled at the start of every pass over the data.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        batch_size: Maximum number of images per yielded batch; the last batch
            of a pass may be smaller.

    Yields:
        Tuple ``(features, batch_labels)`` of NumPy arrays with one feature
        row per image in the batch.

    Raises:
        ValueError: If the inputs are empty or differ in length, if
            ``batch_size`` is not positive, or if an image cannot be read.
    """
    # Accept plain lists as well as arrays; fancy indexing below needs arrays.
    image_paths = np.asarray(image_paths)
    labels = np.asarray(labels)
    num_samples = len(image_paths)

    # Without these guards the infinite loop below would spin forever without
    # ever yielding (empty input or a negative range step).
    if num_samples == 0:
        raise ValueError('image_paths is empty; nothing to generate features from')
    if len(labels) != num_samples:
        raise ValueError(
            f'image_paths and labels differ in length: {num_samples} vs {len(labels)}'
        )
    if batch_size <= 0:
        raise ValueError(f'batch_size must be positive, got {batch_size}')

    akaze = cv2.AKAZE_create()
    # Length of one descriptor for this detector configuration; used for the
    # zero-vector fallback so it always matches the real feature length.
    descriptor_length = akaze.descriptorSize()

    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)
        shuffled_paths = image_paths[indices]
        shuffled_labels = labels[indices]

        for i in range(0, num_samples, batch_size):
            batch_paths = shuffled_paths[i:i + batch_size]
            batch_labels = shuffled_labels[i:i + batch_size]

            batch_features = []

            # leave=False removes each finished per-batch bar so long training
            # runs do not flood the cell output.
            for img_path in tqdm(batch_paths, desc='Batch Progress', leave=False):
                image = cv2.imread(str(img_path))
                if image is None:
                    # cv2.imread signals failure by returning None; fail here
                    # with the offending path instead of inside cv2.resize.
                    raise ValueError(f'Could not read image: {img_path}')
                image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
                gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

                _, descriptors = akaze.detectAndCompute(gray_img, None)

                if descriptors is not None:
                    # Collapse the variable number of descriptors into one
                    # fixed-length vector.
                    feature_vectors = np.mean(descriptors, axis=0)
                else:
                    feature_vectors = np.zeros(descriptor_length)

                batch_features.append(feature_vectors)

            yield np.array(batch_features), np.array(batch_labels)

In [None]:
# Verification: pull a single batch to confirm the feature pipeline runs end to end.
train_gen_akaze = feature_generator_akaze(X_train_paths, y_train, BATCH_SIZE)

print('fetching one batch of feature vectors to test')
sample_akaze_batch_features, sample_akaze_batch_labels = next(train_gen_akaze)

# Every feature row must have a matching label.
assert sample_akaze_batch_features.shape[0] == sample_akaze_batch_labels.shape[0]

print('pipeline complete, ready for training')
# Expected (n_images, 61): at most BATCH_SIZE rows, each a mean AKAZE descriptor
# (61 values with the default AKAZE settings).
print(f'shape of one batch of features: {sample_akaze_batch_features.shape}')
# Expected (n_images,): one label per image in the batch.
print(f'shape of one batch of labels: {sample_akaze_batch_labels.shape}')
print(f'example feature vector (first image in batch):\n {sample_akaze_batch_features[0]}')

In [None]:
# Number of generator batches that make up one full pass over the training set.
# Ceiling division so the final, smaller batch is included: floor division
# would skip it and would give zero steps (an unfitted model) whenever there
# are fewer training images than BATCH_SIZE.
NUM_TRAINING_STEPS = (len(X_train_paths) + BATCH_SIZE - 1) // BATCH_SIZE

# Maps a pipeline name to the generator function that produces its training batches.
pipeline_generators = {
    'AKAZE': feature_generator_akaze,
}

def extract_test_features(extractor_name, paths):
    """Compute one mean-AKAZE feature vector per image in ``paths``.

    Each image is read, resized to ``IMG_WIDTH`` x ``IMG_HEIGHT``, converted
    to grayscale and summarised by the element-wise mean of its AKAZE
    descriptors. Images for which AKAZE returns no descriptors get an
    all-zero vector of the same length.

    Args:
        extractor_name: Name shown in the log and progress messages. It does
            not select the extractor; features are always computed with AKAZE.
        paths: Iterable of image file paths.

    Returns:
        NumPy array with one feature row per path, in the same order. For
        empty input the array has shape ``(0, descriptor_length)``.

    Raises:
        ValueError: If an image cannot be read.
    """
    print(f'extracting test features for {extractor_name}')

    akaze = cv2.AKAZE_create()
    # Length of one descriptor for this detector configuration; used for the
    # zero-vector fallback so it always matches the real feature length.
    descriptor_length = akaze.descriptorSize()

    test_features = []

    for img_path in tqdm(paths, desc=f'Testing {extractor_name}'):
        image = cv2.imread(str(img_path))
        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of inside cv2.resize.
            raise ValueError(f'Could not read image: {img_path}')
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        _, descriptors = akaze.detectAndCompute(gray_image, None)
        if descriptors is not None:
            feature_vector = np.mean(descriptors, axis=0)
        else:
            feature_vector = np.zeros(descriptor_length)
        test_features.append(feature_vector)

    if not test_features:
        # Keep the result two-dimensional so downstream estimators get a
        # well-formed (n_samples, n_features) array even when it is empty.
        return np.empty((0, descriptor_length))

    return np.array(test_features)

# Per-pipeline classification reports, keyed by pipeline name.
results = {}

# Fixed label order: the class list for partial_fit and the row order of the report.
class_labels = np.array([0, 1])

for pipeline_name, generator_function in pipeline_generators.items():
    print(f'training baseline model for: {pipeline_name}')

    # NOTE: the model is trained with partial_fit(), which runs a single pass
    # over each batch it receives; max_iter and tol only apply to fit().
    model = SGDClassifier(max_iter=1000, tol=1e-3, random_state=42)
    train_generator = generator_function(X_train_paths, y_train, BATCH_SIZE)

    for _ in tqdm(range(int(NUM_TRAINING_STEPS)), desc=f'Training {pipeline_name}'):
        X_batch, y_batch = next(train_generator)
        model.partial_fit(X_batch, y_batch, classes=class_labels)

    X_test_features = extract_test_features(pipeline_name, X_test_paths)
    y_pred = model.predict(X_test_features)

    # labels= pins both classes to target_names even if one of them is absent
    # from y_test/y_pred; zero_division=0 keeps the default score of 0 for an
    # undefined metric without emitting a warning for it.
    report = classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=['No Crack (0)', 'Crack (1)'],
        output_dict=True,
        zero_division=0,
    )
    results[pipeline_name] = report

print('final baseline comparison report')

# The winner is the pipeline with the highest F1 score on the crack class.
best_pipeline = max(results, key=lambda name: results[name]['Crack (1)']['f1-score'])

for pipeline_name, report in results.items():
    f1_crack = report['Crack (1)']['f1-score']

    # Pull out each section of the report once to keep the rows readable.
    no_crack = report['No Crack (0)']
    crack = report['Crack (1)']
    macro_avg = report['macro avg']
    weighted_avg = report['weighted avg']
    winner_tag = 'WINNER' if pipeline_name == best_pipeline else ''

    # Build the whole table first, then emit it with a single print.
    report_lines = [
        "==========================================",
        f"               {pipeline_name} {winner_tag}",
        "==========================================",
        "              precision    recall  f1-score   support",
        f"No Crack (0)      {no_crack['precision']:.2f}         {no_crack['recall']:.2f}      {no_crack['f1-score']:.2f}      {no_crack['support']}",
        f"   Crack (1)      {crack['precision']:.2f}         {crack['recall']:.2f}      {crack['f1-score']:.2f}      {crack['support']}",
        f"\n   Accuracy                           {report['accuracy']:.2f}     {macro_avg['support']}",
        f"   Macro Avg      {macro_avg['precision']:.2f}         {macro_avg['recall']:.2f}      {macro_avg['f1-score']:.2f}      {macro_avg['support']}",
        f"Weighted Avg      {weighted_avg['precision']:.2f}         {weighted_avg['recall']:.2f}      {weighted_avg['f1-score']:.2f}      {weighted_avg['support']}",
    ]
    print("\n".join(report_lines))