### Imports

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.image import imread
import joblib
import random

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model

sns.set_style("whitegrid")

# Seed every RNG the notebook relies on so that Restart & Run All is repeatable:
# Python's `random`, NumPy (used for the batch shuffling below and by the Keras
# image generators) and TensorFlow (weight initialisation, dropout).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.6.0


### Directories

In [2]:
# Record the directory the kernel was started in before it is changed below.
cwd = os.getcwd()
print("Current working directory:", cwd)

Current working directory: /workspaces/ML_Project_Image_Recognition/Jupyter_Notebooks


In [3]:
# Switch to the project root so the relative 'inputs/...' and 'outputs/...'
# paths used later resolve correctly.
# NOTE(review): this absolute path is specific to one workspace; the notebook
# will fail here on any machine where the project lives elsewhere.
os.chdir('/workspaces/ML_Project_Image_Recognition')
print("New working directory set.")

New working directory set.


In [4]:
# Confirm the change of directory; the bare expression is shown as the cell output.
work_dir = os.getcwd()
work_dir

'/workspaces/ML_Project_Image_Recognition'

### Outputs

In [5]:
# Dataset splits, all relative to the project root.
data_dir = 'inputs/cracks_dataset_new'
train_path, val_path, test_path = (
    os.path.join(data_dir, split) for split in ('train', 'val', 'test')
)

# Versioned folder that receives every artefact written by this notebook.
version = 'v1'
output_path = os.path.join('outputs', version)
os.makedirs(output_path, exist_ok=True)
print("Output folder: " + output_path)

Output folder: outputs/v1


In [6]:
# Image shape pickled under the versioned output folder.
# NOTE(review): joblib.load unpickles arbitrary objects - only load files this project produced.
image_shape = joblib.load(f'{output_path}/image_shape.pkl')
print("Image shape loaded:", image_shape)

# Class-name -> index mapping; iterating the dict yields its keys in order.
class_indices = joblib.load(f'{output_path}/class_indices.pkl')
labels = list(class_indices)
print("Class labels:", labels)

Image shape loaded: (256, 256, 3)
Class labels: ['Cracked', 'Non-cracked']


### Augmentation 
 - Only the "Cracked" images are augmented, to add diversity to the minority class during training: as noted in notebook 2, there are far fewer cracked images than non-cracked ones.

In [8]:
# Generator for the "Cracked" class: rescales pixels to [0, 1] and applies
# random geometric transforms (rotation, shifts, shear, zoom, flips).
augmented_gen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Generator for the "Non-cracked" class: rescaling only, no augmentation.
non_augmented_gen = ImageDataGenerator(rescale=1. / 255)

def create_combined_generator(structure_name, image_shape=(256, 256), batch_size=32):
    """Build an endless training generator that augments only the "Cracked" class.

    Two directory flows are created under ``train_path/<structure_name>``:
    one for "Cracked" (via ``augmented_gen``) and one for "Non-cracked"
    (via ``non_augmented_gen``). Every step draws one batch from each,
    concatenates them and shuffles the result.

    Args:
        structure_name: Sub-folder of ``train_path`` to read, e.g. "Walls".
        image_shape: Image shape; only the first two entries (height, width)
            are used as the resize target.
        batch_size: Batch size of *each* underlying flow, so a combined batch
            holds up to ``2 * batch_size`` images.

    Returns:
        A tuple ``(generator, total_images)`` where ``generator`` yields
        ``(X, y)`` batches forever and ``total_images`` is the number of files
        found across both classes.
    """
    structure_dir = os.path.join(train_path, structure_name)

    def _single_class_flow(datagen, class_name):
        # One flow per class so each class can use its own ImageDataGenerator.
        return datagen.flow_from_directory(
            directory=structure_dir,
            classes=[class_name],
            target_size=image_shape[:2],
            class_mode='binary',
            batch_size=batch_size,
            shuffle=True
        )

    cracked_generator = _single_class_flow(augmented_gen, "Cracked")
    non_cracked_generator = _single_class_flow(non_augmented_gen, "Non-cracked")

    def combined_gen():
        while True:
            # The labels produced by the flows are discarded on purpose: each
            # flow was given a single-entry `classes` list, so Keras assigns
            # index 0 to that class and BOTH flows would emit all-zero labels.
            cracked_imgs, _ = next(cracked_generator)
            non_imgs, _ = next(non_cracked_generator)

            # Assign the real labels explicitly. Cracked=0 / Non-cracked=1 is
            # the alphabetical order that flow_from_directory infers for the
            # two-class validation and test generators.
            cracked_labels = np.zeros(len(cracked_imgs), dtype=np.float32)
            non_labels = np.ones(len(non_imgs), dtype=np.float32)

            X = np.concatenate((cracked_imgs, non_imgs), axis=0)
            y = np.concatenate((cracked_labels, non_labels), axis=0)

            # Shuffle so the two classes are interleaved within the batch.
            indices = np.arange(len(X))
            np.random.shuffle(indices)
            yield X[indices], y[indices]

    total_images = len(cracked_generator.filenames) + len(non_cracked_generator.filenames)
    return combined_gen(), total_images

In [9]:
# One combined (augmented "Cracked" + plain "Non-cracked") training generator
# per structure type, created in the order Walls, Decks, Pavements.
(
    (walls_train_gen, walls_total_images),
    (decks_train_gen, decks_total_images),
    (pavements_train_gen, pavements_total_images),
) = (create_combined_generator(name) for name in ("Walls", "Decks", "Pavements"))

Found 2695 images belonging to 1 classes.
Found 10000 images belonging to 1 classes.
Found 1417 images belonging to 1 classes.
Found 8116 images belonging to 1 classes.
Found 1825 images belonging to 1 classes.
Found 15208 images belonging to 1 classes.


### Rescaling
 - The validation and test sets are only rescaled, not augmented, so that evaluation reflects real-world images.

In [12]:
# Shared generator for evaluation data: pixel rescaling only.
rescale_gen = ImageDataGenerator(rescale=1. / 255)

def create_eval_generator(base_path, structure_name, batch_size=32):
    """Return an unshuffled, rescale-only directory flow for one structure type.

    Args:
        base_path: Root folder of the split (validation or test).
        structure_name: Sub-folder of ``base_path`` to read, e.g. "Walls".
        batch_size: Number of images per batch.

    Returns:
        The iterator produced by ``rescale_gen.flow_from_directory`` with
        binary labels, resized to the first two entries of the notebook-level
        ``image_shape``.
    """
    structure_dir = os.path.join(base_path, structure_name)
    flow_options = {
        'target_size': image_shape[:2],
        'class_mode': 'binary',
        'batch_size': batch_size,
        # Fixed order keeps predictions aligned with the file list.
        'shuffle': False,
    }
    return rescale_gen.flow_from_directory(directory=structure_dir, **flow_options)

In [13]:
# Validation generators, one per structure type.
walls_val_gen, decks_val_gen, pavements_val_gen = (
    create_eval_generator(val_path, name) for name in ("Walls", "Decks", "Pavements")
)

# Test generators, one per structure type.
walls_test_gen, decks_test_gen, pavements_test_gen = (
    create_eval_generator(test_path, name) for name in ("Walls", "Decks", "Pavements")
)

Found 1813 images belonging to 2 classes.
Found 1361 images belonging to 2 classes.
Found 2432 images belonging to 2 classes.
Found 3630 images belonging to 2 classes.
Found 2726 images belonging to 2 classes.
Found 4869 images belonging to 2 classes.


### Augmented sample montages

In [12]:
def save_augmented_montage(generator, structure_name, output_path, n_images=9):
    """Save a grid of images drawn from one batch of ``generator`` as a PNG.

    Args:
        generator: Iterator yielding ``(images, labels)`` batches; only the
            first batch is consumed and its labels are ignored.
        structure_name: Used in the output file name.
        output_path: Folder the PNG is written to.
        n_images: Maximum number of images to show (three per row).

    The file is written to ``<output_path>/augmented_samples_<structure_name>.png``
    and its path is printed.
    """
    from math import ceil

    imgs, _ = next(generator)
    imgs = imgs[:n_images]

    n_cols = 3
    n_rows = ceil(n_images / n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 10))
    axes = axes.flatten()

    # zip stops at the shorter sequence, so a batch with fewer than n_images
    # images simply fills fewer cells.
    for ax, img in zip(axes, imgs):
        ax.imshow(img)
        ax.set_title("Augmented")

    # Hide the frame and ticks on every cell, including any left empty because
    # the batch was short or the grid has more cells than n_images.
    for ax in axes:
        ax.axis('off')

    fig.tight_layout()
    filename = os.path.join(output_path, f"augmented_samples_{structure_name}.png")
    fig.savefig(filename, dpi=150, bbox_inches='tight')
    # Close this specific figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"Saved: {filename}")

In [13]:
# Save one montage of augmented "Cracked" training images per structure type.
for structure in ("Walls", "Decks", "Pavements"):
    cracked_samples = augmented_gen.flow_from_directory(
        directory=os.path.join(train_path, structure),
        classes=["Cracked"],
        target_size=image_shape[:2],
        class_mode='binary',
        batch_size=9,
        shuffle=True
    )
    save_augmented_montage(cracked_samples, structure, output_path)

Found 2695 images belonging to 1 classes.
Saved: outputs/v1/augmented_samples_Walls.png
Found 1417 images belonging to 1 classes.
Saved: outputs/v1/augmented_samples_Decks.png
Found 1825 images belonging to 1 classes.
Saved: outputs/v1/augmented_samples_Pavements.png


In [None]:
from IPython.display import Image, display

# Paths of the montages written to the output folder, in display order.
montage_files = [
    os.path.join(output_path, f"augmented_samples_{structure}.png")
    for structure in ("Walls", "Decks", "Pavements")
]

for montage_file in montage_files:
    # Report a missing montage instead of raising, so the others still render.
    if not os.path.exists(montage_file):
        print(f"File not found: {montage_file}")
        continue
    print(f"Displaying: {os.path.basename(montage_file)}")
    display(Image(filename=montage_file))

### Baseline training generator (rescaling only)

In [8]:
# Plain generator that only rescales pixel values; no augmentation is applied.
simple_gen = ImageDataGenerator(rescale=1./255)

In [9]:
# Rebinds walls_train_gen to a two-class directory flow over the Walls training
# folder. Unlike the combined generator built earlier, this object exposes the
# `.samples` and `.batch_size` attributes that the training cell reads.
walls_train_gen = simple_gen.flow_from_directory(
    directory=os.path.join(train_path, "Walls"),
    batch_size=32,
    class_mode='binary',
    target_size=image_shape[:2],
    shuffle=True,
)

Found 12695 images belonging to 2 classes.


### Creating the Models

In [7]:
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization

def create_tf_model_1(input_shape):
    """Build and compile a small CNN for binary image classification.

    Architecture: three Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D
    blocks with 32, 64 and 128 filters, then Flatten -> Dense(64, relu) ->
    Dropout(0.5) -> Dense(1, sigmoid). All convolutional layers and the hidden
    dense layer use L2 kernel regularisation of 0.001.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D layer.

    Returns:
        The model, compiled with the Adam optimiser, binary cross-entropy loss
        and an accuracy metric.
    """
    weight_decay = 0.001
    model = Sequential()

    for block_index, n_filters in enumerate((32, 64, 128)):
        # Only the first layer of the network declares the input shape.
        extra_kwargs = {'input_shape': input_shape} if block_index == 0 else {}
        model.add(Conv2D(n_filters, (3, 3), activation='relu',
                         kernel_regularizer=l2(weight_decay), **extra_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    for head_layer in (
        Flatten(),
        Dense(64, activation='relu', kernel_regularizer=l2(weight_decay)),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ):
        model.add(head_layer)

    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

In [15]:
tf_model_1 = create_tf_model_1(image_shape)

# Stop once validation loss has not improved for 3 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Step counts use floor division, so a final partial batch is not counted.
history_tf_model_1 = tf_model_1.fit(
    x=walls_train_gen,
    epochs=25,
    steps_per_epoch=walls_train_gen.samples // walls_train_gen.batch_size,
    validation_data=walls_val_gen,
    validation_steps=walls_val_gen.samples // walls_val_gen.batch_size,
    callbacks=[early_stop],
    verbose=1,
)

2025-04-13 13:47:07.392490: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25


In [16]:
# Persist the trained Walls model (HDF5) and its training history next to the
# other artefacts in the versioned output folder. joblib.dump is the last
# expression, so its return value is what the cell displays.
tf_model_1.save(os.path.join(output_path, "tf_model_1_walls.h5"))
joblib.dump(history_tf_model_1.history, os.path.join(output_path, "history_tf_model_1_walls.pkl"))

['outputs/v1/history_tf_model_1_walls.pkl']

In [17]:
# Echo where the model and history artefacts were written.
for description, artefact_name in (
    ("Model saved to:", "tf_model_1_walls.h5"),
    ("History saved to:", "history_tf_model_1_walls.pkl"),
):
    print(description, os.path.join(output_path, artefact_name))

Model saved to: outputs/v1/tf_model_1_walls.h5
History saved to: outputs/v1/history_tf_model_1_walls.pkl
