### Imports

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.image import imread
import joblib
import random

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model

# Apply seaborn's "whitegrid" style to plots created after this point.
sns.set_style("whitegrid")

# Print the TensorFlow version in use so the run can be reproduced later.
print("TensorFlow version:", tf.__version__)

### Directories

In [None]:
# Record the directory the kernel is currently running in and show it.
cwd = os.getcwd()
print("Current working directory:", cwd)

In [None]:
# Move the process into the project folder; the dataset and output paths
# defined further down are relative and resolve against this directory.
# NOTE(review): the absolute path is hard-coded — confirm it matches the
# environment before running this cell elsewhere.
os.chdir('/workspaces/ML_Project_Image_Recognition')
print("New working directory set.")

In [None]:
# Re-read the working directory after the change above. The bare expression
# on the last line makes the notebook display the value as the cell output.
work_dir = os.getcwd()
work_dir

### Outputs

In [None]:
# Root of the image dataset and its three split folders (relative to the
# current working directory).
data_dir = 'inputs/cracks_dataset_new'
train_path, val_path, test_path = (
    os.path.join(data_dir, split_name)
    for split_name in ('train', 'val', 'test')
)

# Versioned output folder; created on first use and reused afterwards.
version = 'v1'
output_path = os.path.join('outputs', version)
os.makedirs(output_path, exist_ok=True)
print(f"Output folder: {output_path}")

In [None]:
# Restore the image shape pickled in the versioned output folder
# (presumably written by an earlier step of the project — confirm).
image_shape = joblib.load(f'{output_path}/image_shape.pkl')
print("Image shape loaded:", image_shape)

# Restore the class-name -> index mapping and keep the names, in stored
# order, as the list of labels.
class_indices = joblib.load(f'{output_path}/class_indices.pkl')
labels = [*class_indices.keys()]
print("Class labels:", labels)

In [None]:
# Pixel scaling shared by both generators: maps 0-255 values into [0, 1].
_rescale_settings = dict(rescale=1./255)

# Random geometric transforms applied only to "Cracked" images.
_crack_augmentation_settings = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Generator for "Cracked": augmentation plus rescaling.
augmented_gen = ImageDataGenerator(
    **_crack_augmentation_settings,
    **_rescale_settings,
)

# Generator for "Non-cracked": rescaling only, no augmentation.
non_augmented_gen = ImageDataGenerator(**_rescale_settings)

def create_combined_generator(structure_name, image_shape=(256, 256), batch_size=32,
                              label_map=None):
    """Build an endless training generator for one structure type (e.g. "Walls").

    Images are read from ``<train_path>/<structure_name>/Cracked`` through the
    module-level ``augmented_gen`` and from
    ``<train_path>/<structure_name>/Non-cracked`` through the module-level
    ``non_augmented_gen``. Every step draws one batch from each source,
    concatenates them and shuffles the result, so a yielded batch holds up to
    ``2 * batch_size`` samples.

    Args:
        structure_name: Name of the sub-folder of ``train_path`` to read.
        image_shape: Target image shape; only the first two entries are used
            as the resize target, so a 3-tuple that also carries a channel
            count is accepted.
        batch_size: Batch size requested from *each* of the two class flows.
        label_map: Optional mapping with the keys ``"Cracked"`` and
            ``"Non-cracked"`` giving the numeric label of each class.
            Defaults to ``{"Cracked": 0, "Non-cracked": 1}``, the alphabetical
            indexing Keras applies when both folders are read together.
            NOTE(review): confirm this matches the class indices the model
            is evaluated with.

    Returns:
        A tuple ``(generator, total_images)``: the infinite generator of
        ``(X, y)`` batches and the number of image files found across both
        class folders.
    """
    if label_map is None:
        label_map = {"Cracked": 0, "Non-cracked": 1}
    cracked_label = label_map["Cracked"]
    non_cracked_label = label_map["Non-cracked"]

    structure_dir = os.path.join(train_path, structure_name)

    def _flow_single_class(datagen, class_name):
        # One directory flow restricted to a single class sub-folder.
        return datagen.flow_from_directory(
            directory=structure_dir,
            classes=[class_name],
            target_size=image_shape[:2],
            class_mode='binary',
            batch_size=batch_size,
            shuffle=True,
        )

    # Augmented cracked images / non-augmented non-cracked images.
    cracked_generator = _flow_single_class(augmented_gen, "Cracked")
    non_cracked_generator = _flow_single_class(non_augmented_gen, "Non-cracked")

    def combined_gen():
        while True:
            # Builtin next() works on every Keras version; the iterator's
            # own .next() method is not available in newer releases.
            cracked_imgs, _ = next(cracked_generator)
            non_imgs, _ = next(non_cracked_generator)

            # Each flow above knows only one class, so it labels every image
            # with index 0. Those labels are discarded and rebuilt here so
            # the two classes stay distinguishable.
            cracked_labels = np.full(
                len(cracked_imgs), cracked_label, dtype=cracked_imgs.dtype
            )
            non_labels = np.full(
                len(non_imgs), non_cracked_label, dtype=non_imgs.dtype
            )

            X = np.concatenate((cracked_imgs, non_imgs), axis=0)
            y = np.concatenate((cracked_labels, non_labels), axis=0)

            # Shuffle images and labels with the same permutation.
            indices = np.random.permutation(len(X))
            yield X[indices], y[indices]

    total_images = len(cracked_generator.filenames) + len(non_cracked_generator.filenames)
    return combined_gen(), total_images


In [None]:
# Build one combined training generator per structure type. The image shape
# loaded from the output folder is passed explicitly so the generators resize
# to it rather than to the function's built-in default; only its first two
# entries are used as the resize target.
walls_train_gen, walls_total_images = create_combined_generator(
    "Walls", image_shape=image_shape
)
decks_train_gen, decks_total_images = create_combined_generator(
    "Decks", image_shape=image_shape
)
pavements_train_gen, pavements_total_images = create_combined_generator(
    "Pavements", image_shape=image_shape
)