In [None]:
import os
import numpy as np
import tensorflow as tf
import cv2
import albumentations as A
import matplotlib.pyplot as plt

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

# IMAGE_DIR = "/content/drive/MyDrive/Images_RGB"
# MASK_DIR = "/content/drive/MyDrive/masks"

# Dataset locations: input images and their segmentation masks.
IMAGE_DIR = "/content/images"
MASK_DIR = "/content/masks"


# Configuration
IMAGE_SIZE = (256, 256)  # size every image and mask is resized to
BATCH_SIZE = 8  # samples per batch in the tf.data pipelines
EPOCHS = 50  # NOTE(review): reassigned to 10 in the training cell right before model.fit
NUM_CLASSES = 2  # number of segmentation classes; masks are thresholded to 0/1

In [None]:
def load_and_preprocess(folder, target_size, grayscale=False):
    """Load every readable image in ``folder`` into a single float32 array.

    Files are visited in sorted filename order so that images and masks read
    from parallel folders line up index by index.

    Args:
        folder: Directory containing the image (or mask) files.
        target_size: ``(height, width)`` every file is resized to, matching
            how the same tuple is used for tensor shapes in this notebook.
        grayscale: If True, treat files as masks: read a single channel,
            resize with nearest-neighbour interpolation and binarize at 127.
            If False, read colour images, convert them to RGB and scale the
            values to [0, 1].

    Returns:
        A float32 array of shape ``(N, height, width, 1)`` for masks or
        ``(N, height, width, 3)`` for images. ``N`` is 0 when the folder
        holds no readable image.
    """
    import warnings

    height, width = target_size
    channels = 1 if grayscale else 3
    loaded = []
    skipped = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        # Read as grayscale or color based on flag
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR)
        if img is None:
            # Not an image (or unreadable): remember it so the caller is told.
            skipped.append(filename)
            continue
        # INTER_NEAREST keeps mask labels crisp; INTER_AREA suits image downscaling.
        interp = cv2.INTER_NEAREST if grayscale else cv2.INTER_AREA
        # cv2.resize expects dsize as (width, height), not (height, width).
        img = cv2.resize(img, (width, height), interpolation=interp)
        if grayscale:
            # Add a channel axis so the mask is (H, W, 1), then threshold it.
            img = (img[..., np.newaxis] > 127).astype(np.float32)
        else:
            # OpenCV decodes to BGR; the pretrained backbone and matplotlib expect RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32) / 255.0
        loaded.append(img)

    if skipped:
        # A skipped file shifts every later index, which can silently pair
        # images with the wrong masks, so make it visible.
        warnings.warn(
            f"Skipped {len(skipped)} unreadable file(s) in {folder}: {skipped}",
            stacklevel=2,
        )
    if not loaded:
        return np.empty((0, height, width, channels), dtype=np.float32)
    return np.stack(loaded)

# Load images and masks
images = load_and_preprocess(IMAGE_DIR, IMAGE_SIZE, grayscale=False)
masks  = load_and_preprocess(MASK_DIR, IMAGE_SIZE, grayscale=True)
print("Images shape:", images.shape)
print("Masks shape:", masks.shape)
# The two folders are read independently, so a missing or unreadable file in
# either one would leave images paired with the wrong masks. Fail early.
assert len(images) == len(masks), (
    f"Found {len(images)} images but {len(masks)} masks; "
    "every image needs exactly one mask"
)


Images shape: (100, 256, 256, 3)
Masks shape: (100, 256, 256, 1)


In [None]:
# NOTE(review): this import lives mid-notebook; the other imports are in the first cell.
from sklearn.model_selection import train_test_split
# Hold out 20% of the (image, mask) pairs for validation. Passing both arrays
# in one call keeps each image aligned with its mask; random_state fixes the split.
train_images, val_images, train_masks, val_masks = train_test_split(
    images, masks, test_size=0.2, random_state=42)

In [None]:
# Augmentation for Welding Images
# Each transform is applied independently with its own probability p.
# _augment passes image and mask together as one call to this pipeline.
augmentor = A.Compose([
    A.HorizontalFlip(p=0.5),  # mirror left/right
    A.Rotate(limit=15, p=0.4),  # small random rotation (limit=15)
    A.RandomBrightnessContrast(p=0.3),  # photometric jitter
    A.CLAHE(p=0.3),  # _augment converts inputs to uint8 before calling this pipeline
    A.GaussianBlur(blur_limit=(3,5), p=0.1),  # occasional mild blur
    # Note: Do not apply normalization here since we want inputs in [0,1]
])

def _augment(image_np, mask_np):
    """Run the shared ``augmentor`` pipeline on one image/mask pair.

    Args:
        image_np: Image with float values in [0, 1] (NumPy array or eager
            tensor, as delivered by ``tf.py_function``).
        mask_np: Mask with values in [0, 1] and the same spatial size.

    Returns:
        ``(aug_img, aug_mask)`` as float32 arrays scaled back to [0, 1]. The
        mask always carries a trailing channel axis.
    """
    # Ensure inputs are NumPy arrays (this fixes the EagerTensor issue)
    image_np = np.array(image_np)
    mask_np = np.array(mask_np)
    # Albumentations expects uint8 images. Round instead of truncating:
    # a float32 value k/255 times 255 can land just below k, and a plain
    # astype(np.uint8) would then drop it to k-1. Clip to stay in range.
    image_np = np.clip(np.rint(image_np * 255.0), 0, 255).astype(np.uint8)
    mask_np = np.clip(np.rint(mask_np * 255.0), 0, 255).astype(np.uint8)

    augmented = augmentor(image=image_np, mask=mask_np)
    aug_img = augmented['image'].astype(np.float32) / 255.0  # Scale back to [0,1]
    aug_mask = augmented['mask'].astype(np.float32) / 255.0
    if aug_mask.ndim == 2:
        # Restore the channel axis if the pipeline returned a 2-D mask.
        aug_mask = np.expand_dims(aug_mask, axis=-1)
    return aug_img, aug_mask

def augment_data(image, mask):
    """Wrap ``_augment`` so it can be used inside a ``tf.data`` ``map`` call.

    The NumPy-based augmentation is executed through ``tf.py_function``; the
    static shapes are then set explicitly on both outputs so that the
    results can be batched.

    Args:
        image: One image tensor.
        mask: The matching mask tensor.

    Returns:
        ``(image, mask)`` float32 tensors with static shapes
        ``IMAGE_SIZE + (3,)`` and ``IMAGE_SIZE + (1,)``.
    """
    outputs = tf.py_function(_augment, [image, mask], [tf.float32, tf.float32])
    image_out, mask_out = outputs
    # Set static shapes for proper batching
    image_out.set_shape((*IMAGE_SIZE, 3))
    mask_out.set_shape((*IMAGE_SIZE, 1))
    return image_out, mask_out





In [None]:
def prepare_dataset(images, masks, training=True):
    """Build a batched, prefetched ``tf.data`` pipeline of (image, mask) pairs.

    Args:
        images: Array of images, one per sample.
        masks: Array of masks aligned with ``images``.
        training: When true, shuffle with a buffer as large as the dataset
            and apply ``augment_data`` to every pair; when false, the data
            is only batched and prefetched.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``BATCH_SIZE`` samples.
    """
    autotune = tf.data.AUTOTUNE
    pipeline = tf.data.Dataset.from_tensor_slices((images, masks))
    if training:
        # Full-dataset shuffle first, then per-sample augmentation with the
        # level of parallelism chosen by tf.data.
        pipeline = pipeline.shuffle(buffer_size=len(images)).map(
            augment_data, num_parallel_calls=autotune
        )
    # Group samples into batches and prefetch so the next batch is prepared
    # while the current one is being consumed.
    return pipeline.batch(BATCH_SIZE).prefetch(buffer_size=autotune)

# Training data is shuffled and augmented; validation data is only batched
# and prefetched (training=False skips both steps in prepare_dataset).
train_dataset = prepare_dataset(train_images, train_masks, training=True)
val_dataset   = prepare_dataset(val_images, val_masks, training=False)


In [None]:

!pip install keras_cv
import keras_cv
# Use ResNet50V2 as backbone (pretrained on ImageNet) as in Code 1
backbone = keras_cv.models.ResNet50V2Backbone.from_preset(
    preset="resnet50_v2_imagenet",
    input_shape=IMAGE_SIZE + (3,),
    load_weights=True
)



Collecting keras_cv
  Downloading keras_cv-0.9.0-py3-none-any.whl.metadata (12 kB)
Collecting keras-core (from keras_cv)
[0m  Downloading keras_core-0.1.7-py3-none-any.whl.metadata (4.3 kB)
Downloading keras_cv-0.9.0-py3-none-any.whl (650 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m650.7/650.7 kB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras_core-0.1.7-py3-none-any.whl (950 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras-core, keras_cv
Successfully installed keras-core-0.1.7 keras_cv-0.9.0
Downloading from https://www.kaggle.com/api/v1/models/keras/resnetv2/keras/resnet50_v2_imagenet/2/download/config.json...


100%|██████████| 886/886 [00:00<00:00, 2.09MB/s]


Downloading from https://www.kaggle.com/api/v1/models/keras/resnetv2/keras/resnet50_v2_imagenet/2/download/model.weights.h5...


100%|██████████| 90.3M/90.3M [00:07<00:00, 12.6MB/s]


In [None]:
# DeepLabV3+ segmentation model built on top of the pretrained backbone
# created above, configured for NUM_CLASSES output classes.
model = keras_cv.models.segmentation.DeepLabV3Plus(
    num_classes=NUM_CLASSES,
    backbone=backbone
)



In [None]:
def mean_iou(y_true, y_pred):
    """Mean intersection-over-union across the batch and classes.

    IoU is computed per image and per class. A class that appears in neither
    the ground truth nor the prediction of an image has an empty union and
    is left out of the average, rather than being counted as an IoU of 0.

    Args:
        y_true: Ground-truth class indices of shape ``(batch, H, W, 1)``.
        y_pred: Logits or probabilities of shape
            ``(batch, H, W, num_classes)``.

    Returns:
        Scalar tensor: the mean IoU over all (image, class) pairs whose
        union is non-empty.
    """
    # Squeeze last channel of ground truth and one-hot encode
    y_true = tf.squeeze(y_true, axis=-1)
    y_true = tf.one_hot(tf.cast(y_true, tf.int32), NUM_CLASSES)
    # Turn multi-channel scores into hard one-hot predictions.
    # NOTE(review): a single-channel y_pred is used as-is, without thresholding.
    if y_pred.shape[-1] > 1:
        y_pred = tf.one_hot(tf.argmax(y_pred, axis=-1), NUM_CLASSES)
    # Per-image, per-class pixel counts: shape (batch, num_classes).
    intersection = tf.reduce_sum(y_true * y_pred, axis=[1, 2])
    union = tf.reduce_sum(y_true + y_pred, axis=[1, 2]) - intersection
    iou = tf.math.divide_no_nan(intersection, union)
    # Only average over entries where the class occurs in truth or prediction.
    present = tf.cast(union > 0, iou.dtype)
    return tf.math.divide_no_nan(
        tf.reduce_sum(iou * present), tf.reduce_sum(present)
    )

# Loss used when compiling the model: sparse categorical cross-entropy, i.e.
# integer class labels as targets. from_logits=False means the model outputs
# are treated as probabilities rather than raw logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

In [None]:
# Compile with Adam at learning rate 1e-4 and the loss defined above.
# Pixel accuracy and the custom mean_iou function are tracked as metrics.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=loss_fn,
    metrics=["accuracy", mean_iou]
)



In [None]:
# Print the model summary.
model.summary()

In [None]:
# Save the model to "best_deeplabv3plus.h5" only when the monitored metric
# "val_mean_iou" improves (mode="max": higher is better).
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "best_deeplabv3plus.h5",
    monitor="val_mean_iou",
    mode="max",
    save_best_only=True,
    verbose=1
)
# Write training logs for TensorBoard under ./logs.
tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir="./logs")

# Callbacks handed to model.fit.
callbacks = [checkpoint_cb, tensorboard_cb]



In [None]:
# NOTE(review): this reassignment overrides EPOCHS = 50 from the configuration
# cell; keep the two in sync or set the value in one place only.
EPOCHS = 10
# Train on the augmented training pipeline with val_dataset as validation
# data. `history` holds the object returned by model.fit.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=val_dataset,
    callbacks=callbacks
)

Epoch 1/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 615ms/step - accuracy: 0.9913 - loss: 0.0379 - mean_iou: 0.8241
Epoch 1: val_mean_iou did not improve from 0.49034
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 661ms/step - accuracy: 0.9913 - loss: 0.0376 - mean_iou: 0.8257 - val_accuracy: 0.9690 - val_loss: 0.1285 - val_mean_iou: 0.4849
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 638ms/step - accuracy: 0.9930 - loss: 0.0324 - mean_iou: 0.8584
Epoch 2: val_mean_iou did not improve from 0.49034
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 682ms/step - accuracy: 0.9930 - loss: 0.0324 - mean_iou: 0.8580 - val_accuracy: 0.9699 - val_loss: 0.1282 - val_mean_iou: 0.4854
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 599ms/step - accuracy: 0.9935 - loss: 0.0296 - mean_iou: 0.8584
Epoch 3: val_mean_iou did not improve from 0.49034
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m