In [1]:
from keras.utils import normalize, to_categorical
from unet_model import multi_unet_model
from unet512 import unet
import os
import glob
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Segmentation configuration shared by the data pipeline and the model cells.
SIZE_X, SIZE_Y = 256, 256  # Image width, image height
n_classes = 8              # Number of classes for segmentation
BATCH_SIZE = 8             # Batch size for training

# Define a generator for lazy loading of data
def data_generator(image_paths, mask_paths, batch_size, n_classes, normalize_images=True):
    """
    Lazily yield batches of grayscale images and one-hot encoded masks, forever.

    The path lists are walked in order, ``batch_size`` items at a time; once the
    end is reached the walk restarts from the beginning. The last batch of a pass
    is smaller when ``len(image_paths)`` is not a multiple of ``batch_size``.
    Images are not resized here.
    # assumes every image/mask on disk already has the same height and width,
    # otherwise stacking the batch into one array fails — TODO confirm.

    Args:
        image_paths (list): Paths to the input images.
        mask_paths (list): Paths to the masks; ``mask_paths[i]`` must be the
            mask for ``image_paths[i]``.
        batch_size (int): Number of samples per batch (must be >= 1).
        n_classes (int): Number of segmentation classes used for one-hot encoding.
        normalize_images (bool): When True (default), each image is passed
            through ``keras.utils.normalize(img, axis=1)``. When False the raw
            pixel values returned by ``cv2.imread`` are used.

    Yields:
        Tuple (images, masks_cat): ``images`` has a trailing channel axis added
        at position 3; ``masks_cat`` is the one-hot encoding of the masks.

    Raises:
        ValueError: On the first ``next()`` call if the two path lists differ in
            length, are empty, or ``batch_size`` is smaller than 1. Without
            these checks the loop below would either mis-pair data or spin
            forever without yielding.
        OSError: If ``cv2.imread`` cannot read an image or mask file.
    """
    n_samples = len(image_paths)
    if n_samples != len(mask_paths):
        raise ValueError(
            f"image_paths ({n_samples}) and mask_paths ({len(mask_paths)}) "
            "must have the same length"
        )
    if n_samples == 0:
        raise ValueError("image_paths is empty; nothing to generate")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    while True:  # Infinite loop to provide data indefinitely
        for start in range(0, n_samples, batch_size):
            batch_image_paths = image_paths[start:start + batch_size]
            batch_mask_paths = mask_paths[start:start + batch_size]

            # Load images and masks for the current batch
            images = []
            masks = []
            for img_path, mask_path in zip(batch_image_paths, batch_mask_paths):
                img = cv2.imread(img_path, 0)  # Read image as grayscale
                if img is None:
                    # cv2.imread signals failure by returning None, not by raising
                    raise OSError(f"Could not read image file: {img_path}")
                mask = cv2.imread(mask_path, 0)  # Read mask as grayscale
                if mask is None:
                    raise OSError(f"Could not read mask file: {mask_path}")

                if normalize_images:
                    img = normalize(img, axis=1)
                images.append(img)
                masks.append(mask)

            # Stack into arrays and add the channel dimension the model expects
            images = np.expand_dims(np.array(images), axis=3)

            # One-hot encode the whole batch of label masks in one call.
            # presumably mask pixel values are class ids in [0, n_classes) — verify
            masks_cat = to_categorical(np.array(masks), num_classes=n_classes)

            yield images, masks_cat  # Yield a batch

# Paths to training images and masks.
# glob.glob returns matches in arbitrary order, so both listings are sorted to
# line images up with their masks index-by-index and to make the split below
# reproducible across runs and machines.
# assumes an image and its mask share the same file name in the two folders — TODO confirm
train_image_paths = sorted(glob.glob("/home/abdulrauf/Projects/MakhiMeter-Training/data/training/model_v1.2/experiment_2/with_rotation/size 256/interpolated/augmented rgb/*.png"))
train_mask_paths = sorted(glob.glob("/home/abdulrauf/Projects/MakhiMeter-Training/data/training/model_v1.2/experiment_2/with_rotation/size 256/interpolated/labeled encoded/*.png"))

# Split data into training and testing sets (10% held out, fixed seed).
# Both lists are passed together so each image stays paired with its mask.
train_image_paths, test_image_paths, train_mask_paths, test_mask_paths = train_test_split(
    train_image_paths, train_mask_paths, test_size=0.1, random_state=0
)





2024-11-29 16:38:46.159636: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-29 16:38:46.286119: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-29 16:38:46.319497: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-29 16:38:46.570038: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Endless batch generators: one over the training split, one over the held-out split.
train_gen = data_generator(
    image_paths=train_image_paths,
    mask_paths=train_mask_paths,
    batch_size=BATCH_SIZE,
    n_classes=n_classes,
)
val_gen = data_generator(
    image_paths=test_image_paths,
    mask_paths=test_mask_paths,
    batch_size=BATCH_SIZE,
    n_classes=n_classes,
)

# Define the model


In [3]:
# Model input geometry, derived from the image size constants.
IMG_HEIGHT, IMG_WIDTH = SIZE_Y, SIZE_X
IMG_CHANNELS = 1  # Grayscale images

def get_model2(n_class=None, input_size=None):
    """
    Build the alternative U-Net from ``unet512.unet``.

    Args:
        n_class (int | None): Number of output classes. Defaults to the
            notebook-level ``n_classes`` so the model stays in sync with the
            one-hot encoding produced by the data generator.
        input_size (tuple | None): Input shape passed to ``unet``. Defaults to
            ``(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)``.

    Returns:
        Whatever ``unet`` returns for the given class count and input size.
    """
    # Resolve defaults at call time so re-running the constants cell takes effect.
    if n_class is None:
        n_class = n_classes
    if input_size is None:
        input_size = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
    return unet(n_class=n_class, input_size=input_size)

def get_model():
    """Build the model returned by ``multi_unet_model`` called with no arguments."""
    # NOTE(review): no class count or input shape is passed, so this relies on
    # multi_unet_model's own defaults agreeing with n_classes / IMG_* — confirm.
    network = multi_unet_model()
    return network

# Build the network, attach optimizer/loss/metric, then print the layer summary.
model = get_model()
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()



I0000 00:00:1732880335.653187    5308 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732880335.734917    5308 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732880335.735242    5308 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732880335.738614    5308 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [4]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: write the full model whenever validation accuracy
# reaches a new best value.
checkpoint_callback = ModelCheckpoint(
    # File name template; epoch number and val_loss are filled in at save time
    filepath='models/checkpoints/unet_256_interpolated_checkpoint_epoch-{epoch:02d}_val_loss-{val_loss:.4f}.keras',
    monitor='val_accuracy',   # Metric that decides whether a checkpoint is written
    mode='max',               # Higher val_accuracy counts as an improvement
    save_best_only=True,      # Skip epochs that do not beat the best val_accuracy so far
    save_weights_only=False,  # Save the entire model, not just the weights
    verbose=1,                # Print a message when a checkpoint is saved
)


# Train the model using the generators.
# One pass over a generator yields ceil(n / BATCH_SIZE) batches (the last one may
# be partial). Ceil division keeps every epoch / validation run aligned with one
# full pass; floor division would skip the partial batch, let the endless
# generators drift between epochs, and give 0 validation steps for a validation
# set smaller than one batch.
train_steps = -(-len(train_image_paths) // BATCH_SIZE)
val_steps = -(-len(test_image_paths) // BATCH_SIZE)

history = model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=10,
    verbose=1,
    shuffle=False,  # Batch order is whatever the generator produces
    callbacks=[checkpoint_callback]
)

# Save the model. Create the target folder first so a finished training run is
# not lost to a missing directory.
final_model_path = 'models/version 1.1/unet_256_interpolated_6900_10epochs.h5'
os.makedirs(os.path.dirname(final_model_path), exist_ok=True)
model.save(final_model_path)

Epoch 1/10


I0000 00:00:1732880348.485305    5758 service.cc:146] XLA service 0x74dc38025bf0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732880348.485344    5758 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce GTX 1050, Compute Capability 6.1
2024-11-29 16:39:08.692040: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-11-29 16:39:09.742315: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907

I0000 00:00:1732880371.071957    5758 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step - accuracy: 0.6833 - loss: 0.9680
Epoch 1: val_accuracy improved from -inf to 0.88837, saving model to models/checkpoints/unet_256_interpolated_checkpoint_epoch-01_val_loss-0.4118.keras
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 236ms/step - accuracy: 0.6834 - loss: 0.9677 - val_accuracy: 0.8884 - val_loss: 0.4118
Epoch 2/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step - accuracy: 0.9399 - loss: 0.2339
Epoch 2: val_accuracy improved from 0.88837 to 0.95600, saving model to models/checkpoints/unet_256_interpolated_checkpoint_epoch-02_val_loss-0.1766.keras
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 249ms/step - accuracy: 0.9399 - loss: 0.2339 - val_accuracy: 0.9560 - val_loss: 0.1766
Epoch 3/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 239ms/step - accuracy: 0.9536 - loss: 0.1866
Epoch 3: val_accuracy improv



In [5]:
# Extra copy of the trained model under a second file name.
# NOTE(review): the name mentions "unet512" and "5epochs", while the model in
# scope comes from get_model() and the fit call above uses epochs=10 — confirm
# the intended file name.
model.save(filepath='models/unet512_256__6900_5epochs.h5')

