In [1]:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
import os
import time
import datetime
import glob
import numpy as np
import cv2
import glob

2024-11-19 18:42:10.462287: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-19 18:42:10.462315: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-19 18:42:10.463158: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-19 18:42:10.467678: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

BUFFER_SIZE = 548000
BATCH_SIZE = 64
# Each image is 256x256 in size
IMG_WIDTH = 256
IMG_HEIGHT = 256
OUTPUT_CHANNELS = 6
LAMBDA = 100

In [3]:
def forwardblock(filters, size, apply_batchnorm=True, apply_dropout=False):
  initializer = tf.random_normal_initializer(0., 0.02)

  result = tf.keras.Sequential()
  result.add(
      tf.keras.layers.Conv2D(filters, size, strides=1, padding='same',
                             kernel_initializer=initializer, use_bias=False))
  if apply_dropout:
    result.add(tf.keras.layers.Dropout(0.1))

  if apply_batchnorm:
    result.add(tf.keras.layers.BatchNormalization())

  result.add(tf.keras.layers.LeakyReLU())

  return result

def downsample(filters, size, apply_batchnorm=True):
  initializer = tf.random_normal_initializer(0., 0.02)

  result = tf.keras.Sequential()
  result.add(
      tf.keras.layers.Conv2D(filters, size, strides=2, padding='same',
                             kernel_initializer=initializer, use_bias=False))

  if apply_batchnorm:
    result.add(tf.keras.layers.BatchNormalization())

  result.add(tf.keras.layers.LeakyReLU())

  return result

def upsample(filters, size, apply_dropout=False, apply_batchnorm=False):
  initializer = tf.random_normal_initializer(0., 0.02)

  result = tf.keras.Sequential()
  result.add(
    tf.keras.layers.Conv2DTranspose(filters, size, strides=2,
                                    padding='same',
                                    kernel_initializer=initializer,
                                    use_bias=False))

  if apply_batchnorm:
    result.add(tf.keras.layers.BatchNormalization())

  if apply_dropout:
      result.add(tf.keras.layers.Dropout(0.1))

  result.add(tf.keras.layers.ReLU())

  return result

def Generator():
  inputs = tf.keras.layers.Input(shape=[256, 256, 1])

  down_stack = [
    downsample(64, 4, apply_batchnorm=True),  # (batch_size, 128, 128, 64),
    forwardblock(64, 4, apply_batchnorm=True, apply_dropout=True),
    downsample(128, 4, apply_batchnorm=True),  # (batch_size, 64, 64, 128),
    forwardblock(128, 4, apply_batchnorm=True, apply_dropout=True),
    downsample(256, 4, apply_batchnorm=True),  # (batch_size, 32, 32, 256),
    forwardblock(256, 4, apply_batchnorm=True, apply_dropout=True),
    downsample(512, 4, apply_batchnorm=True),  # (batch_size, 16, 16, 512),
    forwardblock(512, 4, apply_batchnorm=True, apply_dropout=True),
    downsample(1024, 4, apply_batchnorm=True),  # (batch_size, 8, 8, 1024),
  ]

  up_stack = [
    upsample(512, 4, apply_batchnorm=True),  # (batch_size, 16, 16, 1024),
    forwardblock(512, 4, apply_batchnorm=True, apply_dropout=True),
    upsample(256, 4, apply_batchnorm=True),  # (batch_size, 32, 32, 512),
    forwardblock(256, 4, apply_batchnorm=True, apply_dropout=True),
    upsample(128, 4, apply_batchnorm=True),  # (batch_size, 64, 64, 256),
    forwardblock(128, 4, apply_batchnorm=True, apply_dropout=True),
    upsample(64, 4, apply_batchnorm=True),  # (batch_size, 128, 128, 128),
    forwardblock(64, 4, apply_batchnorm=True, apply_dropout=True),
  ]


  initializer = tf.random_normal_initializer(0., 0.02)
  last = tf.keras.layers.Conv2DTranspose(OUTPUT_CHANNELS, 4,
                                         strides=2,
                                         padding='same',
                                         kernel_initializer=initializer,
                                         activation='softmax')  # (batch_size, 256, 256, 3)

  x = inputs

  # Downsampling through the model
  skips = []
  for down in down_stack:
    x = down(x)
    skips.append(x)

  skips = reversed(skips[:-1])

  # Upsampling and establishing the skip connections
  for up, skip in zip(up_stack, skips):
    x = up(x)
    x = tf.keras.layers.Concatenate()([x, skip])

  x = last(x)

  return tf.keras.Model(inputs=inputs, outputs=x)


def Discriminator():
  initializer = tf.random_normal_initializer(0., 0.02)

  inp = tf.keras.layers.Input(shape=[256, 256, 1], name='input_image')
  tar = tf.keras.layers.Input(shape=[256, 256, 6], name='target_image')

  x = tf.keras.layers.concatenate([inp, tar])  # (batch_size, 256, 256, channels*2)

  down1 = downsample(64, 4, False)(x)  # (batch_size, 128, 128, 64)
  down2 = downsample(128, 4)(down1)  # (batch_size, 64, 64, 128)
  down3 = downsample(256, 4)(down2)  # (batch_size, 32, 32, 256)

  zero_pad1 = tf.keras.layers.ZeroPadding2D()(down3)  # (batch_size, 34, 34, 256)
  conv = tf.keras.layers.Conv2D(512, 4, strides=1,
                                kernel_initializer=initializer,
                                use_bias=False)(zero_pad1)  # (batch_size, 31, 31, 512)

  batchnorm1 = tf.keras.layers.BatchNormalization()(conv)

  leaky_relu = tf.keras.layers.LeakyReLU()(batchnorm1)

  zero_pad2 = tf.keras.layers.ZeroPadding2D()(leaky_relu)  # (batch_size, 33, 33, 512)

  last = tf.keras.layers.Conv2D(1, 4, strides=1,
                                kernel_initializer=initializer)(zero_pad2)  # (batch_size, 30, 30, 1)

  return tf.keras.Model(inputs=[inp, tar], outputs=last)

def generator_loss(disc_generated_output, gen_output, target):
  gan_loss = loss_object(tf.ones_like(disc_generated_output), disc_generated_output)

  # cast to float64
  disc_generated_output = tf.cast(disc_generated_output, tf.float32)
  gen_output = tf.cast(gen_output, tf.float32)
  target = tf.cast(target, tf.float32)
    
  # Mean absolute error
  #gen_output = tf.squeeze(gen_output, axis=-1)
  l1_loss = tf.reduce_mean(tf.abs(target - gen_output))

  total_gen_loss = gan_loss + (LAMBDA * l1_loss)

  return total_gen_loss, gan_loss, l1_loss

def discriminator_loss(disc_real_output, disc_generated_output):
  real_loss = loss_object(tf.ones_like(disc_real_output), disc_real_output)

  generated_loss = loss_object(tf.zeros_like(disc_generated_output), disc_generated_output)

  total_disc_loss = real_loss + generated_loss

  return total_disc_loss


generator = Generator()
discriminator = Discriminator()

2024-11-19 18:42:11.702583: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-11-19 18:42:11.702810: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-11-19 18:42:11.732510: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [4]:
generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)

In [5]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

In [6]:
generator.save_weights('./training_checkpoints/enhanced_unet_Generator_model_weights_ckpt47.h5')  

In [7]:
# load data

train_batches = glob.glob('/mnt/databatches_npy/*training_images*')
train_batches.sort()

train_lables = glob.glob('/mnt/databatches_npy/*training_labels_hot_encoded*')
train_lables.sort()


test_batches = glob.glob('/mnt/databatches_npy/*test_images*')
test_batches.sort()

test_lables = glob.glob('/mnt/databatches_npy/*test_labels_hot_encoded*')
test_lables.sort()


train_data_x = np.zeros((1, 256, 256, 1)).astype(np.int8)
train_data_y = np.zeros((1, 256, 256, 6)).astype(np.int8)

for train_batch, train_lable in zip(train_batches,train_lables):
    print(train_batch)
    x_train = np.load(train_batch)
    x_train = np.expand_dims(x_train, axis=-1)
    x_train = x_train.astype(np.int8)
    
    y_train = np.load(train_lable).astype(np.int8)
    
    train_data_x = np.concatenate((train_data_x, x_train), axis=0)
    train_data_y = np.concatenate((train_data_y, y_train), axis=0)
    print(train_data_x.shape)
    print(train_data_y.shape)
    print('-------------------------------------------------------------------------------------------------')


with tf.device('/cpu:0'):
    
    #make dataset
    train_data_x = tf.data.Dataset.from_tensor_slices(train_data_x) 
    train_data_y = tf.data.Dataset.from_tensor_slices(train_data_y)
    train_dataset = tf.data.Dataset.zip((train_data_x, train_data_y))

    train_dataset = train_dataset.shuffle(1000)
    train_dataset = train_dataset.batch(BATCH_SIZE)


x_test = np.load(test_batches[0])
x_test = np.expand_dims(x_test, axis=-1)
y_test = np.load(test_lables[0])

with tf.device('/cpu:0'):
    x_test = tf.data.Dataset.from_tensor_slices(x_test)
    y_test = tf.data.Dataset.from_tensor_slices(y_test)
    test_dataset = tf.data.Dataset.zip((x_test, y_test))
    
    test_dataset = test_dataset.shuffle(1000)
    test_dataset = test_dataset.batch(BATCH_SIZE)

/mnt/databatches_npy/batch1training_images.npy
(9217, 256, 256, 1)
(9217, 256, 256, 6)
-------------------------------------------------------------------------------------------------
/mnt/databatches_npy/batch1training_images_flipped_around_xaxis.npy
(18433, 256, 256, 1)
(18433, 256, 256, 6)
-------------------------------------------------------------------------------------------------
/mnt/databatches_npy/batch1training_images_flipped_around_yaxis.npy
(27649, 256, 256, 1)
(27649, 256, 256, 6)
-------------------------------------------------------------------------------------------------
/mnt/databatches_npy/batch2training_images.npy
(36865, 256, 256, 1)
(36865, 256, 256, 6)
-------------------------------------------------------------------------------------------------
/mnt/databatches_npy/batch2training_images_flipped_around_xaxis.npy
(46081, 256, 256, 1)
(46081, 256, 256, 6)
-------------------------------------------------------------------------------------------------
/mnt

2024-11-19 18:49:22.141091: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 59114913792 exceeds 10% of free system memory.


In [8]:
"""
# Fine tuning

# Define the Dice Loss function for multi-class segmentation
def dice_loss(y_true, y_pred, smooth=1e-6):
    y_true = tf.cast(y_true, tf.float32)  # Convert ground truth to float32
    y_pred = tf.cast(y_pred, tf.float32)  # Convert predictions to float32
    # Flatten the tensors to compute the intersection and union
    y_true_f = tf.reshape(y_true, (-1, y_true.shape[-1]))  # Flatten the one-hot encoded labels
    y_pred_f = tf.reshape(y_pred, (-1, y_pred.shape[-1]))  # Flatten the predicted values
    
    # Calculate intersection and union
    intersection = tf.reduce_sum(y_true_f * y_pred_f, axis=0)
    union = tf.reduce_sum(y_true_f, axis=0) + tf.reduce_sum(y_pred_f, axis=0)
    
    # Calculate the Dice coefficient for each class
    dice = (2. * intersection + smooth) / (union + smooth)
    
    # Calculate the Dice loss (1 - Dice coefficient)
    dice_loss = 1 - tf.reduce_mean(dice)
    
    return dice_loss

# Define the combined loss function (Cross-Entropy + Dice Loss)
def combined_loss(y_true, y_pred, alpha=0.5):
    y_true = tf.cast(y_true, tf.float32)  # Convert ground truth to float32
    y_pred = tf.cast(y_pred, tf.float32)  # Convert predictions to float32
    # Cross-Entropy Loss (categorical)
    crossentropy_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_true, y_pred))
    
    # Dice Loss
    dice = dice_loss(y_true, y_pred)
    
    # Combined Loss (weighted sum of cross-entropy and dice loss)
    total_loss = alpha * crossentropy_loss + (1 - alpha) * dice
    
    return total_loss

# Example usage:
# y_true = ground truth mask (shape: (batch_size, 256, 256, 6))
# y_pred = predicted segmentation mask (shape: (batch_size, 256, 256, 6))
# loss = combined_loss(y_true, y_pred)
"""

'\n# Fine tuning\n\n# Define the Dice Loss function for multi-class segmentation\ndef dice_loss(y_true, y_pred, smooth=1e-6):\n    y_true = tf.cast(y_true, tf.float32)  # Convert ground truth to float32\n    y_pred = tf.cast(y_pred, tf.float32)  # Convert predictions to float32\n    # Flatten the tensors to compute the intersection and union\n    y_true_f = tf.reshape(y_true, (-1, y_true.shape[-1]))  # Flatten the one-hot encoded labels\n    y_pred_f = tf.reshape(y_pred, (-1, y_pred.shape[-1]))  # Flatten the predicted values\n    \n    # Calculate intersection and union\n    intersection = tf.reduce_sum(y_true_f * y_pred_f, axis=0)\n    union = tf.reduce_sum(y_true_f, axis=0) + tf.reduce_sum(y_pred_f, axis=0)\n    \n    # Calculate the Dice coefficient for each class\n    dice = (2. * intersection + smooth) / (union + smooth)\n    \n    # Calculate the Dice loss (1 - Dice coefficient)\n    dice_loss = 1 - tf.reduce_mean(dice)\n    \n    return dice_loss\n\n# Define the combined loss f

In [9]:
def dice_loss_channel_wise(y_true, y_pred, smooth=1e-6):
    """
    Compute channel-wise Dice loss for multi-class segmentation.

    y_true: True segmentation masks (one-hot encoded) of shape (batch_size, height, width, num_classes)
    y_pred: Predicted segmentation masks (probabilities from softmax) of shape (batch_size, height, width, num_classes)
    smooth: Small constant to avoid division by zero
    """
    # Ensure the ground truth and predictions are float32
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Flatten the tensors for pixel-wise operations: (batch_size * height * width, num_classes)
    y_true_f = tf.reshape(y_true, (-1, y_true.shape[-1]))  # Flatten the labels
    y_pred_f = tf.reshape(y_pred, (-1, y_pred.shape[-1]))  # Flatten the predictions

    # Calculate intersection and union for each class/channel (axis=0 means per class)
    intersection = tf.reduce_sum(y_true_f * y_pred_f, axis=0)
    union = tf.reduce_sum(y_true_f, axis=0) + tf.reduce_sum(y_pred_f, axis=0)

    # Compute Dice coefficient for each class (channel)
    dice = (2. * intersection + smooth) / (union + smooth)

    # Compute Dice loss for each class (1 - Dice coefficient)
    dice_loss_per_channel = 1 - dice

    # Return the average Dice loss across all classes
    return tf.reduce_mean(dice_loss_per_channel)

def pixel_wise_binary_crossentropy_loss(y_true, y_pred):
    """
    Compute pixel-wise binary cross-entropy loss for multi-class segmentation.

    y_true: True segmentation masks (one-hot encoded) of shape (batch_size, height, width, num_classes)
    y_pred: Predicted segmentation masks (probabilities from softmax) of shape (batch_size, height, width, num_classes)
    """
    # Ensure the ground truth and predictions are float32
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Calculate binary cross-entropy loss for each pixel and each class
    bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    
    # Compute binary cross-entropy loss for each class (pixel-wise)
    loss_per_channel = bce_loss(y_true, y_pred)

    # Return the average loss across all classes and pixels
    return tf.reduce_mean(loss_per_channel)


def combined_loss(y_true, y_pred, alpha=0.5):
    """
    Combined loss function: Binary Cross-Entropy Loss + Channel-wise Dice Loss.

    y_true: True segmentation masks (one-hot encoded) of shape (batch_size, height, width, num_classes)
    y_pred: Predicted segmentation masks (probabilities from softmax) of shape (batch_size, height, width, num_classes)
    alpha: Weighting factor for the binary cross-entropy and Dice loss components
    """
    # Compute Pixel-wise Binary Cross-Entropy Loss
    bce_loss = pixel_wise_binary_crossentropy_loss(y_true, y_pred)

    # Compute Channel-wise Dice Loss
    dice_loss = dice_loss_channel_wise(y_true, y_pred)

    # Combine both losses (weighted sum)
    total_loss = alpha * bce_loss + (1 - alpha) * dice_loss

    return total_loss


In [10]:
"""
# Define your optimizer (Adam in this case)
optimizer = tf.keras.optimizers.Adam()

# Function to train on one batch
@tf.function
def train_step(x_batch, y_batch):
    with tf.GradientTape() as tape:
        # Forward pass
        y_pred = generator(x_batch, training=True)
        
        # Calculate the loss
        loss = combined_loss(y_batch, y_pred)
    
    # Compute gradients
    grads = tape.gradient(loss, generator.trainable_variables)
    
    # Update weights
    optimizer.apply_gradients(zip(grads, generator.trainable_variables))
    
    return loss
"""

'\n# Define your optimizer (Adam in this case)\noptimizer = tf.keras.optimizers.Adam()\n\n# Function to train on one batch\n@tf.function\ndef train_step(x_batch, y_batch):\n    with tf.GradientTape() as tape:\n        # Forward pass\n        y_pred = generator(x_batch, training=True)\n        \n        # Calculate the loss\n        loss = combined_loss(y_batch, y_pred)\n    \n    # Compute gradients\n    grads = tape.gradient(loss, generator.trainable_variables)\n    \n    # Update weights\n    optimizer.apply_gradients(zip(grads, generator.trainable_variables))\n    \n    return loss\n'

In [11]:
"""
# training 
def train(model, train_dataset, val_dataset, epochs, batch_size, weights_path=None, save_interval=1):
    # Load pre-trained weights if provided
    if weights_path:
        model.load_weights(weights_path)
        print(f"Loaded weights from {weights_path}")

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        
        # Training phase
        train_loss = 0
        for step, (x_batch, y_batch) in enumerate(train_dataset.batch(batch_size)):
            loss = train_step(x_batch, y_batch)  # Assuming `train_step` function is defined
            train_loss += loss

        # Compute average training loss for this epoch
        train_loss /= step + 1
        print(f"Training Loss: {train_loss.numpy()}")

        # Validation phase
        val_loss = 0
        for x_batch_val, y_batch_val in val_dataset.batch(batch_size):
            y_pred_val = model(x_batch_val, training=False)
            val_loss += combined_loss(y_batch_val, y_pred_val)  # Assuming `combined_loss` function is defined

        # Compute average validation loss
        val_loss /= len(val_dataset)
        print(f"Validation Loss: {val_loss.numpy()}")

        # Save model weights periodically (every `save_interval` epochs)
        if (epoch + 1) % save_interval == 0:
            save_path = f'checkpoint_epoch_{epoch + 1}.h5'
            model.save_weights(save_path)
            print(f"Model weights saved to {save_path}")

# Example usage:
# Assuming `train_dataset` and `val_dataset` are TensorFlow Dataset objects
# containing image-mask pairs and you've defined `train_step` and `combined_loss`


"""

'\n# training \ndef train(model, train_dataset, val_dataset, epochs, batch_size, weights_path=None, save_interval=1):\n    # Load pre-trained weights if provided\n    if weights_path:\n        model.load_weights(weights_path)\n        print(f"Loaded weights from {weights_path}")\n\n    for epoch in range(epochs):\n        print(f"Epoch {epoch + 1}/{epochs}")\n        \n        # Training phase\n        train_loss = 0\n        for step, (x_batch, y_batch) in enumerate(train_dataset.batch(batch_size)):\n            loss = train_step(x_batch, y_batch)  # Assuming `train_step` function is defined\n            train_loss += loss\n\n        # Compute average training loss for this epoch\n        train_loss /= step + 1\n        print(f"Training Loss: {train_loss.numpy()}")\n\n        # Validation phase\n        val_loss = 0\n        for x_batch_val, y_batch_val in val_dataset.batch(batch_size):\n            y_pred_val = model(x_batch_val, training=False)\n            val_loss += combined_loss

In [12]:
# Load initial weights if available, or train from scratch if not
#weights_path = './training_checkpoints/enhanced_unet_Generator_model_weights_ckpt47.h5'  
#train(generator, train_dataset, test_dataset, epochs=50, batch_size=16, weights_path=weights_path, save_interval=1)

In [13]:
def dice_coefficient(y_true, y_pred):
    smooth = 1e-6
    intersection = tf.reduce_sum(y_true * y_pred, axis=(1, 2, 3))  # Element-wise multiplication
    union = tf.reduce_sum(y_true, axis=(1, 2, 3)) + tf.reduce_sum(y_pred, axis=(1, 2, 3))
    dice = (2.0 * intersection + smooth) / (union + smooth)
    return tf.reduce_mean(dice)  # Return the average Dice coefficient


In [14]:
weights_path = './training_checkpoints/enhanced_unet_Generator_model_weights_ckpt47.h5' 

#weights_path = '/mnt/Final_PMRI_repo/lower_limb_calf_muscle_segmentation/training_checkpoints/checkpoint_epoch_35.h5'

generator.load_weights(weights_path)
print(f"Loaded weights from {weights_path}")

Loaded weights from ./training_checkpoints/enhanced_unet_Generator_model_weights_ckpt47.h5


In [15]:
generator.compile(optimizer='adam', loss=combined_loss, metrics=[dice_coefficient])

In [16]:


# Define the checkpoint callback to save weights after each epoch
checkpoint_callback = ModelCheckpoint(
    'checkpoint_epoch_{epoch:02d}.h5',  # File path to save weights after each epoch
    save_weights_only=True,              # Save only the weights, not the entire model
    save_best_only=False,                # Save weights at every epoch (not just the best model)
    verbose=1                            # Display saving status
)

# Train the model with validation data and the checkpoint callback
history = generator.fit(
    train_dataset,  # Your training dataset (TensorFlow Dataset or numpy array)
    epochs=500,      # Number of epochs to train
    validation_data=test_dataset,  # Your validation dataset
    batch_size=256,  # Batch size for training
    callbacks=[checkpoint_callback]  # Include the checkpoint callback
)

2024-11-19 13:16:52.652865: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 59114913792 exceeds 10% of free system memory.


Epoch 1/50


2024-11-19 13:17:17.645716: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel/sequential_1/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2024-11-19 13:17:17.971320: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2024-11-19 13:17:18.042314: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-11-19 13:17:18.587934: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-11-19 13:17:23.608760: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fc561f2c650 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-11-19 13:17:23.608786: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForc

Epoch 1: saving model to checkpoint_epoch_01.h5
Epoch 2/50
Epoch 2: saving model to checkpoint_epoch_02.h5
Epoch 3/50
Epoch 3: saving model to checkpoint_epoch_03.h5
Epoch 4/50
Epoch 4: saving model to checkpoint_epoch_04.h5
Epoch 5/50
Epoch 5: saving model to checkpoint_epoch_05.h5
Epoch 6/50
Epoch 6: saving model to checkpoint_epoch_06.h5
Epoch 7/50
Epoch 7: saving model to checkpoint_epoch_07.h5
Epoch 8/50
Epoch 8: saving model to checkpoint_epoch_08.h5
Epoch 9/50
Epoch 9: saving model to checkpoint_epoch_09.h5
Epoch 10/50
Epoch 10: saving model to checkpoint_epoch_10.h5
Epoch 11/50
Epoch 11: saving model to checkpoint_epoch_11.h5
Epoch 12/50
Epoch 12: saving model to checkpoint_epoch_12.h5
Epoch 13/50
Epoch 13: saving model to checkpoint_epoch_13.h5
Epoch 14/50
Epoch 14: saving model to checkpoint_epoch_14.h5
Epoch 15/50
Epoch 15: saving model to checkpoint_epoch_15.h5
Epoch 16/50
Epoch 16: saving model to checkpoint_epoch_16.h5
Epoch 17/50
Epoch 17: saving model to checkpoint_epoc

KeyboardInterrupt: 

In [21]:
learning_rate = 1e-4  # Example learning rate
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)


In [22]:
weights_path = '/mnt/Final_PMRI_repo/lower_limb_calf_muscle_segmentation/training_checkpoints/checkpoint_epoch_35.h5'

generator.load_weights(weights_path)
print(f"Loaded weights from {weights_path}")

Loaded weights from /mnt/Final_PMRI_repo/lower_limb_calf_muscle_segmentation/training_checkpoints/checkpoint_epoch_35.h5


In [23]:
generator.compile(optimizer=adam_optimizer, loss=combined_loss, metrics=[dice_coefficient])

In [25]:
# Define the checkpoint callback to save weights after each epoch
checkpoint_callback = ModelCheckpoint(
    'checkpoint_epoch_{epoch:02d}.h5',  # File path to save weights after each epoch
    save_weights_only=True,              # Save only the weights, not the entire model
    save_best_only=False,                # Save weights at every epoch (not just the best model)
    verbose=1                            # Display saving status
)

# Train the model with validation data and the checkpoint callback
history = generator.fit(
    train_dataset,  # Your training dataset (TensorFlow Dataset or numpy array)
    epochs=500,      # Number of epochs to train
    validation_data=test_dataset,  # Your validation dataset
    batch_size=32,  # Batch size for training
    callbacks=[checkpoint_callback]  # Include the checkpoint callback
)

Epoch 1/500


2024-11-19 19:00:13.709359: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-11-19 19:00:21.442508: I external/local_xla/xla/service/service.cc:168] XLA service 0x7ff55a85bbe0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-11-19 19:00:21.442533: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2024-11-19 19:00:21.442538: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (1): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2024-11-19 19:00:21.445814: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1732014021.515121  135369 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 1: saving model to checkpoint_epoch_01.h5
Epoch 2/500
Epoch 2: saving model to checkpoint_epoch_02.h5
Epoch 3/500
Epoch 3: saving model to checkpoint_epoch_03.h5
Epoch 4/500
Epoch 4: saving model to checkpoint_epoch_04.h5
Epoch 5/500
Epoch 5: saving model to checkpoint_epoch_05.h5
Epoch 6/500
Epoch 6: saving model to checkpoint_epoch_06.h5
Epoch 7/500
Epoch 7: saving model to checkpoint_epoch_07.h5
Epoch 8/500
Epoch 8: saving model to checkpoint_epoch_08.h5
Epoch 9/500
Epoch 9: saving model to checkpoint_epoch_09.h5
Epoch 10/500
Epoch 10: saving model to checkpoint_epoch_10.h5
Epoch 11/500
Epoch 11: saving model to checkpoint_epoch_11.h5
Epoch 12/500
Epoch 12: saving model to checkpoint_epoch_12.h5
Epoch 13/500
Epoch 13: saving model to checkpoint_epoch_13.h5
Epoch 14/500
Epoch 14: saving model to checkpoint_epoch_14.h5
Epoch 15/500
Epoch 15: saving model to checkpoint_epoch_15.h5
Epoch 16/500
Epoch 16: saving model to checkpoint_epoch_16.h5
Epoch 17/500
Epoch 17: saving model to

KeyboardInterrupt: 