In [1]:
from keras.models import Model
import tensorflow as tf
import os
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, LambdaCallback, LearningRateScheduler
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
from tensorflow.keras.regularizers import l2
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
import time
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.layers import Input, TimeDistributed, LSTM, Dense, Dropout, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.mobilenet import preprocess_input

In [2]:
def unfreeze_and_fine_tune(model, num_layers_to_unfreeze):
    """
    Unfreeze the last n layers of the MobileNet base model for fine-tuning.
    
    Args:
        model: The loaded Keras model containing MobileNet
        num_layers_to_unfreeze: Number of layers to unfreeze from the end
    
    Returns:
        Modified model ready for fine-tuning
    """
    # First, we need to find the MobileNet layers within the TimeDistributed layer
    time_distributed_layers = []
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.TimeDistributed):
            # Check if the wrapped layer is part of MobileNet
            if hasattr(layer.layer, 'name') and 'mobilenet' in layer.layer.name.lower():
                time_distributed_layers.append(layer)
    
    if not time_distributed_layers:
        raise ValueError("Could not find TimeDistributed layers containing MobileNet")
    
    # Print the model structure to understand what we're working with
    print("Model Layer Structure:")
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.TimeDistributed):
            print(f"TimeDistributed wrapping: {layer.layer.__class__.__name__}")
            if hasattr(layer.layer, 'layers'):
                print("Sublayers:")
                for sublayer in layer.layer.layers:
                    print(f"  - {sublayer.name}: {sublayer.__class__.__name__}")
    
    # Start by freezing all layers
    for layer in model.layers:
        layer.trainable = False
        if isinstance(layer, tf.keras.layers.TimeDistributed):
            if hasattr(layer.layer, 'trainable'):
                layer.layer.trainable = False
    
    # Unfreeze the LSTM layers (they're usually after TimeDistributed layers)
    lstm_layers = [layer for layer in model.layers if isinstance(layer, tf.keras.layers.LSTM)]
    for layer in lstm_layers:
        layer.trainable = True
        print(f"Unfroze LSTM layer: {layer.name}")
    
    # Unfreeze the dense layers at the end
    dense_layers = [layer for layer in model.layers if isinstance(layer, tf.keras.layers.Dense)]
    for layer in dense_layers:
        layer.trainable = True
        print(f"Unfroze Dense layer: {layer.name}")
    
    # For the TimeDistributed MobileNet layers, we'll selectively unfreeze the last few layers
    for td_layer in time_distributed_layers:
        if hasattr(td_layer.layer, 'layers'):
            mobilenet_layers = td_layer.layer.layers[-num_layers_to_unfreeze:]
            for layer in mobilenet_layers:
                if hasattr(layer, 'trainable'):
                    layer.trainable = True
                    print(f"Unfroze MobileNet layer: {layer.name}")
    
    # Recompile the model with a lower learning rate for fine-tuning
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    return model

class DataGenerator(Sequence):
    def __init__(self, X_path, y_path, indices, batch_size):
        self.X = np.memmap(X_path, dtype='float32', mode='r', shape=(1000, 15, 224, 224, 3))
        self.y = np.memmap(y_path, dtype='int32', mode='r', shape=(1000,))
        self.indices = indices
        self.batch_size = batch_size

    def __len__(self):
        return len(self.indices) // self.batch_size

    def __getitem__(self, idx):
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        X_batch = self.X[batch_indices]
        X_batch = preprocess_input(X_batch)  
        y_batch = np.eye(2)[self.y[batch_indices]]  
        return X_batch, y_batch

class StepTimerCallback(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        print(f"\n--- Starting Epoch {epoch + 1} ---")
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        epoch_time = time.time() - self.epoch_start_time
        print(f"--- Epoch {epoch + 1} completed in {epoch_time:.2f} seconds ---\n")

    def on_train_batch_begin(self, batch, logs=None):
        self.step_start_time = time.time()
        print(f"Step {batch + 1}/{self.params['steps']} - ", end="")

    def on_train_batch_end(self, batch, logs=None):
        step_time = time.time() - self.step_start_time
        print(f"Loss: {logs['loss']:.4f}, Accuracy: {logs['accuracy']:.4f}, Time: {step_time:.2f} seconds")

class BatchEarlyStopping(Callback):
    def __init__(self, monitor='loss', threshold=0.1, patience=5):
        super().__init__()
        self.monitor = monitor
        self.threshold = threshold
        self.patience = patience
        self.wait = 0

    def on_train_batch_end(self, batch, logs=None):
        current_value = logs.get(self.monitor)
        if current_value is not None and current_value < self.threshold:
            self.wait += 1
            if self.wait >= self.patience:
                print(f"\nEarly stopping triggered at batch {batch + 1}: {self.monitor} = {current_value:.4f}")
                self.model.stop_training = True
        else:
            self.wait = 0

batch_early_stopping_callback = BatchEarlyStopping(monitor='loss', threshold=0.1, patience=2)

checkpoint_dir = './Finetune_Checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

checkpoint_path = os.path.join(checkpoint_dir, 'model_epoch_{epoch:02d}_val_loss_{val_loss:.2f}.keras')
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=False,
    save_weights_only=False,
    verbose=1
)

def scheduler(epoch, lr):
    if epoch < 2:
        return lr
    return lr * 0.9

lr_callback = LearningRateScheduler(scheduler)

# Load your existing model
latest_checkpoint = r'E:\PosePerfect\Model\Further_improvements_Checkpoints\model_epoch_05_val_loss_0.37.keras'
cnn_lstm_model = load_model(latest_checkpoint)

indices = np.arange(1000)
train_indices, val_indices = train_test_split(indices, test_size=0.2, random_state=42)

train_gen = DataGenerator(r'E:\PosePerfect\Dataset Creation\X_final_15k.dat', r'E:\PosePerfect\Dataset Creation\y_final_15k.dat', train_indices, batch_size=10)
val_gen = DataGenerator(r'E:\PosePerfect\Dataset Creation\X_final_15k.dat', r'E:\PosePerfect\Dataset Creation\y_final_15k.dat', val_indices, batch_size=10)

# Unfreeze the last 10 layers of MobileNet
cnn_lstm_model = unfreeze_and_fine_tune(cnn_lstm_model, num_layers_to_unfreeze=20)

# Print the trainable status of all layers
print("\nTrainable status after unfreezing:")
for layer in cnn_lstm_model.layers:
    print(f"{layer.name}: {layer.trainable}")
        
cnn_lstm_model.summary()

# Continue training with the same data generators and callbacks
cnn_lstm_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    steps_per_epoch=len(train_gen),
    validation_steps=len(val_gen),
    callbacks=[checkpoint_callback, lr_callback, StepTimerCallback(), batch_early_stopping_callback],
    verbose=1
)

Model Layer Structure:
TimeDistributed wrapping: Functional
Sublayers:
  - input_layer_2: InputLayer
  - conv1: Conv2D
  - conv1_bn: BatchNormalization
  - conv1_relu: ReLU
  - conv_dw_1: DepthwiseConv2D
  - conv_dw_1_bn: BatchNormalization
  - conv_dw_1_relu: ReLU
  - conv_pw_1: Conv2D
  - conv_pw_1_bn: BatchNormalization
  - conv_pw_1_relu: ReLU
  - conv_pad_2: ZeroPadding2D
  - conv_dw_2: DepthwiseConv2D
  - conv_dw_2_bn: BatchNormalization
  - conv_dw_2_relu: ReLU
  - conv_pw_2: Conv2D
  - conv_pw_2_bn: BatchNormalization
  - conv_pw_2_relu: ReLU
  - conv_dw_3: DepthwiseConv2D
  - conv_dw_3_bn: BatchNormalization
  - conv_dw_3_relu: ReLU
  - conv_pw_3: Conv2D
  - conv_pw_3_bn: BatchNormalization
  - conv_pw_3_relu: ReLU
  - conv_pad_4: ZeroPadding2D
  - conv_dw_4: DepthwiseConv2D
  - conv_dw_4_bn: BatchNormalization
  - conv_dw_4_relu: ReLU
  - conv_pw_4: Conv2D
  - conv_pw_4_bn: BatchNormalization
  - conv_pw_4_relu: ReLU
  - conv_dw_5: DepthwiseConv2D
  - conv_dw_5_bn: BatchNorma

  self._warn_if_super_not_called()



--- Starting Epoch 1 ---
Epoch 1/10
Step 1/80 - Loss: 0.7564, Accuracy: 0.3000, Time: 105.32 seconds
[1m 1/80[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:21:41[0m 108s/step - accuracy: 0.3000 - loss: 0.7564Step 2/80 - Loss: 0.7547, Accuracy: 0.4000, Time: 3.93 seconds
[1m 2/80[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:12[0m 4s/step - accuracy: 0.3500 - loss: 0.7556     Step 3/80 - Loss: 0.7723, Accuracy: 0.4000, Time: 3.76 seconds
[1m 3/80[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:59[0m 4s/step - accuracy: 0.3667 - loss: 0.7611Step 4/80 - Loss: 0.7177, Accuracy: 0.4500, Time: 3.70 seconds
[1m 4/80[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4:50[0m 4s/step - accuracy: 0.3875 - loss: 0.7503Step 5/80 - Loss: 0.7063, Accuracy: 0.4800, Time: 3.65 seconds
[1m 5/80[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4:43[0m 4s/step - accuracy: 0.4060 - loss: 0.7415Step 6/80 - Loss: 0.6983, Accuracy: 0.5000, Time: 3.63 seconds
[1m 6/80[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4:37[0m

<keras.src.callbacks.history.History at 0x25d5ad74290>