In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input  # Correct import for MobileNetV2
import os

# Define path to your LFW dataset
# NOTE(review): the leading backslash makes this path root-relative on Windows, while the
# QMUL cell uses a path relative to the working directory — confirm which is intended.
lfw_path = r"\lfw_subset"  # Replace with your actual path

# Create ImageDataGenerator for LFW with data augmentation for training
lfw_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # Use MobileNetV2's preprocess_input
    validation_split=0.3,  # Hold out 30% of each class for validation
    rotation_range=20,  # Randomly rotate images in the range (degrees)
    width_shift_range=0.2,  # Randomly shift images horizontally
    height_shift_range=0.2,  # Randomly shift images vertically
    shear_range=0.2,  # Shear intensity
    zoom_range=0.2,  # Randomly zoom into images
    horizontal_flip=True,  # Randomly flip images
    fill_mode='nearest'  # Fill pixels that are created after rotation or shifting
)

# Get all class names and limit to the first 250 classes.
# os.listdir() returns entries in arbitrary order and also lists plain files, so the
# names are filtered to sub-directories and sorted: the selected subset and the saved
# index mapping are then identical on every run and every machine.
all_class_names = sorted(
    entry
    for entry in os.listdir(lfw_path)
    if os.path.isdir(os.path.join(lfw_path, entry))
)
limited_class_names = all_class_names[:250]  # Adjust this based on your needs

# Load training data for limited classes
lfw_train_generator = lfw_datagen.flow_from_directory(
    lfw_path,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    subset='training',  # Specify that this is the training subset
    classes=limited_class_names  # Specify limited classes here
)

# Load validation data without augmentation
lfw_validation_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # Use MobileNetV2's preprocess_input
    validation_split=0.3  # Must match the training generator's split so the subsets do not overlap
)

lfw_validation_generator = lfw_validation_datagen.flow_from_directory(
    lfw_path,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    subset='validation',  # Specify that this is the validation subset
    classes=limited_class_names  # Same class list as training so the label indices agree
)

# Get number of classes from the training generator
num_classes = len(lfw_train_generator.class_indices)

# Save class indices and names to a text file
class_indices = lfw_train_generator.class_indices

# Define the output file path for class indices
output_file_path = r"lfw_subset_class_indices.txt"  # Replace with your desired output path

# One "<index>: <class name>" line per class; explicit encoding keeps the file portable.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for class_name, class_index in class_indices.items():
        f.write(f"{class_index}: {class_name}\n")

print(f"Class indices saved to {output_file_path}")


: 

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input  # Use MobileNetV2's preprocess_input
import os

# Define path to your QMUL dataset
qmul_path = r"qmul_subset"  # Replace with your actual path

# Get all class names (subdirectories) in the QMUL dataset directory.
# os.listdir() returns entries in arbitrary order and also lists plain files, so the
# names are filtered to sub-directories and sorted: the selected subset and the saved
# index mapping are then identical on every run and every machine.
all_class_names = sorted(
    entry
    for entry in os.listdir(qmul_path)
    if os.path.isdir(os.path.join(qmul_path, entry))
)

# Limit to the first 250 classes (or adjust based on your needs)
limited_class_names = all_class_names[:250]

# Create ImageDataGenerator for QMUL with data augmentation for training
qmul_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # Use MobileNetV2's preprocess_input
    validation_split=0.3,  # Hold out 30% of each class for validation
    rotation_range=20,  # Randomly rotate images in the range (degrees)
    width_shift_range=0.2,  # Randomly shift images horizontally
    height_shift_range=0.2,  # Randomly shift images vertically
    shear_range=0.2,  # Shear intensity
    zoom_range=0.2,  # Randomly zoom into images
    horizontal_flip=True,  # Randomly flip images
    fill_mode='nearest'  # Fill pixels that are created after rotation or shifting
)

# Load training data for limited classes
qmul_train_generator = qmul_datagen.flow_from_directory(
    qmul_path,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    subset='training',  # Specify that this is the training subset
    classes=limited_class_names  # Specify limited classes here
)

# Load validation data without augmentation for limited classes
qmul_validation_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # Use MobileNetV2's preprocess_input
    validation_split=0.3  # Must match the training generator's split so the subsets do not overlap
)

qmul_validation_generator = qmul_validation_datagen.flow_from_directory(
    qmul_path,
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
    subset='validation',  # Specify that this is the validation subset
    classes=limited_class_names  # Same class list as training so the label indices agree
)

# Get number of classes from the training generator
num_classes = len(qmul_train_generator.class_indices)

# Save class indices and names to a text file
class_indices = qmul_train_generator.class_indices

# Define the output file path for class indices
output_file_path = r"qmul_subset_class_indices.txt"  # Replace with your desired output path

# One "<index>: <class name>" line per class; explicit encoding keeps the file portable.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for class_name, class_index in class_indices.items():
        f.write(f"{class_index}: {class_name}\n")

print(f"Class indices saved to {output_file_path}")


Found 11311 images belonging to 250 classes.
Found 4701 images belonging to 250 classes.
Class indices saved to qmul_subset_class_indices.txt


In [3]:
import tensorflow as tf
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, BatchNormalization,Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.mixed_precision import set_global_policy

# Set mixed precision policy.
# float16 compute only pays off on a GPU; on a CPU-only machine it is slower than
# float32, so the policy is enabled only when a GPU is visible. The else-branch keeps
# the cell idempotent when it is re-run in a kernel where the policy was already changed.
if tf.config.list_physical_devices('GPU'):
    set_global_policy('mixed_float16')
else:
    set_global_policy('float32')

# Define the base model (MobileNetV2 pre-trained on ImageNet, without its classifier head)
base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze all layers except the last 10 so only the tail of the backbone is fine-tuned.
# NOTE(review): the unfrozen tail presumably contains BatchNormalization layers; the Keras
# transfer-learning guide advises keeping those in inference mode while fine-tuning — confirm.
for layer in base_model.layers[:-10]:
    layer.trainable = False

# Create the model architecture
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Reduce dimensions to 1280

# Use dropout for regularization
x = Dropout(0.3)(x)  # Regularization
x = BatchNormalization()(x)  # Normalize activations
x = Dropout(0.325)(x)  # Regularization
# Classification layer for 250 classes. dtype='float32' overrides the global policy so
# the softmax probabilities (and therefore the loss) are never computed in float16.
x = Dense(250, activation='softmax', dtype='float32')(x)

# Create the final classification model (backbone + new head)
model = Model(inputs=base_model.input, outputs=x)

# Compile the model for training with one-hot (categorical) labels
optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks for training
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)

print("Model ready to train")


Model ready to train


In [4]:
import os
import tensorflow as tf
import matplotlib.pyplot as plt  
import numpy as np
from tensorflow.keras.callbacks import Callback, TensorBoard, EarlyStopping, ReduceLROnPlateau

# Function to train the model on a given dataset
def train_on_dataset(model, train_generator, validation_generator, dataset_name,
                     epochs=1, callbacks=None):
    """Fit ``model`` on one dataset and report its last validation loss.

    Args:
        model: Compiled Keras model; trained in place via ``model.fit``.
        train_generator: Training data passed to ``model.fit``.
        validation_generator: Data passed to ``model.fit`` as ``validation_data``.
        dataset_name: Label used in the TensorBoard log directory and in messages.
        epochs: Number of epochs to train for (default 1, the previous fixed value).
        callbacks: Optional extra Keras callbacks. When omitted, a fresh
            EarlyStopping and ReduceLROnPlateau (both monitoring ``val_loss``)
            are created for this run, so the function no longer depends on
            module-level callback objects being defined before it is called.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Set up TensorBoard logging directory for the current dataset
    log_dir = os.path.join("logs", "fit", f"{dataset_name}_model")
    tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

    if callbacks is None:
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6),
        ]

    # Train the model on the current dataset
    print(f"Training on {dataset_name} dataset...")
    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=epochs,
        callbacks=[tensorboard_callback, *callbacks]
    )

    # Print final validation metrics (absent if no validation pass was recorded)
    val_losses = history.history.get('val_loss')
    if val_losses:
        print(f"Final Validation Loss for {dataset_name}: {val_losses[-1]}")
    else:
        print(f"No validation loss recorded for {dataset_name}")

    return history

# Callbacks for training: stop early and halve the learning rate when val_loss plateaus
early_stopping = EarlyStopping(restore_best_weights=True, patience=3, monitor='val_loss')
lr_scheduler = ReduceLROnPlateau(min_lr=1e-6, patience=2, factor=0.5, monitor='val_loss')

# Datasets to train on, in order; the generators must already exist from the earlier cells
datasets = [
    dict(
        name="lfw",
        train_generator=lfw_train_generator,
        validation_generator=lfw_validation_generator,
    ),
    dict(
        name="qmul",
        train_generator=qmul_train_generator,
        validation_generator=qmul_validation_generator,
    ),
]

# Train the same model on each dataset in turn
for dataset in datasets:
    train_on_dataset(
        model,
        dataset["train_generator"],
        dataset["validation_generator"],
        dataset["name"],
    )

# Persist the model once every dataset has been processed
final_model_path = "final_1_model.keras"
model.save(final_model_path)
print(f"Final model saved at: {final_model_path}")


Training on lfw dataset...


  self._warn_if_super_not_called()


[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14s/step - accuracy: 0.0129 - loss: 6.4547 

  self._warn_if_super_not_called()


[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2816s[0m 16s/step - accuracy: 0.0130 - loss: 6.4532 - val_accuracy: 0.1945 - val_loss: 4.8334 - learning_rate: 1.0000e-04
Final Validation Loss for lfw: 4.833428382873535
Training on qmul dataset...
[1m707/707[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9760s[0m 14s/step - accuracy: 0.0462 - loss: 5.4972 - val_accuracy: 0.0277 - val_loss: 10.9645 - learning_rate: 1.0000e-04
Final Validation Loss for qmul: 10.96452808380127
Final model saved at: final_1_model.keras
