In [5]:
import os
import numpy as np
import tensorflow as tf
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity


def load_dataset(data_dir, target_size=(224, 224)):
    images = []
    labels = []
    class_names = []
    for idx, person_name in enumerate(os.listdir(data_dir)):
        person_dir = os.path.join(data_dir, person_name)
        if not os.path.isdir(person_dir):
            continue
        class_names.append(person_name)
        for img_name in os.listdir(person_dir):
            img_path = os.path.join(person_dir, img_name)
            if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):  # Skip non-image files
                continue
            try:
                img = load_img(img_path, target_size=target_size)
                img_array = img_to_array(img)
                images.append(preprocess_input(img_array, version=2))
                labels.append(idx)
            except Exception as e:
                print(f"Error loading image {img_path}: {e}")

    return np.array(images), np.array(labels), class_names

# Path to the dataset
data_dir = r'C:\Users\Alpha\Desktop\Project in secure ML\dataset\lfw-deepfunneled\lfw-deepfunneled'

# Load and preprocess the dataset
images, labels, class_names = load_dataset(data_dir)
print(f"Loaded {len(images)} images from {len(class_names)} classes.")

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Convert labels to categorical
y_train = tf.keras.utils.to_categorical(y_train, num_classes=len(class_names))
y_test = tf.keras.utils.to_categorical(y_test, num_classes=len(class_names))

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
datagen.fit(X_train)

# Load the VGGFace model as a base model
base_model = VGGFace(model='resnet50', include_top=False, input_shape=(224, 224, 3), pooling='avg')

# Unfreeze the last few layers of the base model for fine-tuning
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Define the complete model
model = Sequential([
    base_model,
    Dense(512, activation='relu'),  # Added an additional dense layer
    Dropout(0.5),
    Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),  # Added L2 regularization
    Dropout(0.5),
    Dense(len(class_names), activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Add a learning rate scheduler
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1
)

# Train the model
model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_test, y_test),
    epochs=30,  # Increased number of epochs
    callbacks=[lr_scheduler]
)

# Evaluate the model
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
print(f'Classification Accuracy: {accuracy * 100:.2f}%')

# Extract embeddings for biometric identification
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
embeddings_test = feature_extractor.predict(X_test)
embeddings_train = feature_extractor.predict(X_train)

# Compute cosine similarity for biometric identification
scores = cosine_similarity(embeddings_test, embeddings_train)

# Identify closest matches
closest_match = np.argmax(scores, axis=1)
biometric_accuracy = np.mean(closest_match == np.argmax(y_test, axis=1))
print(f'Biometric Identification Accuracy: {biometric_accuracy * 100:.2f}%')


Loaded 13233 images from 5749 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 19: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 22: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 25: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 28: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 29/30
Epoch 30/30
Classification Accuracy: 24.18%


InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.