In [None]:

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import seaborn as sns
import kagglehub


In [None]:
# Fetch the most recent version of the LFW dataset through kagglehub and
# report where the files ended up on disk.
dataset_handle = "jessicali9530/lfw-dataset"
path = kagglehub.dataset_download(dataset_handle)
print("Path to dataset files:", path)

In [None]:
# Define the folder containing images
image_folder = os.path.join(path, "lfw-deepfunneled")  # Update if folder name differs

# os.walk() silently yields nothing for a missing directory, which would only
# surface later as "Found 0 image files" and confusing downstream errors.
# Fail here instead, with a message that points at the actual problem.
if not os.path.isdir(image_folder):
    raise FileNotFoundError(
        f"Image folder not found: {image_folder!r}. "
        f"Check the folder name inside the downloaded dataset at {path!r}."
    )

In [None]:
# List all images in the dataset folder.
# The result is sorted because os.walk() returns entries in filesystem order,
# which differs between machines; later cells slice this list, so a stable
# order is needed for reproducible runs. The extension check is
# case-insensitive so files such as "photo.JPG" are not silently dropped.
image_files = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk(image_folder)
    for file in files
    if file.lower().endswith(('.jpg', '.jpeg', '.png'))
)

print(f"Found {len(image_files)} image files.")

# Display the first ten images in a single 2x5 grid
preview_paths = image_files[:10]
if preview_paths:
    fig, axes = plt.subplots(2, 5, figsize=(10, 4.5))
    grid = axes.ravel()
    for ax in grid:
        ax.axis('off')  # also hides the unused cells when fewer than ten images exist
    for i, (ax, img_path) in enumerate(zip(grid, preview_paths)):
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None (no exception) for unreadable files
            ax.set_title(f"Image {i+1}\n(unreadable)")
            continue
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(f"Image {i+1}")
    fig.tight_layout()
    plt.show()

In [None]:
# Preprocessing: load, resize and normalise images (no augmentation is applied here)
def preprocess_images(image_paths, target_size=(224, 224)):
    """Load images from disk and derive a label for each from its folder name.

    Each readable image is converted from OpenCV's BGR channel order to RGB,
    resized to ``target_size``, cast to float32 and divided by 255.

    Args:
        image_paths: Iterable of image file paths. The label of an image is the
            name of the directory that directly contains it.
        target_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. ``images`` is float32 and
        holds one entry per readable image; ``labels`` holds the matching
        folder names in the same order. Unreadable files are skipped (and
        reported once) so the two arrays always stay aligned.
    """
    images = []
    labels = []
    unreadable = []

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising
            unreadable.append(img_path)
            continue

        # OpenCV loads BGR; convert to RGB, the channel order normally fed to
        # ImageNet-pretrained backbones.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, target_size)

        # float32 halves the memory of the default float64 result of `img / 255.0`
        images.append(img.astype(np.float32) / 255.0)

        # Parent directory name, extracted portably (works with any path separator)
        labels.append(os.path.basename(os.path.dirname(img_path)))

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable image(s), e.g. {unreadable[0]}")

    return np.array(images, dtype=np.float32), np.array(labels)

In [None]:
# Run preprocessing on a capped subset of the discovered files (the first
# 6000 entries of image_files), then report what came back.
subset_paths = image_files[:6000]
images, labels = preprocess_images(subset_paths)
print("Processed images shape:", images.shape)
print("Unique labels:", set(labels))

In [None]:
# Turn the string labels into integer ids, then into one-hot rows.
# The fitted encoder is kept so ids can be mapped back to names later.
label_encoder = LabelEncoder().fit(labels)
encoded_labels = label_encoder.transform(labels)
categorical_labels = to_categorical(encoded_labels)
print("Encoded labels shape:", categorical_labels.shape)

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state makes
# the split repeatable across runs.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    categorical_labels,
    test_size=0.2,
    random_state=42,
)
print("Training set size:", X_train.shape)
print("Validation set size:", X_val.shape)

In [None]:
# Model Fine-Tuning
def create_densenet_model(input_shape=(224, 224, 3), num_classes=None, trainable_layers=40):
    """Build a DenseNet121-based classifier prepared for fine-tuning.

    Args:
        input_shape: Shape of one input image, passed to the Keras ``Input``.
        num_classes: Size of the softmax output. When ``None`` (default) it is
            taken from the global ``labels`` array at call time, so re-running
            preprocessing never leaves a stale class count behind.
        trainable_layers: Number of layers at the end of the DenseNet121
            backbone that remain trainable; all earlier backbone layers are
            frozen.

    Returns:
        An uncompiled Keras ``Model`` mapping images to class probabilities.
    """
    if num_classes is None:
        num_classes = len(set(labels))

    base_model = DenseNet121(weights='imagenet', include_top=False, input_tensor=Input(shape=input_shape))

    # Keras layers are trainable by default, so only setting trainable=True on
    # the last layers changes nothing. To fine-tune just the top of the
    # backbone, the earlier layers have to be frozen explicitly.
    n_frozen = max(len(base_model.layers) - trainable_layers, 0)
    for layer in base_model.layers[:n_frozen]:
        layer.trainable = False
    for layer in base_model.layers[n_frozen:]:
        layer.trainable = True

    x = Flatten()(base_model.output)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)  # Adding dropout to prevent overfitting
    output = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=output)
    return model

model = create_densenet_model()
model.summary()

In [None]:
# Configure training: Adam with a learning rate of 1e-4, categorical
# cross-entropy as the loss (targets are one-hot), accuracy as the metric.
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Training with Callbacks
# Halve the learning rate when validation loss has not improved for 5 epochs.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
# Stop once validation loss has stalled for 10 epochs and roll back to the
# best weights. The patience is longer than reduce_lr's so the learning rate
# gets a chance to drop before training is abandoned.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=40,  # Upper bound; early stopping may end training sooner
    batch_size=16,  # Adjust as per memory availability
    callbacks=[reduce_lr, early_stopping],  # callbacks only take effect when passed to fit()
)

In [None]:
# Evaluation Metrics Section
def plot_training_history(history):
    """Show accuracy and loss curves for training and validation side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' (as returned by
            ``model.fit`` in this notebook).
    """
    curves = history.history

    # One entry per panel:
    # (train key, validation key, train legend, validation legend, y label, title)
    panels = (
        ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
         'Accuracy', 'Training and Validation Accuracy'),
        ('loss', 'val_loss', 'Train Loss', 'Validation Loss',
         'Loss', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(12, 6))
    for slot, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()

    plt.tight_layout()
    plt.show()

plot_training_history(history)

In [None]:
def plot_binary_confusion_matrix(y_true, y_pred):
    """
    Plots a 2x2 confusion matrix over binary "incorrect / correct" flags.

    Args:
        y_true: Array-like of binary flags (bool or 0/1) for the "True" axis.
        y_pred: Array-like of binary flags (bool or 0/1) for the "Predicted" axis.

    The flags are cast to 0/1 and the label set is pinned to [0, 1], so the
    matrix is always 2x2. Without that, inputs where every flag has the same
    value produce a 1x1 matrix and the fixed 'Incorrect'/'Correct' tick labels
    no longer line up with the cells.
    """
    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Incorrect', 'Correct'],
                yticklabels=['Incorrect', 'Correct'])

    plt.xticks(rotation=0, fontsize=10)
    plt.yticks(rotation=0, fontsize=10)
    plt.xlabel('Predicted', fontsize=12)
    plt.ylabel('True', fontsize=12)
    plt.title('Binary Confusion Matrix', fontsize=14)
    plt.tight_layout()
    plt.show()

# Score the validation images, then collapse the probability rows and the
# one-hot targets to integer class ids.
y_pred = model.predict(X_val)
y_true = np.argmax(y_val, axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)

# Per-sample correctness flags (True where the predicted id matches the target).
# Both "binary" vectors are the same array, so the matrix plotted below can
# only ever have entries on its diagonal.
y_binary_true = y_pred_classes == y_true
y_binary_pred = y_binary_true

# Positions of the correctly classified validation samples
correct_indices = np.flatnonzero(y_binary_true)

# List up to 50 of the correct predictions
print("First 50 Correct Predictions:")
first_hits = correct_indices[:50]
for idx in first_hits:
    print(f"Index: {idx}, True Label: {y_true[idx]}, Predicted Label: {y_pred_classes[idx]}")

# Draw the correct/incorrect matrix
plot_binary_confusion_matrix(y_binary_true, y_binary_pred)

In [None]:
def classification_metrics(y_true, y_pred, label_encoder):
    """Print a classification report limited to the classes that occur.

    Args:
        y_true: 1-D array of integer class ids (ground truth).
        y_pred: 1-D array of integer class ids (predictions).
        label_encoder: Fitted encoder whose ``classes_`` array is indexed by
            class id to obtain the display names.
    """
    # Sorted, de-duplicated ids that appear in either array
    present_ids = np.union1d(y_true, y_pred)

    # Human-readable names for exactly those ids
    present_names = label_encoder.classes_[present_ids]

    report = classification_report(
        y_true,
        y_pred,
        labels=present_ids,
        target_names=present_names,
    )
    print("Classification Report:\n", report)

# Report on the validation predictions
classification_metrics(y_true, y_pred_classes, label_encoder)


In [None]:
# ROC and AUC Section
def plot_roc_auc(y_true, y_pred):
    """Plot a single ROC curve, with its AUC, over flattened targets and scores.

    Both arrays are flattened before scoring, so every element of ``y_true``
    is paired with the element of ``y_pred`` at the same position and treated
    as one binary decision.

    Args:
        y_true: Array of binary targets (here: one-hot validation labels).
        y_pred: Array of scores with the same number of elements as ``y_true``.
    """
    flat_targets = y_true.ravel()
    flat_scores = y_pred.ravel()
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(flat_targets, flat_scores)
    roc_auc = auc(false_pos_rate, true_pos_rate)

    plt.figure(figsize=(8, 6))
    plt.plot(false_pos_rate, true_pos_rate, color='blue', label=f'ROC Curve (AUC = {roc_auc:.2f})')
    # Diagonal reference line from (0, 0) to (1, 1)
    plt.plot([0, 1], [0, 1], color='red', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend()
    plt.show()

plot_roc_auc(y_val, y_pred)