In [18]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import os
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split

In [19]:
Dataset = '/kaggle/input/pins-face-recognition/105_classes_pins_dataset'

# File extensions treated as images; matched case-insensitively.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def collect_image_paths(root_dir, extensions=IMAGE_EXTENSIONS):
    """Recursively collect image files under ``root_dir``.

    The label of each image is the name of the directory that directly
    contains it.

    Args:
        root_dir: Directory to walk.
        extensions: Tuple of lower-case file suffixes to accept.

    Returns:
        A ``(paths, labels)`` pair of equally long lists, ordered by
        directory name and then by file name.
    """
    paths = []
    labels = []
    for root, dirs, files in os.walk(root_dir):
        # Sorting in place fixes the order in which os.walk descends, so the
        # resulting lists (and any seeded split of them) do not depend on
        # the filesystem's directory listing order.
        dirs.sort()
        label = os.path.basename(root)
        for file in sorted(files):
            # Lower-case before matching so '.JPG' / '.PNG' are not skipped.
            if file.lower().endswith(extensions):
                paths.append(os.path.join(root, file))
                labels.append(label)
    return paths, labels


# Load all image paths and labels
all_image_paths, all_labels = collect_image_paths(Dataset)

# Count the number of images per label
label_counts = Counter(all_labels)

In [20]:
# Hold out 20% of the images with a plain (non-stratified) seeded random split.
split_parts = train_test_split(
    all_image_paths,
    all_labels,
    test_size=0.2,
    random_state=42,
)
train_paths, test_paths, train_labels, test_labels = split_parts

print(f"Number of Classes : {len(label_counts)}")
print(f"Number of training samples: {len(train_paths)}")
print(f"Number of testing samples: {len(test_paths)}")

# Light geometric augmentation, applied to training images only.
augmentation_options = dict(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)

# Both generators run the DenseNet preprocessing function on every image;
# the test generator applies no augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options,
)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

Number of Classes : 105
Number of training samples: 14027
Number of testing samples: 3507


In [21]:
# Settings shared by the training and test iterators.
generator_options = dict(
    x_col='filename',
    y_col='class',
    target_size=(224, 224),  # DenseNet input size
    batch_size=64,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': train_paths, 'class': train_labels}),
    **generator_options,
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': test_paths, 'class': test_labels}),
    # Reuse the training label -> index mapping so both iterators encode
    # classes identically even if the test split happens to miss a class.
    classes=list(train_generator.class_indices),
    # Keep batches in dataframe order: the evaluation code pairs
    # model.predict(test_generator) with test_generator.classes, which is only
    # valid when the iterator does not shuffle.
    shuffle=False,
    **generator_options,
)

Found 14027 validated image filenames belonging to 105 classes.
Found 3507 validated image filenames belonging to 105 classes.


In [22]:
# Instantiate the headless, ImageNet-initialised DenseNet201 and report how
# many layers it contains.
base_model = DenseNet201(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
print(f"Number of layers in the model: {len(base_model.layers)}")

Number of layers in the model: 707


In [23]:
strategy = tf.distribute.MirroredStrategy()
print(f"Number of devices: {strategy.num_replicas_in_sync}")

# Everything that creates variables (backbone, head, optimizer) is built
# inside the distribution strategy scope.
with strategy.scope():
    # Headless DenseNet201 backbone, frozen for the first training stage.
    base_model = DenseNet201(
        include_top=False,
        weights='imagenet',
        input_shape=(224, 224, 3),
    )
    base_model.trainable = False

    # One softmax unit per class known to the training generator.
    class_count = len(train_generator.class_indices)

    # Backbone followed by a small classification head.
    stack = [
        layers.Input(shape=(224, 224, 3)),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),  # regularisation for the new head
        layers.Dense(class_count, activation='softmax'),
    ]
    model = models.Sequential(stack)

    head_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=head_optimizer,
        metrics=['accuracy'],
    )

model.summary()
print(f"Number of layers in the model: {len(model.layers)}")

Number of devices: 1


Number of layers in the model: 5


In [None]:
# Stage 1: train for five epochs with the backbone frozen, validating on the
# held-out split after each epoch.
with strategy.scope():
    history = model.fit(
        x=train_generator,
        epochs=5,
        validation_data=test_generator,
    )
model.summary()

Epoch 1/5


  self._warn_if_super_not_called()


[1m 39/220[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m2:19[0m 771ms/step - accuracy: 0.0128 - loss: 4.8709

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stage 2: fine-tune the upper part of the DenseNet backbone at a low
# learning rate.
with strategy.scope():
    base_model.trainable = True

    # Keep the first 100 backbone layers frozen. Above that, unfreeze
    # everything except BatchNormalization layers: Keras documents that a
    # BatchNormalization layer with trainable=False runs in inference mode, so
    # its moving statistics are left untouched during fine-tuning.
    for index, layer in enumerate(base_model.layers):
        is_batch_norm = isinstance(layer, layers.BatchNormalization)
        layer.trainable = index >= 100 and not is_batch_norm

    # Recompile so the changed trainable flags take effect (important step)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # NOTE(review): test_generator is used here for early stopping / best
    # weight selection and is also what model.evaluate() reports on, so the
    # final test metrics are optimistically biased. Consider carving a
    # separate validation split out of the training data.
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    history = model.fit(
        train_generator,
        validation_data=test_generator,
        epochs=27,  # Upper bound; early stopping may end training sooner
        callbacks=[early_stopping]
    )
model.summary()

In [None]:
import matplotlib.pyplot as plt

# Per-epoch accuracy curves recorded by the most recent model.fit() call.
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Draw both curves on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
for series, caption in (
    (train_acc, 'Training Accuracy'),
    (val_acc, 'Validation Accuracy'),
):
    ax.plot(series, label=caption)

# Title, axis labels and legend
ax.set_title('Training and Validation Accuracy Over Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()

# Render the figure
plt.show()

In [None]:
# Score the trained model on the held-out generator; with the compile settings
# used above, evaluate() yields the loss followed by the accuracy metric.
evaluation = model.evaluate(test_generator)
test_loss, test_accuracy = evaluation

# Report both numbers
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
img_path = '/kaggle/input/pins-face-recognition/105_classes_pins_dataset/pins_amber heard/amber heard219_327.jpg'

# Load the image at the network's input resolution; `img` itself is kept
# unmodified for display below.
img = load_img(img_path, target_size=(224, 224))

# Apply the same DenseNet preprocess_input used by the training and test
# generators. A bare /255 scaling would feed the model inputs that differ from
# what it was trained on.
img_array = preprocess_input(img_to_array(img))
img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension

# Get the mapping of class indices to class names
class_indices = train_generator.class_indices  # label -> index used during training
class_names = {v: k for k, v in class_indices.items()}  # index -> label

# Predict the class
predictions = model.predict(img_array)
predicted_class_index = int(np.argmax(predictions, axis=1)[0])
predicted_class_name = class_names[predicted_class_index]


# Print the predicted class number and name
print(f"Predicted Class Index: {predicted_class_index}")
print(f"Predicted Class Name: {predicted_class_name}")

# Display the image with the predicted class name
plt.imshow(img)
plt.title(f"Predicted: {predicted_class_name}")
plt.axis('off')
plt.show()

In [None]:
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize

# Build a dedicated, non-shuffled iterator over the test split. Predictions
# are compared row by row with `.classes`, which is in dataframe order, so the
# iterator must yield samples in that same order. The class list is taken from
# the training generator so indices match the model's output units.
eval_generator = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': test_paths, 'class': test_labels}),
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
    classes=list(train_generator.class_indices),
    shuffle=False,
)
eval_class_names = list(eval_generator.class_indices)

# Generate predictions for the test dataset
y_pred_probs = model.predict(eval_generator)
y_pred_classes = np.argmax(y_pred_probs, axis=1)  # Convert probabilities to class indices
y_true_classes = np.asarray(eval_generator.classes)  # True labels, same order as predictions

# Confusion Matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(
    y_true_classes, y_pred_classes, labels=list(range(len(eval_class_names)))
)

# Plot Confusion Matrix
plt.figure(figsize=(12, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=eval_class_names,
            yticklabels=eval_class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Class")
plt.ylabel("True Class")
plt.show()

# ROC Curve
# One-hot encode the true labels (one column per class index)
y_true_binarized = label_binarize(y_true_classes, classes=list(range(len(eval_class_names))))
n_classes = y_true_binarized.shape[1]

# Calculate one-vs-rest ROC curve and AUC for each class
fpr = {}
tpr = {}
roc_auc = {}

for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_binarized[:, i], y_pred_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curve for each class
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC = {roc_auc[i]:.2f})")

plt.plot([0, 1], [0, 1], 'k--', label="Random Guess")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for Multi-Class Classification")
plt.legend(loc="lower right")
plt.show()
