In [None]:
import os
import cv2
import numpy as np
from mtcnn import MTCNN
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import os


In [None]:


# Paths to the directories
original_dir = r"D:\dataFaces\archive\105_classes_pins_dataset"
cropped_dir = r"D:\dataFaces\cropped"

# Initialize the face detector
detector = MTCNN()

# Prepare training data
X = []
X = []
y_bbox = []
y_class_labels = []

# Assign a numeric label to each celebrity folder
celebrity_labels = {folder: idx for idx, folder in enumerate(os.listdir(original_dir))}
num_classes = len(celebrity_labels)  # Total number of classes

# Loop through each celebrity folder
for celebrity, label in celebrity_labels.items():
    original_celebrity_folder = os.path.join(original_dir, celebrity)
    cropped_celebrity_folder = os.path.join(cropped_dir, celebrity)

    # Process images in the original folder for bounding boxes
    for img_file in os.listdir(original_celebrity_folder):
        img_path = os.path.join(original_celebrity_folder, img_file)
        img = cv2.imread(img_path)
        if img is None:
            continue
        
        # Detect faces in the original image
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        faces = detector.detect_faces(img_rgb)

        # If a face is detected, use the first face for simplicity
        if faces:
            # Bounding box for the detected face
            x, y, w, h = faces[0]['box']
            y_bbox.append([x, y, w, h])

            # Resize the image to 224x224 for model input
            img_resized = cv2.resize(img, (224, 224))
            X.append(img_resized)
            y_class_labels.append(label)

# Convert lists to arrays
X = np.array(X, dtype="float32") / 255.0  # Normalize images
y_bbox = np.array(y_bbox, dtype="float32")
y_class_labels = to_categorical(y_class_labels, num_classes=num_classes)

print("Total images:", X.shape[0])
print("Bounding boxes shape:", y_bbox.shape)
print("Class labels shape:", y_class_labels.shape)

# Split data into training and testing sets
X_trainn, X_testt, y_bbox_trainn, y_bbox_testt, y_class_labels_trainn, y_class_labels_testt = train_test_split(
    X, y_bbox, y_class_labels, test_size=0.2, random_state=42)

# Save datasets
np.save('X_trainn.npy', X_trainn)
np.save('y_bbox_trainn.npy', y_bbox_trainn)
np.save('y_class_labels_trainn.npy', y_class_labels_trainn)

np.save('X_testt.npy', X_testt)
np.save('y_bbox_testt.npy', y_bbox_testt)
np.save('y_class_labels_testt.npy', y_class_labels_testt)


Total images: 17423
Bounding boxes shape: (17423, 4)
Class labels shape: (17423, 105)


In [2]:
import numpy as np

# Load the datasets
X_trainn = np.load('X_trainn.npy')
y_bbox_trainn = np.load('y_bbox_trainn.npy')
y_class_labels_trainn = np.load('y_class_labels_trainn.npy')

X_testt = np.load('X_testt.npy')
y_bbox_testt = np.load('y_bbox_testt.npy')
y_class_labels_testt = np.load('y_class_labels_testt.npy')

# Optionally, print the shapes of the loaded datasets to verify
print(f'X_trainn shape: {X_trainn.shape}')
print(f'y_bbox_trainn shape: {y_bbox_trainn.shape}')
print(f'y_class_labels_trainn shape: {y_class_labels_trainn.shape}')

print(f'X_testt shape: {X_testt.shape}')
print(f'y_bbox_testt shape: {y_bbox_testt.shape}')
print(f'y_class_labels_testt shape: {y_class_labels_testt.shape}')

X_trainn shape: (13938, 224, 224, 3)
y_bbox_trainn shape: (13938, 4)
y_class_labels_trainn shape: (13938, 105)
X_testt shape: (3485, 224, 224, 3)
y_bbox_testt shape: (3485, 4)
y_class_labels_testt shape: (3485, 105)


In [3]:
print("X_train shape:", X_trainn.shape)
print("y_bbox_train shape:", y_bbox_trainn.shape)
print("y_class_labels_train shape:", y_class_labels_trainn.shape)

print("X_test shape:", X_testt.shape)
print("y_bbox_test shape:", y_bbox_testt.shape)
print("y_class_labels_test shape:", y_class_labels_testt.shape)


X_train shape: (13938, 224, 224, 3)
y_bbox_train shape: (13938, 4)
y_class_labels_train shape: (13938, 105)
X_test shape: (3485, 224, 224, 3)
y_bbox_test shape: (3485, 4)
y_class_labels_test shape: (3485, 105)


In [None]:
from tensorflow.keras import layers
# Build the multi-input, multi-output model
def build_model(input_shape=(224, 224, 3), num_classes=105):
    # Input layer
    input_image = Input(shape=input_shape)

    # Pre-trained InceptionV3 model (without the top layers)
    base_model = InceptionV3(weights="imagenet", include_top=False, input_shape=input_shape)
    base_model.trainable = False  # Freeze base model layers

    # Pass the input through the base model
        # Pass the input through the base model
    x = base_model(input_image)

    x = layers.Flatten()(x)
    # x = layers.Dense(1024, activation='relu')(x)
    # x = layers.Dropout(0.2)(x)
    # x = layers.Dense(512, activation='elu')(x)
    # x = layers.Dropout(0.1)(x)

    # Output for face detection (bounding box)
    bbox_output = Dense(4, name="bbox")(x)  # 4 values for [x, y, w, h]

    # Output for celebrity recognition (class label)
    class_output = Dense(num_classes, activation="softmax", name="class")(x)

    # Create the model
    model = Model(inputs=input_image, outputs=[bbox_output, class_output])

    # Compile the model
    model.compile(optimizer="adam", 
                  loss={"bbox": "mean_squared_error", "class": "categorical_crossentropy"},
                  metrics={"bbox": "mae", "class": "accuracy"})
    
    return model

# Build the model
model = build_model(input_shape=(224, 224, 3), num_classes=105)
#model.summary()


In [8]:
early_stop = EarlyStopping(
    monitor="val_loss",  # Metric to monitor
    patience=5,          # Number of epochs with no improvement to wait
    restore_best_weights=True  # Restore the model weights from the epoch with the best value of the monitored metric
)

In [None]:

# Train the model
model.fit(X_trainn, {"bbox": y_bbox_trainn, "class": y_class_labels_trainn}, epochs=30, batch_size=128, callbacks=[early_stop])


Epoch 1/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4979s[0m 43s/step - bbox_loss: 5174.5107 - bbox_mae: 43.2692 - class_accuracy: 0.1051 - class_loss: 15.1064 - loss: 5189.6426
Epoch 2/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m787s[0m 6s/step - bbox_loss: 1711.4999 - bbox_mae: 23.7227 - class_accuracy: 0.6136 - class_loss: 2.1038 - loss: 1713.6016
Epoch 3/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m425s[0m 4s/step - bbox_loss: 1366.5826 - bbox_mae: 21.3363 - class_accuracy: 0.7974 - class_loss: 0.9241 - loss: 1367.5098
Epoch 4/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m438s[0m 4s/step - bbox_loss: 1247.6913 - bbox_mae: 20.4635 - class_accuracy: 0.8848 - class_loss: 0.4671 - loss: 1248.1641
Epoch 5/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 4s/step - bbox_loss: 1212.2843 - bbox_mae: 19.9752 - class_accuracy: 0.9033 - class_loss: 0.4159 - loss: 1212.7043
Epoch 6/30
[1m109/109[0m 

<keras.src.callbacks.history.History at 0x1c5641f7820>

In [None]:

# Evaluate the model on the test set
test_loss, test_bbox_loss, test_class_loss, test_bbox_mae, test_class_accuracy = model.evaluate(
    X_testt, {"bbox": y_bbox_testt, "class": y_class_labels_testt})

print(f"Test Loss: {test_loss}")
print(f"Test Bounding Box Loss: {test_bbox_loss}")
print(f"Test Class Loss: {test_class_loss}")
print(f"Test Bounding Box MAE: {test_bbox_mae}")
print(f"Test Class Accuracy: {test_class_accuracy}")


[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 999ms/step - bbox_loss: 997.4100 - bbox_mae: 19.7283 - class_accuracy: 0.3531 - class_loss: 18.7342 - loss: 1016.1475
Test Loss: 1001.5535888671875
Test Bounding Box Loss: 983.0631713867188
Test Class Loss: 18.315183639526367
Test Bounding Box MAE: 19.534217834472656
Test Class Accuracy: 0.37015780806541443


In [None]:
# Save the model weights
model.save_weights(r"D:\assigment5Deeplearning\MIMO_one.weights.h5")


In [None]:
import cv2
import numpy as np
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import load_model

# Load your pre-trained model
# model = build_model(input_shape=(224, 224, 3), num_classes=105)
# model.load_weights('MIMO_one_Weights.h5')  # Adjust the path to your trained weights

# Initialize webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Preprocess the frame for the model
    input_frame = cv2.resize(frame, (224, 224))
    input_frame = preprocess_input(input_frame)
    input_frame = np.expand_dims(input_frame, axis=0)

    # Make predictions
    bbox_pred, class_pred = model.predict(input_frame)

    # Extract bounding box and class predictions
    x, y, w, h = bbox_pred[0]  # bounding box coordinates
    class_id = np.argmax(class_pred[0])  # Get class ID with highest probability
    class_confidence = class_pred[0][class_id]  # Probability of the predicted class

    # Convert bounding box to original frame scale
    height, width, _ = frame.shape
    x, y, w, h = int(x * width), int(y * height), int(w * width), int(h * height)

    # Draw bounding box and label on the frame
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    label = f"Class: {class_id}, Confidence: {class_confidence:.2f}"
    cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

    # Display the frame
    cv2.imshow("Live Face Detection and Classification", frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
