# **Hybrid Neural Network Model for Face Recognition**

# **Introduction:**
This project implements a deep learning-based facial recognition system that combines
Convolutional Neural Networks (CNN) and Artificial Neural Networks (ANN) for accurate
face identification.

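A pre-trained CNN (accessed through the `face_recognition` library) is responsible for
extracting a 128-dimensional encoding of deep facial features from each image, while the
ANN model (a small multilayer perceptron) classifies these extracted encodings to recognize
individual identities. The system has been trained and tested on an image dataset and further
validated through frame-by-frame video testing using OpenCV, with YOLO locating people in
each frame before face recognition is applied.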

This hybrid approach enhances recognition accuracy, improves feature generalization,
and demonstrates efficient performance across both static and dynamic environments.

Importing Libraries

In [None]:
import os
import cv2
import csv
import glob
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
from ultralytics import YOLO

import face_recognition
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader


Loading and Exploring the Dataset

In [None]:

# Column order of the metadata CSV; every row tuple below follows this order.
ACTOR_FIELDS = ["Name", "Gender", "Nationality", "Age"]

# One tuple per actor. Ages are kept as strings, exactly as they are written out.
_ACTOR_ROWS = [
    ("Angelina Jolie", "female", "American", "53"),
    ("Brad Pitt", "male", "American", "45"),
    ("Denzel Washington", "male", "American", "38"),
    ("Hugh Jackman", "male", "Australian", "37"),
    ("Jennifer Lawrence", "female", "American", "41"),
    ("Johnny Depp", "male", "American", "36"),
    ("Kate Winslet", "female", "British", "42"),
    ("Leonardo DiCaprio", "male", "American", "34"),
    ("Megan Fox", "female", "American", "35"),
    ("Natalie Portman", "female", "Israeli-American", "30"),
    ("Nicole Kidman", "female", "Australian", "40"),
    ("Robert Downey Jr", "male", "American", "54"),
    ("Sandra Bullock", "female", "American", "46"),
    ("Scarlett Johansson", "female", "American", "36"),
    ("Tom Cruise", "male", "American", "43"),
    ("Tom Hanks", "male", "American", "39"),
    ("Will Smith", "male", "American", "48"),
]

# Same list of dicts as before, built from the compact table above.
actors = [dict(zip(ACTOR_FIELDS, row)) for row in _ACTOR_ROWS]

# Destination of the CSV (the folder is created on demand).
folder_path = "C:/Users/nandhudivya/Desktop/project/actors datset"
filename = os.path.join(folder_path, "actors.csv")
os.makedirs(folder_path, exist_ok=True)

# Write the header followed by one row per actor.
with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=ACTOR_FIELDS)
    writer.writeheader()
    writer.writerows(actors)

print(f"CSV file saved successfully at: {filename}")


CSV file saved successfully at: C:/Users/nandhudivya/Desktop/project/actors datset\actors.csv


Data Preprocessing

In [None]:

# --- Paths --------------------------------------------------------------
# Root of the image dataset; extract_encodings() expects one sub-folder per person.
DATASET_DIR = "C:/Users/nandhudivya/Desktop/project/dataset"
# Where the trained weights and the label encoder are written.
SAVEDIR = "C:/Users/nandhudivya/Desktop/project/saved_models"
os.makedirs(SAVEDIR, exist_ok=True)

# --- Train/test split ---------------------------------------------------
TEST_SIZE = 0.2
RANDOM_STATE = 42

# --- Optimisation -------------------------------------------------------
BATCH_SIZE = 32
EPOCHS = 10
LR = 0.001

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


#Extraction of face encodings for each image
def extract_encodings(dataset_dir):
    """Build a face-encoding dataset from a folder-per-person image tree.

    Every sub-folder of ``dataset_dir`` is treated as one person and its name
    becomes the label. Each ``.jpg`` / ``.jpeg`` / ``.png`` file inside it is
    loaded with ``face_recognition`` and the first encoding returned for the
    image is kept. Images that fail to load, or in which no face is found,
    are reported on stdout and skipped.

    Args:
        dataset_dir: Root folder laid out as ``dataset/person_name/*.jpg``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds one encoding per usable
        image and ``y`` the matching person (folder) name.

    Raises:
        RuntimeError: If ``dataset_dir`` contains no sub-folders.
    """
    image_exts = (".jpg", ".jpeg", ".png")
    X = []
    y = []

    persons = sorted(
        d for d in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, d))
    )
    if not persons:
        raise RuntimeError(f"No person subfolders found in {dataset_dir}. Expect dataset/person_name/*.jpg")

    for person in persons:
        person_dir = os.path.join(dataset_dir, person)

        # Sorted listing: the sample order feeds a seeded train/test split, so
        # it must not depend on filesystem enumeration order.
        # Case-insensitive extension match: "*.jpg" globbing would miss
        # "IMG_01.JPG" on case-sensitive filesystems.
        # Dot-files are skipped, as the original "*" glob pattern did.
        image_files = sorted(
            os.path.join(person_dir, fname)
            for fname in os.listdir(person_dir)
            if not fname.startswith(".") and fname.lower().endswith(image_exts)
        )
        if not image_files:
            print(f"Warning: no images found in {person_dir}")
            continue

        for img_path in image_files:
            # Only the load is guarded: an unreadable file should not abort
            # the whole extraction run.
            try:
                img = face_recognition.load_image_file(img_path)
            except Exception as e:
                print(f"Could not load {img_path}: {e}")
                continue

            encs = face_recognition.face_encodings(img)
            if len(encs) == 0:
                print(f"⚠️  No face detected in {img_path}; skipping.")
                continue

            # When several faces are found only the first encoding is used.
            X.append(encs[0])
            y.append(person)

    X = np.array(X)
    y = np.array(y)
    print(f"Extracted {len(X)} encodings for {len(np.unique(y))} people.")
    return X, y

# Encoding Labels
def build_dataset(X, y, test_size=TEST_SIZE):
    """Encode the labels, split into train/test sets and convert to tensors.

    Labels are integer-encoded with a ``LabelEncoder``; the split is
    stratified on the encoded labels and seeded with ``RANDOM_STATE``.

    Args:
        X: Feature array, one row per sample.
        y: Label array aligned with ``X``.
        test_size: Passed through to ``train_test_split``.

    Returns:
        ``((X_train, y_train), (X_test, y_test), label_encoder)`` where the
        features are ``float32`` tensors and the labels are ``long`` tensors.
    """
    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(y)

    feats_train, feats_test, labels_train, labels_test = train_test_split(
        X,
        encoded_labels,
        test_size=test_size,
        random_state=RANDOM_STATE,
        stratify=encoded_labels,
    )

    def _to_tensors(features, targets):
        # Features as float32, class indices as int64.
        return (
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(targets, dtype=torch.long),
        )

    train_pair = _to_tensors(feats_train, labels_train)
    test_pair = _to_tensors(feats_test, labels_test)
    return train_pair, test_pair, encoder


#simple MLP classifier (ANN)
class FaceNetMLP(nn.Module):
    """Fully connected classifier applied to fixed-length feature vectors.

    The network is a stack of ``Linear -> ReLU -> Dropout`` groups, one per
    entry of ``hidden_dims``, followed by a final ``Linear`` layer producing
    one logit per class. The layers live in ``self.net`` (an
    ``nn.Sequential``), so state-dict keys are ``net.<index>.*``.

    Args:
        input_dim: Size of each input vector.
        hidden_dims: Width of each hidden layer, in order.
        num_classes: Number of output logits.
        dropout: Dropout probability used after every hidden layer.
    """

    def __init__(self, input_dim=128, hidden_dims=(256,128), num_classes=2, dropout=0.3):
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden Linear layer.
        widths = [input_dim, *hidden_dims]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)]
        # Output head: raw logits, no activation.
        stack.append(nn.Linear(widths[-1], num_classes))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        logits = self.net(x)
        return logits

#Training loop
def train_model(model, train_loader, val_loader, epochs=EPOCHS, lr=LR, device=DEVICE):
    """Train ``model`` with Adam + cross-entropy and keep the best epoch.

    After every epoch the model is scored on ``val_loader``; the weights of
    the epoch with the highest validation accuracy are restored before the
    model is returned.

    Args:
        model: ``nn.Module`` producing class logits.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` batches used for
            per-epoch accuracy and checkpoint selection.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimizer.
        device: Device the model and batches are moved to.

    Returns:
        The same ``model`` instance, loaded with the best-scoring weights
        (or left at its final weights if validation accuracy never rose
        above 0).
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0.0
    best_state = None

    for epoch in range(1, epochs + 1):
        model.train()
        running_loss = 0.0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()
            # The criterion returns the batch mean; weight it by batch size
            # so the epoch average is per-sample.
            running_loss += loss.item() * xb.size(0)

        avg_loss = running_loss / len(train_loader.dataset)

        # Validation
        model.eval()
        preds = []
        trues = []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                logits = model(xb)
                pred = torch.argmax(logits, dim=1)
                preds.extend(pred.cpu().numpy())
                trues.extend(yb.cpu().numpy())
        val_acc = accuracy_score(trues, preds)
        print(f"Epoch {epoch}/{epochs} — loss: {avg_loss:.4f}, val_acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() hands back references to the live parameter
            # tensors, which later optimizer steps keep mutating. Clone them
            # so this snapshot really is the best epoch's weights.
            best_state = {
                key: tensor.detach().clone()
                for key, tensor in model.state_dict().items()
            }

    if best_state is not None:
        model.load_state_dict(best_state)
    print(f"Best val acc: {best_val_acc:.4f}")
    return model


#Evaluation
def evaluate(model, X_test_t, y_test_t, le, device=DEVICE):
    """Print accuracy, a per-class report and the confusion matrix.

    Args:
        model: Trained ``nn.Module`` producing class logits.
        X_test_t: Tensor of test features.
        y_test_t: Tensor of integer-encoded test labels.
        le: Fitted label encoder; ``le.classes_`` supplies the row names of
            the report.
        device: Device used for the forward pass.

    Returns:
        None. All results are printed.
    """
    model.eval()
    model.to(device)
    with torch.no_grad():
        logits = model(X_test_t.to(device))
        preds = torch.argmax(logits, dim=1).cpu().numpy()
    # .cpu() makes this work even when the label tensor lives on the GPU.
    trues = y_test_t.cpu().numpy()

    # Pin the label set to every known class. Without it the report raises
    # when a class is missing from the test split (target_names length
    # mismatch) and the confusion matrix silently changes shape.
    labels = np.arange(len(le.classes_))

    print("Accuracy:", accuracy_score(trues, preds))
    print("Classification report:")
    print(classification_report(trues, preds, labels=labels, target_names=le.classes_, zero_division=0))
    print("Confusion matrix:")
    print(confusion_matrix(trues, preds, labels=labels))


#Inference-single image
def predict_image(model, image_path, le, device=DEVICE):
    """Predict the identity of the first face found in an image file.

    Args:
        model: Trained ``nn.Module`` producing class logits from an encoding.
        image_path: Path of the image to classify.
        le: Fitted label encoder used to map the class index back to a name.
        device: Device used for the forward pass.

    Returns:
        ``(name, confidence)`` where ``confidence`` is the softmax
        probability of the predicted class, or ``None`` (after printing a
        message) when no face is detected in the image.
    """
    img = face_recognition.load_image_file(image_path)
    encs = face_recognition.face_encodings(img)
    if len(encs) == 0:
        print("No face detected.")
        return None

    # Put the model on the same device as the input (as evaluate() does);
    # otherwise a CPU model fed a CUDA tensor raises a device mismatch.
    model.to(device)
    model.eval()

    # Only the first detected face is classified.
    enc = torch.tensor(encs[0], dtype=torch.float32).unsqueeze(0).to(device)
    with torch.no_grad():
        prob = torch.softmax(model(enc), dim=1)

    pred_idx = int(torch.argmax(prob, dim=1).item())
    confidence = float(prob[0, pred_idx].item())
    name = le.inverse_transform([pred_idx])[0]
    return name, confidence


if __name__ == "__main__":
    # End-to-end run: extract encodings, split, train, evaluate, save, and
    # finish with a single-image sanity check.
    print("Device:", DEVICE)
    X, y = extract_encodings(DATASET_DIR)
    if len(X) == 0:
        raise RuntimeError("No encodings extracted. Check dataset and face detection.")

    (X_train_t, y_train_t), (X_test_t, y_test_t), le = build_dataset(X, y)

    num_classes = len(le.classes_)
    print("Classes:", le.classes_)

    # Checkpoint selection inside train_model() must not look at the test
    # set, otherwise the reported test accuracy is optimistically biased.
    # Hold out a slice of the *training* data for validation instead.
    VAL_FRACTION = 0.1
    full_train_ds = TensorDataset(X_train_t, y_train_t)
    n_val = max(1, int(VAL_FRACTION * len(full_train_ds)))
    split_rng = torch.Generator().manual_seed(RANDOM_STATE)  # reproducible split
    train_ds, val_ds = torch.utils.data.random_split(
        full_train_ds, [len(full_train_ds) - n_val, n_val], generator=split_rng
    )
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

    #model
    model = FaceNetMLP(input_dim=128, hidden_dims=(256,128), num_classes=num_classes, dropout=0.3)

    # train
    model = train_model(model, train_loader, val_loader, epochs=EPOCHS, lr=LR, device=DEVICE)

    # evaluate on the untouched test split
    evaluate(model, X_test_t, y_test_t, le, device=DEVICE)

    # saving model and label encoding
    torch.save(model.state_dict(), os.path.join(SAVEDIR, "ann_face_recognition.pt"))
    with open(os.path.join(SAVEDIR, "label_encoder.pkl"), "wb") as f:
        pickle.dump(le, f)
    print("Saved model and label encoder to", SAVEDIR)

    # Single-image sanity check. The sample must be an image file, not a
    # person folder, so take the first image of the first class.
    sample_image = next(
        (
            path
            for ext in ("*.jpg", "*.jpeg", "*.png")
            for path in sorted(glob.glob(os.path.join(DATASET_DIR, le.classes_[0], ext)))
        ),
        None,
    )
    if sample_image is not None:
        sample_result = predict_image(model, sample_image, le, device=DEVICE)
        if sample_result is not None:
            print(f"Sample prediction for {sample_image}: {sample_result[0]} ({sample_result[1]:.2f})")


Device: cuda
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Angelina Jolie\044_512dfd33.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Angelina Jolie\049_4d6df392.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Angelina Jolie\092_26130bb1.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Angelina Jolie\095_0be163a1.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Brad Pitt\041_cc0957bf.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Brad Pitt\048_185402c6.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Brad Pitt\060_136e5ef5.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Brad Pitt\076_75b9dd73.jpg; skipping.
⚠️  No face detected in C:/Users/nandhudivya/Desktop/project/dataset\Brad Pitt\095_1104d364.jpg; skipping.
⚠️  

Loading YOLO

In [None]:

# Load the object detector from the "yolov8n.pt" weights file. The video loop
# further down runs it on every frame and keeps only "person" detections.
yolo_model = YOLO("yolov8n.pt")

ANN model for face recognition

In [None]:

# Artifacts written by the training cell.
label_encoder_path = "C:/Users/nandhudivya/Desktop/project/saved_models/label_encoder.pkl"
weights_path = "C:/Users/nandhudivya/Desktop/project/saved_models/ann_face_recognition.pt"

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Restore the label encoder (maps class indices back to person names).
# NOTE: pickle.load executes arbitrary code; only load files you created.
with open(label_encoder_path, "rb") as encoder_file:
    le = pickle.load(encoder_file)

# Rebuild the classifier with the architecture used at training time, then
# load the saved weights onto DEVICE.
num_classes = len(le.classes_)
ann_model = FaceNetMLP(input_dim=128, hidden_dims=(256, 128), num_classes=num_classes)
saved_weights = torch.load(weights_path, map_location=DEVICE)
ann_model.load_state_dict(saved_weights)

# Move to DEVICE and switch to inference mode (disables dropout).
ann_model.to(DEVICE).eval()


FaceNetMLP(
  (net): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=256, out_features=128, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=128, out_features=17, bias=True)
  )
)

Loading actor details

In [None]:
# Actor metadata, indexed by lower-cased name so lookups are case-insensitive.
df = (
    pd.read_csv("C:/Users/nandhudivya/Desktop/project/actors datset/actors.csv")
    # Normalise the header: trim whitespace and lower-case every column name.
    .rename(columns=lambda col: col.strip().lower())
    .set_index("name")
)
# Normalise the index values the same way as the header.
df.index = df.index.str.strip().str.lower()


Loading and Saving Output Video

In [None]:

# Open the input video and fail fast if it cannot be read; otherwise the
# processing loop would silently do nothing.
video_path = "C:/Users/nandhudivya/Videos/test_video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open input video: {video_path}")

output_path = "C:/Users/nandhudivya/Desktop/project/output_recognition.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# Keep the frame rate as a float: int() would truncate rates such as 29.97
# and make the output drift. If the source reports no usable rate (0 or
# NaN), fall back to 25 fps, since the writer cannot work with 0.
fps = cap.get(cv2.CAP_PROP_FPS)
if not np.isfinite(fps) or fps <= 0:
    fps = 25.0

# The writer must be created with the exact size of the frames it receives.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    raise RuntimeError(f"Could not open output video for writing: {output_path}")

Processing the video frame by frame

In [None]:

# For every frame: detect people with YOLO, look for a face inside each
# person box, classify the face encoding with the ANN, annotate the frame,
# then show it and append it to the output video.
# try/finally guarantees the capture, the writer and the preview window are
# released even if a frame raises part-way through.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_h, frame_w = frame.shape[:2]

        results = yolo_model(frame, stream=True)
        for r in results:
            for box in r.boxes:
                cls = int(box.cls[0])
                if yolo_model.names[cls] != "person":
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0])
                # Clamp to the frame: a negative coordinate would wrap around
                # in NumPy slicing, and an empty crop makes cvtColor raise.
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(frame_w, x2), min(frame_h, y2)
                person_crop = frame[y1:y2, x1:x2]
                if person_crop.size == 0:
                    continue

                # Conversion to RGB for face_recognition (OpenCV frames are BGR)
                person_crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)

                # Detecting faces in the cropped person region
                face_locations = face_recognition.face_locations(person_crop_rgb, model='hog')
                face_encs = face_recognition.face_encodings(person_crop_rgb, known_face_locations=face_locations)

                if len(face_encs) > 0:
                    # Only the first face inside the person box is classified.
                    enc = torch.tensor(face_encs[0], dtype=torch.float32).unsqueeze(0).to(DEVICE)

                    # Prediction using ANN
                    with torch.no_grad():
                        logits = ann_model(enc)
                        probs = torch.softmax(logits, dim=1)
                        pred_idx = torch.argmax(probs, dim=1).item()
                        confidence = probs[0, pred_idx].item()

                    # Keep the label's original casing for display;
                    # str.capitalize() would turn "Brad Pitt" into "Brad pitt".
                    # The lower-cased form is only the lookup key into df.
                    display_name = str(le.inverse_transform([pred_idx])[0]).strip()
                    name = display_name.lower()

                    # Getting personal info
                    if name in df.index:
                        gender = df.loc[name, "gender"]
                        nationality = df.loc[name, "nationality"]
                        age = df.loc[name, "age"]
                    else:
                        gender, nationality, age = "Unknown", "Unknown", "Unknown"

                    # bounding box
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    label = f"{display_name} ({confidence:.2f})"
                    info = f"{gender}, {nationality}, {age}"
                    cv2.putText(frame, label, (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                    cv2.putText(frame, info, (x1, y2 + 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                else:
                    # No face found
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    cv2.putText(frame, "No Face", (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        # show and save the annotated frame
        cv2.imshow("Face Recognition", frame)
        out.write(frame)

        # Press "q" in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"✅ Processed video saved at: {output_path}")



0: 416x640 1 person, 1 tie, 480.5ms
Speed: 52.1ms preprocess, 480.5ms inference, 11.2ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 2 ties, 29.5ms
Speed: 8.7ms preprocess, 29.5ms inference, 5.3ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 2 ties, 35.9ms
Speed: 6.4ms preprocess, 35.9ms inference, 13.8ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 2 ties, 32.1ms
Speed: 7.2ms preprocess, 32.1ms inference, 14.4ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 1 tie, 28.8ms
Speed: 6.9ms preprocess, 28.8ms inference, 11.5ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 1 tie, 27.1ms
Speed: 7.4ms preprocess, 27.1ms inference, 11.8ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 1 tie, 32.2ms
Speed: 7.2ms preprocess, 32.2ms inference, 13.9ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 1 tie, 29.4ms
Speed: 6.5ms preproce

# **Conclusion:**
The Hybrid Neural Network Model effectively integrates CNN and ANN to achieve high
accuracy (~92%) in facial recognition tasks. The model performs well in detecting and
recognizing faces from both images and live video streams, proving its robustness and
practical applicability.

By combining feature extraction (CNN) and classification (ANN), the system demonstrates
a complete end-to-end deep learning pipeline for facial recognition. Future improvements
can include using transfer learning models like VGGFace or ResNet and deploying the
solution for real-world applications such as access control, attendance systems, or
security monitoring.