# Generate dataset

In [1]:
import cv2
import os
import pandas as pd

# Step 1: Generate Dataset
def generate_dataset(num_samples=200):
    """Capture face samples from the default webcam and record them on disk.

    Prompts for a numeric user ID (rejecting IDs already present in
    ``data/faces_data.csv``) and a name, then grabs frames from camera 0.
    Every frame in which a face is detected is cropped, resized to 200x200,
    converted to grayscale, saved as ``data/user.<id>.<n>.jpg`` and logged as
    one row in ``data/faces_data.csv``.

    Capture stops when ``num_samples`` images have been saved, when Enter is
    pressed in the preview window, or when the camera stops delivering
    frames.

    Args:
        num_samples: Maximum number of face images to collect.

    Raises:
        RuntimeError: If the Haar cascade file cannot be loaded or the
            camera cannot be opened.
    """
    csv_file_path = 'data/faces_data.csv'
    csv_columns = ['id', 'name', 'img_id', 'file_path']

    face_classifier = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    if face_classifier.empty():
        # An empty cascade would only fail later, inside detectMultiScale,
        # with a much less helpful OpenCV assertion message.
        raise RuntimeError(
            "Could not load 'haarcascade_frontalface_default.xml'."
        )

    def face_cropped(img):
        """Return the cropped BGR face region of *img*, or None if no face."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_classifier.detectMultiScale(gray, 1.3, 5)
        if len(faces) == 0:
            return None
        # When several faces are detected, the last detection is used.
        x, y, w, h = faces[-1]
        return img[y:y + h, x:x + w]

    # IDs already registered; read once instead of on every prompt attempt.
    existing_ids = ()
    if os.path.exists(csv_file_path):
        existing_ids = pd.read_csv(csv_file_path)['id'].values

    # Ask for user ID until a valid, unused one is given
    while True:
        raw_id = input("Enter user ID (numeric): ")
        try:
            user_id = int(raw_id)
        except ValueError:
            print("Invalid ID. Please enter a numeric value.")
            continue
        if user_id in existing_ids:
            print("ID already taken. Please choose a different ID.")
            continue
        break

    user_name = input("Enter your name: ")

    # Create data directory if it doesn't exist
    os.makedirs('data', exist_ok=True)

    # Prepare CSV file for saving metadata
    if not os.path.exists(csv_file_path):
        with open(csv_file_path, 'w') as f:
            f.write("id,name,img_id,file_path\n")  # Write header

    # Open the camera only after the prompts so it is not held while waiting
    # for keyboard input, and always release it, even on error.
    cap = cv2.VideoCapture(0)
    img_id = 0
    try:
        if not cap.isOpened():
            raise RuntimeError("Could not open the default camera (index 0).")

        while img_id < num_samples:
            ret, frame = cap.read()
            if not ret or frame is None:
                print("Failed to read a frame from the camera; stopping capture.")
                break

            # Run detection once per frame and reuse the result.
            cropped = face_cropped(frame)
            if cropped is not None:
                img_id += 1
                face = cv2.resize(cropped, (200, 200))
                face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
                file_name_path = f"data/user.{user_id}.{img_id}.jpg"
                cv2.imwrite(file_name_path, face)

                # Append just this row; re-reading and rewriting the whole
                # CSV for every sample grows quadratically with its size.
                new_row = pd.DataFrame({
                    'id': [user_id],
                    'name': [user_name],
                    'img_id': [img_id],
                    'file_path': [file_name_path],
                })
                new_row.to_csv(
                    csv_file_path,
                    mode='a',
                    header=False,
                    index=False,
                    columns=csv_columns,
                )

                # The counter is drawn after saving, so it only appears in
                # the preview window, not in the stored image.
                cv2.putText(face, str(img_id), (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow("Cropped face", face)

            # 13 is the key code for Enter; mask to the low byte because some
            # platforms set extra high bits in the waitKey result.
            if cv2.waitKey(1) & 0xFF == 13:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

    print("Collecting samples is completed....")
    
# Run the interactive capture session as soon as this cell executes.
generate_dataset()


Collecting samples is completed....


# Train Classifier

In [2]:
import numpy as np
from PIL import Image

# Step 2: Train the Classifier and Save It
def train_classifier(data_file):
    """Train an LBPH face recognizer from a metadata CSV and save it.

    Each row of ``data_file`` must provide an ``id`` column (integer label)
    and a ``file_path`` column (image to load). Every image is read as
    8-bit grayscale. Rows whose ID is not an integer or whose image cannot
    be opened are reported and skipped. The trained model is written to
    ``classifier.xml`` in the current working directory.

    Args:
        data_file: Path to the CSV file listing the training images.

    Raises:
        ValueError: If no usable training image could be loaded.
    """
    # Load the data from the CSV
    df = pd.read_csv(data_file)

    faces = []
    ids = []

    for index, row in df.iterrows():
        try:
            label = int(row['id'])  # Ensure that ID is an integer
            # The context manager closes the image file as soon as the
            # pixels have been copied into the array.
            with Image.open(row['file_path']) as img:
                image_np = np.array(img.convert('L'), 'uint8')
        except FileNotFoundError as e:
            print(f"File not found for row {index}: {e}")
            continue  # Skip the current row if there's an error
        except (ValueError, OSError) as e:
            # OSError also covers unreadable or corrupt image files.
            print(f"Skipping row {index} due to error: {e}")
            continue  # Skip the current row if there's an error
        faces.append(image_np)
        ids.append(label)

    if not faces:
        # Fail with a clear message instead of an opaque OpenCV error
        # raised from inside train().
        raise ValueError(
            f"No usable training images were found in '{data_file}'."
        )

    ids = np.array(ids, dtype=np.int32)  # Convert to a NumPy array of type int32

    # Train the classifier and save it
    clf = cv2.face.LBPHFaceRecognizer_create()
    clf.train(faces, ids)
    clf.write("classifier.xml")
    print("Classifier training completed and saved as 'classifier.xml'.")

# Train on the metadata file at 'data/faces_data.csv' as soon as this cell executes.
train_classifier('data/faces_data.csv')


Classifier training completed and saved as 'classifier.xml'.


# Recognize Faces

In [3]:
def _load_user_names(csv_file_path='data/faces_data.csv'):
    """Return a dict mapping each user ID in the metadata CSV to its name.

    When an ID appears on several rows, the name from its first row is used.
    Rows with a missing ID or name are ignored.
    """
    df = pd.read_csv(csv_file_path).dropna(subset=['id', 'name'])
    first_rows = df.drop_duplicates(subset='id')
    return {
        int(user_id): str(name)
        for user_id, name in zip(first_rows['id'], first_rows['name'])
    }


def draw_boundary(img, classifier, scaleFactor, minNeighbors, color, clf, names=None):
    """Detect faces in *img*, outline them and label each with a name.

    Args:
        img: BGR frame to annotate; it is drawn on in place.
        classifier: Cascade classifier used for face detection.
        scaleFactor: Passed through to ``detectMultiScale``.
        minNeighbors: Passed through to ``detectMultiScale``.
        color: BGR colour for the rectangle and for recognised names.
        clf: Trained recognizer whose ``predict`` returns ``(label, value)``.
        names: Optional dict mapping user ID to name. When omitted, the
            mapping is loaded from ``data/faces_data.csv`` the first time
            a confident match needs it (at most once per call).

    Returns:
        The same image object, annotated.
    """
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = classifier.detectMultiScale(gray_img, scaleFactor, minNeighbors)

    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)

        label_id, pred = clf.predict(gray_img[y:y + h, x:x + w])
        # Map the value returned by predict onto a 0-100 score, using 300
        # as the reference maximum.
        confidence = int(100 * (1 - pred / 300))

        if confidence > 75:
            if names is None:
                # Fallback for callers that do not supply a mapping.
                names = _load_user_names()
            user_name = names.get(int(label_id))

            if user_name is not None:
                cv2.putText(img, user_name, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 1, cv2.LINE_AA)
            else:
                cv2.putText(img, "Unknown", (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 1, cv2.LINE_AA)
        else:
            cv2.putText(img, "UNKNOWN", (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 1, cv2.LINE_AA)

    return img


def recognize_faces():
    """Run live face recognition on the default webcam until Enter is pressed.

    Loads the Haar cascade and the trained model from ``classifier.xml``,
    then annotates and displays every captured frame. The loop also ends if
    the camera stops delivering frames.

    Raises:
        RuntimeError: If the Haar cascade file cannot be loaded or the
            camera cannot be opened.
    """
    # Loading classifier
    faceCascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    if faceCascade.empty():
        raise RuntimeError(
            "Could not load 'haarcascade_frontalface_default.xml'."
        )
    clf = cv2.face.LBPHFaceRecognizer_create()
    clf.read("classifier.xml")

    # Load the ID -> name mapping once rather than re-reading the CSV for
    # every recognised face in every frame.
    try:
        names = _load_user_names()
    except FileNotFoundError:
        # Without metadata, confident matches are labelled "Unknown".
        names = {}

    video_capture = cv2.VideoCapture(0)
    try:
        if not video_capture.isOpened():
            raise RuntimeError("Could not open the default camera (index 0).")

        while True:
            ret, img = video_capture.read()
            if not ret or img is None:
                print("Failed to read a frame from the camera; stopping.")
                break

            img = draw_boundary(img, faceCascade, 1.1, 10, (255, 255, 255), clf, names)
            cv2.imshow("Face Detection", img)

            # 13 is the key code for Enter; mask to the low byte because some
            # platforms set extra high bits in the waitKey result.
            if cv2.waitKey(1) & 0xFF == 13:
                break
    finally:
        # Release the camera and close windows even if an error occurred.
        video_capture.release()
        cv2.destroyAllWindows()
# Start live recognition as soon as this cell executes.
recognize_faces()