### Face Detection and Data Collection

In [3]:
!pip install opencv-python


Defaulting to user installation because normal site-packages is not writeable


In [4]:
import cv2
import numpy as np
import os

In [5]:
# Load the Haar Cascade frontal-face classifier from its XML definition.
# cv2.CascadeClassifier does not raise when the file cannot be read; it
# returns an empty classifier that only fails later inside detectMultiScale,
# so the load is verified explicitly here.
CASCADE_FILE = 'haarcascade_frontalface_default.xml'
classifier = cv2.CascadeClassifier(CASCADE_FILE)
if classifier.empty():
    # Not found in the working directory: fall back to the copy bundled
    # with the opencv-python package.
    classifier = cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades, CASCADE_FILE))
if classifier.empty():
    raise FileNotFoundError(f"Could not load Haar cascade '{CASCADE_FILE}'")

#### Detect the face in the images:

In [6]:
def extract_faces(image_path):
    """Return one flattened 100x100 grayscale crop per face found in an image.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A list of 1-D arrays (one per detected face). The list is empty when
        the image cannot be loaded (an error line is printed in that case) or
        when the cascade classifier finds no face.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        print(f"Error loading image: {image_path}")
        return []

    # The Haar cascade operates on a single-channel image
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    boxes = classifier.detectMultiScale(gray, 1.1, 4)

    # Crop each bounding box, normalise it to a fixed size, then flatten it
    return [
        cv2.resize(gray[top:top + height, left:left + width], (100, 100)).flatten()
        for (left, top, width, height) in boxes
    ]

In [7]:
def collect_face_data(directory, label, extensions=('.jpg', '.jpeg', '.png')):
    """Extract face samples from every image in a directory.

    Args:
        directory: Folder whose image files are scanned (non-recursive).
        label: Class label attached to every face found in this folder.
        extensions: File-name suffixes treated as images. Matching is
            case-insensitive, so ``photo.JPG`` is picked up as well.

    Returns:
        A tuple ``(X, y)`` of two lists of equal length: the flattened face
        samples returned by ``extract_faces`` and the matching labels.
    """
    X, y = [], []
    suffixes = tuple(ext.lower() for ext in extensions)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(suffixes):
            continue
        face_features = extract_faces(os.path.join(directory, filename))
        if face_features:
            X.extend(face_features)
            y.extend([label] * len(face_features))
    return X, y

# Collect face data for Nicolas Cage (label 1) and for other people (label 0).
# The paths are assembled with os.path.join so the notebook also runs on
# platforms where '\' is not a path separator.
X_cage, y_cage = collect_face_data(
    os.path.join('face_dataset', 'nicolas_cage', 'Nicolas Cage portrait'), 1)
X_others, y_others = collect_face_data(
    os.path.join('face_dataset', 'others', 'random people portrait'), 0)

In [8]:
# Stack both classes into one feature matrix and one label vector
X = np.array([*X_cage, *X_others])
y = np.array([*y_cage, *y_others])

print(f"Total samples: {len(X)}")

Total samples: 276


#### Dataset Prep:

In [9]:
import pandas as pd

def save_to_csv(filename, X, y):
    """Write samples and labels to a CSV file, labels in the last column.

    Args:
        filename: Destination path of the CSV file.
        X: 2-D array of samples, one row per sample.
        y: Array of labels; it is reshaped into a single column.

    The file gets a header row of integer column positions and no index
    column.
    """
    label_column = y.reshape(-1, 1)
    table = np.concatenate([X, label_column], axis=1)
    pd.DataFrame(table).to_csv(filename, index=False)

# Persist the combined samples and labels to disk
save_to_csv(filename='face_data.csv', X=X, y=y)


### Tuning KNN

In [10]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

In [11]:
# Read the saved table back: every column but the last is a feature,
# the last column is the label
data = pd.read_csv('face_data.csv').to_numpy()
X = data[:, :-1]
y = data[:, -1]

In [12]:
# Function to evaluate different training splits
def evaluate_split_ratios(X, y, splits, param_grid, cv=5, random_state=42):
    """Grid-search a KNN classifier for several test-set ratios and keep the best.

    For every ratio in ``splits`` the data is split with ``train_test_split``,
    a ``GridSearchCV`` over ``param_grid`` is fitted on the training part, and
    the ratio with the highest cross-validation accuracy wins.

    Args:
        X: Feature matrix.
        y: Label vector.
        splits: Iterable of ``test_size`` values to try.
        param_grid: Parameter grid handed to ``GridSearchCV``.
        cv: Number of cross-validation folds (default 5).
        random_state: Seed used for every train/test split (default 42).

    Returns:
        ``(best_split, best_score, best_model)``. When no split produced a
        usable score (for example ``splits`` is empty) the result is
        ``(None, 0, None)``.
    """
    best_split = None
    best_model = None
    # Start below any reachable accuracy so that a legitimate score of 0.0
    # still selects a model instead of leaving best_model as None.
    best_score = float('-inf')

    for split in splits:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=split, random_state=random_state)

        knn = KNeighborsClassifier()
        grid_search = GridSearchCV(knn, param_grid, cv=cv, scoring='accuracy')
        grid_search.fit(X_train, y_train)

        score = grid_search.best_score_
        if score > best_score:
            best_score = score
            best_split = split
            best_model = grid_search.best_estimator_

    if best_model is None:
        # Nothing was selected: keep the historical "no result" return value
        return None, 0, None
    return best_split, best_score, best_model

# Hyper-parameter candidates explored by the grid search
param_grid = dict(
    n_neighbors=range(1, 11),
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Test-set fractions to compare
split_ratios = [0.1, 0.2, 0.3, 0.4]

# Run the search over every split ratio and keep the winner
best_split, best_score, best_model = evaluate_split_ratios(
    X, y, split_ratios, param_grid
)

print(f"Best split ratio: {best_split}")
print(f"Best cross-validation score: {best_score}")


Best split ratio: 0.2
Best cross-validation score: 0.7272727272727273


### Train Best KNN

In [13]:
# Re-create the train/test partition for the winning split ratio
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=best_split, random_state=42
)

# Fit the selected estimator on the training part and predict the held-out part
y_pred = best_model.fit(X_train, y_train).predict(X_test)

# Report per-class metrics followed by overall accuracy
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

              precision    recall  f1-score   support

           0       0.64      0.72      0.68        29
           1       0.65      0.56      0.60        27

    accuracy                           0.64        56
   macro avg       0.64      0.64      0.64        56
weighted avg       0.64      0.64      0.64        56

Accuracy: 0.6428571428571429


## Identifier

In [21]:
# Path of the image to run face recognition on (a relative path, so it is
# resolved against the notebook's working directory).
test_image_path = 'images.jpg' 

In [22]:
def recognize_faces_in_image(image_path):
    """Detect faces in an image, classify each one, and annotate the image.

    Every detected face is cropped from the grayscale image, resized to
    100x100, flattened and passed to ``best_model``. Each face is circled and
    labelled; faces predicted as class 1 additionally get a
    "(Nicolas Cage detected)" caption underneath.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The annotated BGR image, the unmodified image when no face is
        detected, or ``None`` when the file cannot be loaded.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Unable to load image at {image_path}")
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = classifier.detectMultiScale(gray, 1.5, 5)

    if len(faces) == 0:
        print("No faces detected in the image.")
        return image

    # Convert the boxes to plain Python ints so the drawing calls receive
    # ordinary integer coordinates.
    boxes = [tuple(int(v) for v in face) for face in faces]

    # Build one flattened 100x100 grayscale sample per detected face
    samples = [
        cv2.resize(gray[y:y + h, x:x + w], (100, 100)).reshape(-1)
        for (x, y, w, h) in boxes
    ]
    response = best_model.predict(np.array(samples))

    # Annotate every face with its own prediction
    for (x, y, w, h), prediction in zip(boxes, response):
        is_cage = prediction == 1

        center = (x + w // 2, y + h // 2)
        radius = w // 2
        cv2.circle(image, center, radius, (0, 255, 0), 3)

        # Keep the label inside the frame for faces close to the left border
        text_x = max(x - 50, 0)

        text = "You've been Caged!" if is_cage else "Unknown"
        text_color = (0, 225, 0) if is_cage else (255, 255, 255)
        cv2.putText(image, text, (text_x, y - 10), cv2.FONT_HERSHEY_TRIPLEX, 1.0, text_color, 2)

        # The caption belongs to this face, so it is drawn inside the loop;
        # drawn after the loop it would only ever apply to the last face.
        if is_cage:
            cage_detected_text = "(Nicolas Cage detected)"
            cage_detected_color = (0, 255, 0)
            cv2.putText(image, cage_detected_text, (text_x, y + h + 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.0, cage_detected_color, 2)

    return image


In [23]:
# Process and display the single test image
result_image = recognize_faces_in_image(test_image_path)

if result_image is not None:
    cv2.imshow("Face Recognition", result_image)
    try:
        # Blocks the kernel until a key is pressed in the image window
        cv2.waitKey(0)
    finally:
        # Always close the window, even when the cell is interrupted
        cv2.destroyAllWindows()
else:
    print("Failed to process the image.")

KeyboardInterrupt: 