<a href="https://www.kaggle.com/code/duaanaz/face-recognition?scriptVersionId=251890935" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# *Face Recognition Project*

In [None]:
# Load important libraries
import os
import cv2
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
from skimage.feature import hog
from skimage import exposure
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

# Apply matplotlib's 'ggplot' style sheet to every figure created below
plt.style.use('ggplot')


In [None]:
# Build the Haar cascade detectors for faces, eyes and smiles from the XML
# model files located under cv2.data.haarcascades.
face_cascade, eye_cascade, smile_cascade = (
    cv2.CascadeClassifier(cv2.data.haarcascades + xml_name)
    for xml_name in (
        "haarcascade_frontalface_default.xml",
        "haarcascade_eye.xml",
        "haarcascade_smile.xml",
    )
)
# The nose detector is currently disabled; it would be loaded from the user's
# home directory rather than from cv2.data.haarcascades:
# nose_cascade = cv2.CascadeClassifier(os.path.expanduser("~/haarcascade_mcs_nose.xml"))


In [None]:
# Dataset root; every entry found directly inside it is treated as a student ID
base_path = '../input/students-images/'
student_ids = os.listdir(base_path)
student_ids.sort()

# Exploratory Data Analysis
**Data Frame**

In [None]:
# Collect one metadata record per readable image, then tabulate the records
records = []
for std_id in student_ids:
    # Images are looked up at <base_path>/<id>/<id>/*.jpg (folder name repeated)
    pattern = os.path.join(base_path, std_id, std_id, '*.jpg')
    for img_path in sorted(glob(pattern)):
        img = cv2.imread(img_path)
        if img is None:
            # Nothing could be decoded from this file: report it and move on
            print("there is no image")
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        height, width = img.shape[:2]
        records.append({
            'student_id': std_id,
            'path': img_path,
            'height': height,
            'width': width,
            'aspect_ratio': width / height,
            'brightness': gray.mean(),   # mean grayscale intensity
            'contrast': gray.std(),      # spread of grayscale intensities
            # Variance of the Laplacian response, used below as a sharpness score
            'blur_score': cv2.Laplacian(gray, cv2.CV_64F).var(),
        })

# Turn the list of records into the DataFrame used by the rest of the notebook
df = pd.DataFrame(records)

# Show the resulting table
print(df)


# Assessing Data Quantity

In [None]:
# Count the images of each student once, ordered by student ID
images_per_student = df['student_id'].value_counts().sort_index()

print("Number of images per student:")
print(images_per_student)

# Bar chart of the same per-student counts
plt.figure(figsize=(10, 4))
images_per_student.plot(kind='bar', color='skyblue')
plt.title("Number of Images per Student")
plt.xlabel("Student ID")
plt.ylabel("Number of Images")
plt.grid(axis='y')
plt.show()

# *Face Detection and Cropping with Grayscale Conversion*

In [None]:
# Visualize 1 face per student: the first image of every student is converted
# to grayscale, cropped to the detected face and shown as a 100x100 thumbnail.
sample_ids = sorted(df['student_id'].unique())

# Derive the grid from the number of students instead of assuming at most 12
# (a fixed 2x6 grid makes plt.subplot fail for a 13th student).
n_cols = 6
n_rows = max(1, -(-len(sample_ids) // n_cols))  # ceiling division
plt.figure(figsize=(15, 3 * n_rows))

for i, sid in enumerate(sample_ids):
    # Get first image path for each student
    path = df.loc[df['student_id'] == sid, 'path'].iloc[0]

    img = cv2.imread(path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to Grayscale
    faces = face_cascade.detectMultiScale(gray, 1.1, 5)

    if len(faces) > 0:
        # Keep the largest detection, as the HOG cells of this notebook do
        x, y, w, h = max(faces, key=lambda r: r[2] * r[3])
        face = cv2.resize(gray[y:y+h, x:x+w], (100, 100))
    else:
        # No face found: fall back to the whole image
        face = cv2.resize(gray, (100, 100))

    # Show extracted face, titled with the student it belongs to
    # (`sid`, not the stale `std_id` left over from an earlier cell)
    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(face, cmap='gray')
    plt.title(f"Student {sid}")
    plt.axis('off')

plt.suptitle("One Face per Student (Grayscale, Cropped)", fontsize=16)
plt.tight_layout()
plt.show()

# Face Detection Coverage

In [None]:
# Count how many images yield at least one detection from the face cascade
detected_faces = 0
for p in df['path']:
    gray = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2GRAY)
    if len(face_cascade.detectMultiScale(gray, 1.1, 5)) == 0:
        continue  # nothing detected in this image
    detected_faces += 1

print(f"Faces Detected: {detected_faces} / {len(df)}")

In [None]:
# Pie chart: share of images with and without a detected face
missed_faces = len(df) - detected_faces
plt.pie(
    [detected_faces, missed_faces],
    labels=["Detected", "Not Detected"],
    colors=['green', 'red'],
    autopct='%1.1f%%',
)
plt.title("Face Detection Coverage")
plt.show()

# Image Quality Analysis
# Aspect Ratio Insight

In [None]:
# Aspect ratio distribution per student.
# DataFrame.boxplot(by=...) opens its own figure when no `ax` is given, so the
# size is passed through `figsize`; a preceding plt.figure() would only leave
# an empty extra figure behind and its size would not be applied to the plot.
df.boxplot(column='aspect_ratio', by='student_id', figsize=(10, 4))
plt.title("Aspect Ratio per Student")
plt.suptitle("")  # clear the heading that the grouped boxplot adds by itself
plt.xlabel("Student ID")
plt.ylabel("Aspect Ratio (Width / Height)")
plt.grid(True)
plt.show()

# Brightness vs Contrast Insight

In [None]:
# Scatter of brightness against contrast, one series (and legend entry) per student
plt.figure(figsize=(10, 4))
for std_id in sorted(df['student_id'].unique()):
    rows = df.loc[df['student_id'] == std_id]
    plt.scatter(rows['brightness'], rows['contrast'], alpha=0.5, label=std_id)

plt.title("Brightness vs Contrast per Student")
plt.xlabel("Brightness (Mean Pixel Intensity)")
plt.ylabel("Contrast (Pixel Intensity Std Dev)")
# Anchor the legend just outside the right edge of the axes
plt.legend(title="Student ID", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True)
plt.tight_layout()
plt.show()

# Sharpness of the Image

In [None]:
# Blur score (variance of the Laplacian) distribution per student.
# DataFrame.boxplot(by=...) opens its own figure when no `ax` is given, so the
# size is passed through `figsize`; a preceding plt.figure() would only leave
# an empty extra figure behind and its size would not be applied to the plot.
df.boxplot(column='blur_score', by='student_id', figsize=(10, 4))
plt.title("Image Sharpness (Blur Score)")
plt.suptitle("")  # clear the heading that the grouped boxplot adds by itself
plt.xlabel("Student ID")
plt.ylabel("Blur Score (Variance of Laplacian)")
plt.grid(True)
plt.show()

# Size Distribution

In [None]:
# Histogram of the pixel count (width * height) of every image
pixel_counts = df['width'] * df['height']

plt.figure(figsize=(10, 4))
plt.hist(pixel_counts, bins=20, color='skyblue')
plt.title("Image Size Distribution (in pixels)")
plt.xlabel("Image Size (Width × Height)")
plt.ylabel("Number of Images")
plt.grid(True)
plt.show()



# Feature Extraction:
# HAAR Cascade Facial Features Detection

In [None]:
# Load the sample image. Row 1450 is the preferred example, but the index is
# clamped to the last row so a smaller dataset does not raise an IndexError.
sample_idx = min(1450, len(df) - 1)
sample_path = df.iloc[sample_idx]['path']
img = cv2.imread(sample_path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect faces, then look for eyes and a smile inside each face region
faces = face_cascade.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

    # roi_color is a slice of img, so rectangles drawn on it appear in img;
    # the coordinates returned for eyes/smiles are relative to this region.
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]

    # Eyes
    eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 10)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 255), 2)

    # Smile
    smiles = smile_cascade.detectMultiScale(roi_gray, 1.7, 22)
    for (sx, sy, sw, sh) in smiles:
        cv2.rectangle(roi_color, (sx, sy), (sx+sw, sy+sh), (0, 255, 0), 2)

    # # Nose (disabled together with the nose cascade)
    # if not nose_cascade.empty():
    #     noses = nose_cascade.detectMultiScale(roi_gray, 1.3, 5)
    #     for (nx, ny, nw, nh) in noses:
    #         cv2.rectangle(roi_color, (nx, ny), (nx+nw, ny+nh), (255, 0, 255), 2)

# Show image using matplotlib (OpenCV images are BGR, matplotlib expects RGB)
plt.figure(figsize=(8, 6))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title("Facial Features Detection")
plt.axis('off')
plt.show()

# Gradient Feature Visualization Using HOG

In [None]:
# --- HOG Visualization ---

# Pick one example face. Row 1450 is the preferred example, but the index is
# clamped to the last row so a smaller dataset does not raise an IndexError.
sample_idx = min(1450, len(df) - 1)
sample_path = df.iloc[sample_idx]['path']
img = cv2.imread(sample_path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.1, 5)

if len(faces) > 0:
    # Crop to the largest detection (by bounding-box area)
    x, y, w, h = max(faces, key=lambda r: r[2] * r[3])
    face = gray[y:y+h, x:x+w]
else:
    # No detection: max() on an empty result would raise ValueError, so fall
    # back to the whole image as the face-preview cell does.
    face = gray
face = cv2.resize(face, (100, 100))
norm = face.astype('float32') / 255.0  # scale pixel values to [0, 1]

# Extract HOG features and the matching visualization image
features, hog_image = hog(norm, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
                          visualize=True, feature_vector=True)

# Enhance contrast of the visualization
hog_image = exposure.rescale_intensity(hog_image, in_range=(0, 10))

# Original Face
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.imshow(norm, cmap='gray')
plt.title("Original Face")
plt.axis('off')

# HOG
plt.subplot(1, 2, 2)
plt.imshow(hog_image, cmap='gray')
plt.title("HOG Visualization")
plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Data Splitting - Train&Test
def _face_hog(image_path):
    """Return the HOG vector of the largest face in the image, or None if the image is skipped."""
    img = cv2.imread(image_path)
    if img is None:
        return None  # unreadable file
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 5)
    if len(faces) == 0:
        return None  # no face detected
    x, y, w, h = max(faces, key=lambda r: r[2] * r[3])
    crop = gray[y:y + h, x:x + w]
    if np.var(crop) < 100:
        return None  # crop has too little pixel variance
    crop = cv2.resize(crop, (100, 100)).astype('float32') / 255.0
    return hog(crop, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)


X_train, X_test, y_train, y_test = [], [], [], []
le = LabelEncoder()

print("Extracting HOG features...")
for std_id in student_ids:
    image_paths = sorted(glob(os.path.join(base_path, std_id, std_id, '*.jpg')))
    feats = [vec for vec in map(_face_hog, image_paths) if vec is not None]

    # A student needs at least two usable faces to appear in both halves
    if len(feats) < 2:
        continue

    # Split each student's features 50/50 so every kept student is in both sets
    train_part, test_part = train_test_split(feats, test_size=0.5, random_state=42)
    X_train.extend(train_part)
    X_test.extend(test_part)
    y_train.extend([std_id] * len(train_part))
    y_test.extend([std_id] * len(test_part))

# Encode the labels: fit on the training labels, reuse the mapping for the test labels
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))

In [None]:
# Model Training: linear-kernel SVM with probability estimates enabled
print("Training SVM...")
model = SVC(probability=True, kernel='linear')
model = model.fit(X_train, y_train_enc)  # fit() returns the same estimator
print("Training complete.")

In [None]:
# --- Evaluation ---
# Predict the held-out samples and report the fraction classified correctly
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test_enc, y_pred=y_pred)
print(f"Test Accuracy: {acc:.2%}")

In [None]:
# Confusion Matrix of encoded test labels vs. predictions, with student IDs on the axes
cm = confusion_matrix(y_test_enc, y_pred)
cm_display = ConfusionMatrixDisplay(cm, display_labels=le.classes_)
cm_display.plot(cmap='Blues', xticks_rotation=45)
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Text report of per-student precision / recall / F1 on the test set
report_text = classification_report(y_test_enc, y_pred, target_names=le.classes_)
print(report_text)

In [None]:
from sklearn.metrics import classification_report
import pandas as pd
import matplotlib.pyplot as plt

# Build the classification report as a dict, then as a table with one row per entry
report_dict = classification_report(y_test_enc, y_pred, target_names=le.classes_, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()

# Keep only the rows labelled with a student ID
report_df = report_df[report_df.index.isin(le.classes_)]

# Round the three score columns for display; 'support' is left untouched
metric_cols = ['precision', 'recall', 'f1-score']
report_df = report_df.round({name: 2 for name in metric_cols})

# Grouped bar chart of the three scores per student
ax = report_df[metric_cols].plot(kind='bar', figsize=(12, 6), colormap='Set2')
plt.title("Classification Metrics per Student")
plt.ylabel("Score")
plt.ylim(0, 1.1)  # head-room above 1.0 for the n=... labels
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.xticks(rotation=0)

# Write the support (test-sample count) above each student's group of bars
for i, count in enumerate(report_df['support']):
    support = int(count)
    plt.text(i, 1.05, f'n={support}', ha='center', fontsize=9, color='black')

plt.tight_layout()
plt.legend(loc='upper right')
plt.show()

In [None]:
# Bar chart of the number of test samples (support) per student
support_counts = report_df['support']
support_counts.plot(kind='bar', color='lightblue')
plt.title("Number of Test Images per Student")
plt.ylabel("Support (count)")
plt.xticks(rotation=0)
plt.grid(axis='y')
plt.tight_layout()
plt.show()