In [None]:
# -*- coding: utf-8 -*-
"""SeekfaceAPI version 1 alpha.ipynb

"""

!pip install mediapipe scikit-learn tensorflow scikeras opencv-python

import cv2
import mediapipe as mp
import numpy as np
import os
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow

# Initialize MediaPipe Face Mesh.
#
# static_image_mode=True asks MediaPipe to run face detection on every call
# instead of treating consecutive inputs as frames of one video and tracking
# the previous face. This notebook feeds the detector unrelated still images
# (dataset photos, their augmented copies, a separate test image), so tracking
# state carried over from the previous image is not wanted.
# NOTE(review): per the MediaPipe docs min_tracking_confidence is ignored in
# static mode; it is kept here so the call stays a superset of the original.
# NOTE(review): max_num_faces=1 means at most one face is reported per image,
# including for the "crowd" test image further down -- raise it if several
# faces per image must be recognised.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Data storage: parallel lists filled by the training cell.
# embeddings[i] is the flattened landmark list for the sample labelled names[i].
embeddings = []
names = []

def extract_landmarks(image):
    """Return the landmark coordinates of the first face found in ``image``.

    The image is handed to the module-level ``face_mesh`` detector. Only the
    first entry of ``multi_face_landmarks`` is used; any further faces are
    ignored.

    Args:
        image: Image array passed unchanged to ``face_mesh.process``
            (callers in this notebook pass RGB images).

    Returns:
        A list of ``[x, y]`` pairs, one per landmark, holding the values
        exactly as reported by MediaPipe (no pixel scaling is applied here),
        or ``None`` when no face landmarks were reported.
    """
    detection = face_mesh.process(image)
    faces = detection.multi_face_landmarks
    if not faces:
        return None

    first_face = faces[0]
    return [[point.x, point.y] for point in first_face.landmark]

def flatten_landmarks(landmarks):
    """Concatenate a sequence of coordinate points into one flat list.

    Args:
        landmarks: Iterable of points, each itself an iterable of
            coordinates (e.g. ``[[x0, y0], [x1, y1], ...]``).

    Returns:
        A new list with every coordinate in point order, e.g.
        ``[x0, y0, x1, y1, ...]``. An empty input yields an empty list.
    """
    flat = []
    for point in landmarks:
        flat.extend(point)
    return flat

def augment_image(image):
    """Create three augmented variants of ``image``.

    Args:
        image: Image array with at least two dimensions (rows, cols, ...).

    Returns:
        A list of three images, in this order: the horizontal mirror, the
        image rotated by 15 degrees about its centre, and the image rotated
        by -15 degrees about its centre. Rotated images keep the original
        width and height and use a scale factor of 1.
    """
    # Horizontal mirror (flip code 1).
    variants = [cv2.flip(image, 1)]

    height, width = image.shape[:2]
    centre = (width / 2, height / 2)

    def rotated(angle):
        # Rotate about the image centre without rescaling; the output canvas
        # is the same size as the input.
        matrix = cv2.getRotationMatrix2D(centre, angle, 1)
        return cv2.warpAffine(image, matrix, (width, height))

    variants.append(rotated(15))
    variants.append(rotated(-15))
    return variants

In [None]:
# Mount Google Drive at /content/drive (Colab only) so files stored in Drive
# can be read through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# --- Training Phase ---
# Walk the dataset folder, ask the user to label each image in which a face
# is found, and store the flattened landmarks (plus those of the augmented
# copies) in the module-level `embeddings` / `names` lists.
dataset_path = "/content/drive/My Drive/Colab Notebooks/SINDIT/"  # Replace if the execution location changes!
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Start from empty buffers so that re-running this cell does not append a
# second copy of every sample to the lists created in the setup cell.
embeddings.clear()
names.clear()

# os.listdir() returns entries in arbitrary order; sorting keeps the prompt
# order (and therefore the sample order) stable between runs.
for filename in sorted(os.listdir(dataset_path)):
    # Compare case-insensitively so files such as "IMG_01.JPG" are not skipped.
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue

    image_path = os.path.join(dataset_path, filename)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; cvtColor would crash on it.
        print(f"Skipping unreadable image: {filename}")
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    landmarks = extract_landmarks(image_rgb)
    if not landmarks:
        print(f"No face detected in {filename}; skipping.")
        continue

    # Strip surrounding whitespace so "Ana" and "Ana " do not become two classes.
    name = input(f"Enter the name for the person in image {filename}: ").strip()
    names.append(name)
    embeddings.append(flatten_landmarks(landmarks))

    # Image augmentation: every augmented copy in which a face is still
    # detected becomes an extra sample with the same label.
    for augmented_image in augment_image(image_rgb):
        aug_landmarks = extract_landmarks(augmented_image)
        if aug_landmarks:
            names.append(name)
            embeddings.append(flatten_landmarks(aug_landmarks))

# --- Prepare data for training ---
if not embeddings:
    # Fail early with a clear message instead of an obscure error deeper in
    # to_categorical / train_test_split.
    raise ValueError(
        "No face embeddings were collected; check dataset_path and its images."
    )

X = np.array(embeddings)
y = np.array(names)

# Convert labels to one-hot encoding.
# sorted() makes the name -> index mapping deterministic; iterating a bare
# set of strings can yield a different order (and so different class indices)
# on every kernel start.
name_to_label = {name: i for i, name in enumerate(sorted(set(names)))}
y_encoded = np.array([name_to_label[name] for name in y])
y_onehot = to_categorical(y_encoded)

# Hold out 20% of the samples; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# --- Keras Model ---
def create_model(hidden_units=128):
    """Build and compile a one-hidden-layer softmax classifier.

    The input width is taken from the module-level ``X_train`` and the number
    of output classes from the distinct entries of the module-level ``names``,
    so both must exist before this function is called.

    Args:
        hidden_units: Number of units in the single ReLU hidden layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    feature_count = X_train.shape[1]
    class_count = len(set(names))

    network = Sequential([
        Dense(hidden_units, activation='relu', input_shape=(feature_count,)),
        Dense(class_count, activation='softmax'),  # Output layer
    ])
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# scikeras only accepts hyperparameters it knows about: constructor arguments
# or "routed" names such as model__<arg>, which are forwarded to the
# model-building function. A bare 'hidden_units' key in the grid is rejected
# by set_params(), so the argument of create_model is exposed as
# model__hidden_units. `model=` replaces the deprecated `build_fn=` alias.
estimator = KerasClassifier(model=create_model, model__hidden_units=128, verbose=0)

# --- Hyperparameter Tuning ---
param_grid = {
    'model__hidden_units': [64, 128, 256],
    'epochs': [10, 20, 30],
    'batch_size': [32, 64]
}

grid = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=3)
grid.fit(X_train, y_train)
best_model = grid.best_estimator_

# --- Training ---
# Refit the best configuration with the held-out split as validation data so
# that per-epoch validation metrics are recorded.
# KerasClassifier.fit() returns the estimator itself, not a Keras History
# object; the per-epoch metrics are exposed as the dict `history_`.
best_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=best_model.epochs, batch_size=best_model.batch_size)
history = best_model.history_

# --- Plot training history ---
plt.plot(history['accuracy'])
plt.plot(history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# --- Testing Phase (Crowd Image) ---
test_image_path = "/content/crowd_test_image.jpg"  # Replace with your crowd test image path
test_image = cv2.imread(test_image_path)
if test_image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; stop here with a clear message.
    raise FileNotFoundError(f"Could not read test image: {test_image_path}")
test_image_rgb = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
image_height, image_width = test_image.shape[:2]

# Reverse lookup (class index -> name), built once instead of per face.
label_to_name = {label: name for name, label in name_to_label.items()}

results = face_mesh.process(test_image_rgb)
if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
        # Model input: the same raw MediaPipe x/y values that
        # extract_landmarks() stores for training. Feeding pixel-scaled
        # coordinates here would not match the training features.
        landmarks = [[lm.x, lm.y] for lm in face_landmarks.landmark]
        test_data = np.array([flatten_landmarks(landmarks)])
        prediction = best_model.predict(test_data)
        predicted_class = int(np.argmax(prediction))
        predicted_name = label_to_name[predicted_class]

        # Pixel coordinates are needed only for drawing. The names px_*/py_*
        # avoid overwriting the label array `y` from the data-preparation cell.
        px_values = [int(lm.x * image_width) for lm in face_landmarks.landmark]
        py_values = [int(lm.y * image_height) for lm in face_landmarks.landmark]
        left, right = min(px_values), max(px_values)
        top, bottom = min(py_values), max(py_values)

        # Draw the bounding box of all landmarks and the predicted label.
        cv2.rectangle(test_image, (left, top), (right, bottom), (0, 255, 0), 2)
        # Keep the label inside the image when the face touches the top edge.
        label_y = max(top - 10, 15)
        cv2.putText(test_image, predicted_name, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

cv2_imshow(test_image)


In [None]:
# --- Webcam Recognition ---
# NOTE(review): cv2.VideoCapture(0) needs a camera attached to the machine
# running the kernel; on a hosted Colab runtime this presumably fails to open
# and the loop below exits immediately -- confirm for your environment.
video_capture = cv2.VideoCapture(0) # 0 for default webcam
if not video_capture.isOpened():
    print("Could not open webcam (device 0); skipping live recognition.")

# Reverse lookup (class index -> name), built once instead of per face.
label_to_name = {label: name for name, label in name_to_label.items()}

try:
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        frame_height, frame_width = frame.shape[:2]
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(frame_rgb)

        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                # Model input: the same raw MediaPipe x/y values that
                # extract_landmarks() stores for training; pixel-scaled
                # coordinates would not match the training features.
                landmarks = [[lm.x, lm.y] for lm in face_landmarks.landmark]
                test_data = np.array([flatten_landmarks(landmarks)])
                prediction = best_model.predict(test_data)
                predicted_class = int(np.argmax(prediction))
                predicted_name = label_to_name[predicted_class]

                # Pixel coordinates are needed only for drawing. The names
                # px_*/py_* avoid overwriting the label array `y`.
                px_values = [int(lm.x * frame_width) for lm in face_landmarks.landmark]
                py_values = [int(lm.y * frame_height) for lm in face_landmarks.landmark]
                left, right = min(px_values), max(px_values)
                top, bottom = min(py_values), max(py_values)

                # Draw the bounding box of all landmarks and the predicted label.
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
                # Keep the label inside the frame when the face touches the top edge.
                label_y = max(top - 10, 15)
                cv2.putText(frame, predicted_name, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        cv2_imshow(frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
            break
finally:
    # Always free the camera, even if prediction or drawing raised.
    video_capture.release()
    cv2.destroyAllWindows()