# 🎯 Dynamic Recognition: KNN + Data Generation

This notebook uses the best trained model to extract embeddings and implements a dynamic facial recognition system using KNN, allowing the addition of new identities in real time.  
To balance the number of samples per class, data augmentation techniques are applied from a single user-provided image.

**Main steps:**
- Loading the base model and removing the final layer.
- Embedding extraction for known images.
- Implementing KNN classification.
- Adding new classes dynamically with automatic image augmentation.


In [23]:
# -----------------------------
# 📦 1. Import Required Libraries
# -----------------------------
import os
import cv2
import numpy as np
import joblib
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout

from keras_vggface.vggface import VGGFace
from sklearn.neighbors import KNeighborsClassifier

In [26]:
# -----------------------------
# 🔧 2. Image Preprocessing
# -----------------------------
def preprocess_image(img_path, target_size=(224, 224)):
    """
    Load an image, resize it to the target size and normalize pixel values.
    """
    img = cv2.imread(str(img_path))
    img = cv2.resize(img, target_size)
    img = img.astype('float32') / 255.0
    return np.expand_dims(img, axis=0)


In [27]:
# -----------------------------
# 📍 3. Embedding Extraction
# -----------------------------
def get_embedding(img_path, model):
    """
    Generate the embedding for a single image using the fine-tuned model.
    """
    img = preprocess_image(img_path)
    embedding = model.predict(img)
    return embedding.flatten()


In [28]:
# -----------------------------
# 🧠 4. Build and Load Fine-Tuned Model
# -----------------------------
def build_embedding_model(weights_path='../models/enhanced_vggface.h5'):
    base_model = VGGFace(model='vgg16', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(2048, activation='relu', kernel_regularizer='l2')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    output = Dense(83, activation='softmax')(x)  # 83 classes

    model = Model(inputs=base_model.input, outputs=output)
    model.load_weights(weights_path)

    # Extract feature extractor model
    embedding_model = Model(inputs=model.input, outputs=model.layers[-4].output)
    return embedding_model

In [29]:
# -----------------------------
# 🏗️ 5. Create Embeddings from Dataset
# -----------------------------
def generate_embeddings_from_dataframe(csv_path, model, emb_path='../models/embeddings.pkl'):
    df = pd.read_csv(csv_path)
    all_embeddings = []
    all_labels = []

    for _, row in df.iterrows():
        img_path = "../data/" + row['image-pathname']
        label = row['label']
        try:
            emb = get_embedding(img_path, model)
            all_embeddings.append(emb)
            all_labels.append(label)
        except Exception as e:
            print(f"⚠️ Error processing {img_path}: {e}")

    embeddings = np.array(all_embeddings)
    labels = np.array(all_labels)

    os.makedirs(os.path.dirname(emb_path), exist_ok=True)
    joblib.dump((embeddings, labels), emb_path)
    print(f"✅ Initial embeddings created and saved to {emb_path}")
    return embeddings, labels

In [30]:
# 1) Carregar o KNN e ver quantas features ele espera
import joblib

knn_model,_ = joblib.load('../models/knn_model.pkl')

# A partir da versão 0.24 do scikit-learn:
print("KNN espera n_features =", knn_model.n_features_in_)  

KNN espera n_features = 83


In [15]:
# -----------------------------
# 🧠 6. Train KNN Classifier
# -----------------------------
def train_and_save_knn(embeddings, labels, path='../models/knn_model.pkl', n_neighbors=5):
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(embeddings, labels)
    joblib.dump((knn, labels), path)
    print(f"✅ KNN model trained and saved at {path}")

In [16]:
# -----------------------------
# ➕ 7. Add New Person (Images)
# -----------------------------

def augment_images(image_paths, augment_count=10):
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=10,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    augmented_images = []
    for path in image_paths:
        img = load_img(path, target_size=(224, 224))
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)

        i = 0
        for batch in datagen.flow(x, batch_size=1):
            augmented_images.append(batch[0])
            i += 1
            if i >= augment_count:
                break
    return augmented_images

def add_new_person(image_paths, label_name, model,
                   emb_path='../models/embeddings.pkl', knn_path='../models/knn_model.pkl',
                   augment_count=10):
    if os.path.exists(emb_path):
        embeddings, labels = joblib.load(emb_path)
    else:
        embeddings, labels = np.array([]).reshape(0, 2048), np.array([])

    augmented_images = augment_images(image_paths, augment_count=augment_count)

    new_embeddings = [model.predict(np.expand_dims(img / 255.0, axis=0)).flatten() for img in augmented_images]
    new_labels = [label_name] * len(new_embeddings)

    embeddings = np.vstack([embeddings, new_embeddings])
    labels = np.hstack([labels, new_labels])

    joblib.dump((embeddings, labels), emb_path)
    print(f"📦 Embeddings updated and saved at {emb_path}")

    train_and_save_knn(embeddings, labels, knn_path)


In [31]:
# -----------------------------
# 🔍 8. Predict Identity
# -----------------------------
def predict_image(img_path, model, knn_path='../models/knn_model.pkl'):
    embedding = get_embedding(img_path, model)
    knn, _ = joblib.load(knn_path)
    prediction = knn.predict([embedding])[0]
    return prediction

In [33]:
embedding_model = load_model('../models/enhanced_vggface.h5')

In [11]:
# Step 1: Generate embeddings from initial dataset
embeddings, labels = generate_embeddings_from_dataframe(
        '../data/train.csv',
        model=embedding_model,
        emb_path='../models/embeddings.pkl'
    )

    # Step 2: Train KNN using these embeddings
train_and_save_knn(embeddings, labels, path='../models/knn_model.pkl')


✅ Initial embeddings created and saved to ../models/embeddings.pkl
✅ KNN model trained and saved at ../models/knn_model.pkl


In [34]:
test_image = '../debug_faces/face_1745782242_unknown.png'


In [35]:
embedding = get_embedding(test_image, embedding_model)



In [37]:
len(embedding)

83

In [19]:
#  # Step 2: Predict using a new image
test_image = '../debug_faces/face_1745782242_unknown.png'
prediction = predict_image(test_image, embedding_model)
print(f"🔍 Predicted label: {prediction}")

AttributeError: 'str' object has no attribute 'shape'

In [13]:
 # Step 3: Add a new person with 3 sample images
image_paths = [
    '../temp_training_data/1.jpeg',
    '../temp_training_data/2.jpeg',
    '../temp_training_data/0.jpg',
]
add_new_person(image_paths, label_name='Lucas Piero', model=embedding_model)



📦 Embeddings updated and saved at ../models/embeddings.pkl
✅ KNN model trained and saved at ../models/knn_model.pkl


In [15]:
# Step 4: Predict using a new image
test_image = '../temp_training_data/0.jpg'
prediction = predict_image(test_image, embedding_model)
print(f"🔍 Predicted label: {prediction}")

🔍 Predicted label: Lucas Piero


In [15]:
def predict_image2(img_path, model, knn_path='../models/knn_model.pkl', top_k=5):
    embedding = get_embedding(img_path, model)
    knn, labels = joblib.load(knn_path)
    distances, indices = knn.kneighbors([embedding], n_neighbors=top_k)
    predictions = [(labels[i], distances[0][rank]) for rank, i in enumerate(indices[0])]
    return predictions


In [16]:
#test_image = '../temp_training_data/3.jpeg'
test_image = '../data/train/Adam Sandler/3.jpg'
predictions = predict_image2(test_image, embedding_model,top_k = 15)

for label, dist in predictions:
    print(f"🔍 Predicted label: {label} | Distance: {dist:.4f}")


🔍 Predicted label: Adam Sandler | Distance: 0.0000
🔍 Predicted label: Adam Sandler | Distance: 0.0000
🔍 Predicted label: Adam Sandler | Distance: 0.0001
🔍 Predicted label: Adam Sandler | Distance: 0.0001
🔍 Predicted label: Adam Sandler | Distance: 0.0001
🔍 Predicted label: Adam Sandler | Distance: 0.0001
🔍 Predicted label: Adam Sandler | Distance: 0.0001
🔍 Predicted label: Adam Sandler | Distance: 0.0001
🔍 Predicted label: Adam Sandler | Distance: 0.0002
🔍 Predicted label: Adam Sandler | Distance: 0.0002
🔍 Predicted label: Adam Sandler | Distance: 0.0002
🔍 Predicted label: Adam Sandler | Distance: 0.0002
🔍 Predicted label: Adam Sandler | Distance: 0.0002
🔍 Predicted label: Adam Sandler | Distance: 0.0002
🔍 Predicted label: Adam Sandler | Distance: 0.0002
