In [None]:
import numpy as np

# Load the raw .npy file as-is (without calling `.item()` on the result).
# NOTE(review): allow_pickle=True can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
embeddings_data = np.load(
    "/kaggle/input/face-embeddings1/face_embeddings.npy",
    allow_pickle=True,
)

# Quick inspection: container type, array shape, then the first five rows.
print(
    type(embeddings_data),
    embeddings_data.shape,
    embeddings_data[:5],
    sep="\n",
)

In [None]:
import numpy as np
from collections import defaultdict

# Load the .npy file that holds one row per image.
# NOTE(review): allow_pickle=True can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
data = np.load(
    "/kaggle/input/face-embeddings1/face_embeddings.npy",
    allow_pickle=True,
)

# Maps each person's name to the list of that person's embedding vectors.
embeddings_dict = {}

# Every row is read as [image path, person name, embedding values...].
for row in data:
    file_path, person_id = row[0], row[1]   # the image path is read but not used below
    embedding = row[2:].astype(np.float32)  # remaining columns form the feature vector
    embeddings_dict.setdefault(person_id, []).append(embedding)

# Sanity-check the grouped result.
print(f"🔹 Số người trong dataset: {len(embeddings_dict)}")
first_person_embeddings = list(embeddings_dict.values())[0]
print(f"🔹 Số ảnh của người đầu tiên: {len(first_person_embeddings)}")
print(f"🔹 Một vector đặc trưng:\n{first_person_embeddings[0]}")

In [None]:
import random

# Build the Siamese training set: for every consecutive pair of embeddings of
# one person emit a positive pair (label 1) and, for the same anchor, one
# negative pair (label 0) with a random embedding of a different person.
pairs = []   # list of [embedding_a, embedding_b]
labels = []  # 1 = same person, 0 = different people

people = list(embeddings_dict.keys())

# Negative pairs need someone else to sample from; fail with a clear message
# instead of the opaque IndexError raised by choosing from an empty list.
if len(people) < 2:
    raise ValueError(
        "Need embeddings for at least two different people to build negative pairs"
    )

# Dedicated, seeded generator so the saved dataset is reproducible (same seed
# value as the random_state used for the train/validation split).
pair_rng = random.Random(42)

for person_idx, person in enumerate(people):
    embeddings = embeddings_dict[person]
    num_samples = len(embeddings)

    for i in range(num_samples - 1):
        # Positive pair: two consecutive embeddings of the same person.
        pairs.append([embeddings[i], embeddings[i + 1]])
        labels.append(1)

        # Negative pair: draw uniformly among the *other* people in O(1) by
        # sampling from len(people) - 1 slots and skipping the current index,
        # rather than rebuilding the candidate list for every sample.
        other_idx = pair_rng.randrange(len(people) - 1)
        if other_idx >= person_idx:
            other_idx += 1
        other_person = people[other_idx]
        negative_sample = pair_rng.choice(embeddings_dict[other_person])

        pairs.append([embeddings[i], negative_sample])
        labels.append(0)

# Convert to numpy arrays.
pairs = np.array(pairs)
labels = np.array(labels)

# Persist the pairs and labels in compressed form.
np.savez_compressed("siamese_dataset.npz", pairs=pairs, labels=labels)

print("✅ Dataset đã được lưu thành công!")

In [None]:
import numpy as np

# Load the pair dataset. np.load on an .npz returns an NpzFile that keeps the
# archive open, so use it as a context manager: both arrays are read into
# memory inside the block and the file handle is released afterwards.
with np.load("siamese_dataset.npz") as data:
    pairs = data["pairs"]    # embedding pairs; (N, 2, 512) according to the original note
    labels = data["labels"]  # one label per pair, 1 or 0

print(f"🔹 Số lượng cặp ảnh: {pairs.shape[0]}")
print(f"🔹 Kích thước vector ảnh: {pairs.shape[1:]}")  # per-pair shape, e.g. (2, 512)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, BatchNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


# `pairs` and `labels` are expected to exist already (loaded in an earlier cell).
print("📊 Shape dữ liệu ban đầu:")
print("pairs shape:", pairs.shape)
print("labels shape:", labels.shape)

# 80/20 train/validation split, stratified so both sides keep the label ratio.
# NOTE(review): the split is done on pairs, and the pair builder reuses the
# same embedding in several pairs, so one embedding can land on both sides of
# the split. Validation metrics may therefore be optimistic.
X_train, X_val, y_train, y_val = train_test_split(
    pairs,
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.2,
)

print("\n✅ Kết quả chia dữ liệu:")
print(f"Train data: {X_train.shape} (samples), {y_train.shape} (labels)")
print(f"Val data: {X_val.shape} (samples), {y_val.shape} (labels)")

In [None]:
def cosine_similarity(tensors):
    """Return the cosine similarity of two tensors along their last axis.

    Args:
        tensors: a pair ``(x, y)`` of tensors with matching shapes.

    Returns:
        A tensor whose last axis has size 1 (``keepdims=True``), holding
        ``sum(x * y) / (||x|| * ||y|| + 1e-7)``.
    """
    left, right = tensors
    dot_product = tf.reduce_sum(left * right, axis=-1, keepdims=True)
    norm_product = (
        tf.norm(left, axis=-1, keepdims=True)
        * tf.norm(right, axis=-1, keepdims=True)
    )
    # The small epsilon keeps the division finite when either norm is zero.
    return dot_product / (norm_product + 1e-7)

In [None]:
def build_siamese_model(input_shape=(512,)):
    """Build a Siamese network that scores the similarity of two input vectors.

    Both inputs go through the same (weight-sharing) encoder; the two encoded
    vectors are compared with ``cosine_similarity`` and the resulting scalar is
    passed through a single sigmoid unit.

    Args:
        input_shape: shape of one input vector (default ``(512,)``).

    Returns:
        A Keras ``Model`` taking ``[input_A, input_B]`` and producing one
        sigmoid output per pair.
    """
    # --- Shared encoder: 256 -> 128 -> 32 dense stack ---
    encoder_input = Input(shape=input_shape)
    hidden = Dense(256, activation='relu')(encoder_input)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.3)(hidden)
    hidden = Dense(128, activation='relu')(hidden)
    hidden = BatchNormalization()(hidden)
    encoded = Dense(32, activation='relu')(hidden)
    encoder = Model(encoder_input, encoded)

    # --- Siamese head: two inputs, one shared encoder ---
    branch_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    branch_codes = [encoder(branch) for branch in branch_inputs]

    # The layer name is kept as 'cosine_similarity'.
    similarity = Lambda(
        cosine_similarity,
        name='cosine_similarity',
        output_shape=(1,),
    )(branch_codes)

    # A single sigmoid unit rescales the similarity into a (0, 1) score.
    score = Dense(1, activation='sigmoid')(similarity)
    return Model(inputs=branch_inputs, outputs=score)


# Instantiate the network with the default input shape and print its layers.
siamese_model = build_siamese_model()
siamese_model.summary()

In [None]:
# Binary cross-entropy on the sigmoid output, optimised with Adam (lr = 1e-3);
# accuracy is tracked as the reporting metric.
siamese_model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)

In [None]:
# Both callbacks track the SAME quantity (validation loss). Previously early
# stopping followed val_loss while the checkpoint followed val_accuracy, so the
# weights restored in memory and the weights saved to 'best_siamese_model.keras'
# (which later cells reload) could come from different epochs.
callbacks = [
    # Stop after 7 epochs without val_loss improvement and roll back to the
    # best weights seen so far.
    EarlyStopping(
        monitor='val_loss',
        patience=7,
        restore_best_weights=True
    ),
    # Keep only the checkpoint with the lowest val_loss on disk.
    ModelCheckpoint(
        'best_siamese_model.keras',
        monitor='val_loss',
        save_best_only=True,
        mode='min'
    )
]

In [None]:
# Train on the pair dataset: index 0 / 1 along axis 1 selects the first /
# second member of every pair, matching the model's two inputs.
history = siamese_model.fit(
    x=[X_train[:, 0], X_train[:, 1]],
    y=y_train,
    batch_size=64,
    epochs=50,
    validation_data=([X_val[:, 0], X_val[:, 1]], y_val),
    callbacks=callbacks,
    verbose=2,  # one summary line per epoch
)

In [None]:
# Evaluate on the validation pairs; the same two-input list is reused for
# evaluate() and predict().
val_inputs = [X_val[:, 0], X_val[:, 1]]
val_loss, val_acc = siamese_model.evaluate(val_inputs, y_val, verbose=0)
print(f"📊 Validation Accuracy: {val_acc:.4f}")
print(f"📊 Validation Loss: {val_loss:.4f}")

# The model has a single sigmoid output per pair, so predict() yields one
# column; flatten it so y_pred has the same 1-D layout as y_val before
# thresholding at 0.5.
y_pred = (siamese_model.predict(val_inputs).ravel() > 0.5).astype(int)
class_names = ['Không khớp', 'Khớp']
print("\n📝 Classification Report:")
print(classification_report(y_val, y_pred, target_names=class_names))

# Confusion matrix: rows are the actual labels, columns the predicted ones.
# The axes are labelled explicitly so the plot can be read on its own.
plt.figure(figsize=(6, 6))
sns.heatmap(confusion_matrix(y_val, y_pred),
            annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel('Dự đoán')
plt.ylabel('Thực tế')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Plot the training history side by side: loss on the left, accuracy on the
# right, then save the figure to 'training_curves.png'.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss Curve')
loss_ax.legend()

acc_ax.plot(history.history['accuracy'], label='Train Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Accuracy Curve')
acc_ax.legend()

curves_fig.tight_layout()
curves_fig.savefig('training_curves.png')
plt.show()

In [None]:
# Show predictions for a few random validation pairs.
print("🔮 Dự đoán trên 5 mẫu ngẫu nhiên:")

# Sample WITHOUT replacement so the same pair is never shown twice, and never
# ask for more samples than the validation set contains.
num_shown = min(5, len(X_val))
sample_idx = np.random.choice(len(X_val), num_shown, replace=False)

# Score all selected pairs in one batched, silent predict() call instead of
# one call (and one progress bar) per sample.
sample_probs = siamese_model.predict(
    [X_val[sample_idx, 0], X_val[sample_idx, 1]],
    verbose=0,
).ravel()

for i, (idx, prob) in enumerate(zip(sample_idx, sample_probs)):
    pred = "Khớp" if prob > 0.5 else "Không khớp"
    actual = "Khớp" if y_val[idx] == 1 else "Không khớp"
    print(f"\nMẫu {i+1}:")
    print(f"- Thực tế: {actual}")
    print(f"- Dự đoán: {pred} (Xác suất: {prob:.4f})")
    print("-" * 40)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Re-declare cosine_similarity so it can be handed to load_model below.
def cosine_similarity(tensors):
    """Compute the cosine similarity between two tensors along the last axis.

    Args:
        tensors: a pair ``(x, y)`` of tensors with matching shapes.

    Returns:
        ``sum(x * y) / (||x|| * ||y|| + 1e-7)`` with the reduced axis kept
        (size 1).
    """
    first, second = tensors
    numerator = tf.reduce_sum(first * second, axis=-1, keepdims=True)
    first_norm = tf.norm(first, axis=-1, keepdims=True)
    second_norm = tf.norm(second, axis=-1, keepdims=True)
    # Epsilon guards against division by zero for all-zero vectors.
    denominator = first_norm * second_norm + 1e-7
    return numerator / denominator

# Reload the saved checkpoint; the function wrapped by the Lambda layer is
# supplied by name through custom_objects.
lambda_functions = {'cosine_similarity': cosine_similarity}
siamese_model = load_model('best_siamese_model.keras', custom_objects=lambda_functions)

# Quick check of the reloaded model's interface.
print("Input shape:", siamese_model.input_shape)
print("Output shape:", siamese_model.output_shape)

In [None]:
import tensorflow as tf
import numpy as np
import cv2

def load_pb_model(model_path):
    """Load a frozen TensorFlow graph (.pb file) into a new ``tf.Graph``.

    Args:
        model_path: path of the serialized GraphDef file.

    Returns:
        The ``tf.Graph`` into which the graph definition was imported
        (with an empty name prefix, so tensor names are unchanged).
    """
    with tf.io.gfile.GFile(model_path, "rb") as pb_file:
        serialized = pb_file.read()
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(serialized)

    graph = tf.Graph()
    with graph.as_default():
        # name="" avoids the default "import/" prefix on every node name.
        tf.import_graph_def(graph_def, name="")

    return graph

# 1. Cosine similarity used by the Siamese model's Lambda layer.
def cosine_similarity(tensors):
    """Cosine similarity of a pair of tensors, reduced over the last axis.

    Args:
        tensors: a pair ``(x, y)`` of tensors with matching shapes.

    Returns:
        A tensor with the last axis reduced to size 1, equal to
        ``sum(x * y) / (||x|| * ||y|| + 1e-7)``.
    """
    vec_a, vec_b = tensors
    norms = [tf.norm(vec, axis=-1, keepdims=True) for vec in (vec_a, vec_b)]
    inner = tf.reduce_sum(vec_a * vec_b, axis=-1, keepdims=True)
    # 1e-7 prevents a division by zero when one of the vectors is all zeros.
    return inner / (norms[0] * norms[1] + 1e-7)

# 2. Load the trained Siamese model, mapping the Lambda layer's function name
#    to the implementation defined above.
siamese_custom_objects = {'cosine_similarity': cosine_similarity}
siamese_model = tf.keras.models.load_model(
    'best_siamese_model.keras',
    custom_objects=siamese_custom_objects,
)

# 3. Read an image and extract its FaceNet embedding.
def get_face_embedding(image_path, sess):
    """Run one image through the FaceNet graph and return its embedding.

    Relies on the module-level tensors ``input_tensor``, ``embeddings_tensor``
    and ``phase_train_tensor`` being defined before the call.

    Args:
        image_path: path of the image file to read.
        sess: TensorFlow session bound to the FaceNet graph.

    Returns:
        The embedding flattened to a 1-D array.

    Raises:
        ValueError: if the image cannot be read from ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; check it
    # here so the caller sees the offending path rather than a cryptic error
    # from the colour conversion below.
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # FaceNet-style pixel normalisation.
    # NOTE(review): the image is fed at its original size; confirm the input
    # files already have the size the graph expects.
    img = (img.astype(np.float32) - 127.5) / 128.0
    img = np.expand_dims(img, axis=0)  # add the batch dimension

    embedding = sess.run(
        embeddings_tensor,
        feed_dict={input_tensor: img, phase_train_tensor: False}
    )
    return embedding.flatten()

# 4. End-to-end comparison of two face images.
def compare_faces(img1_path, img2_path, threshold=0.5):
    """Decide whether two face images match according to the Siamese model.

    Uses the module-level ``facenet_sess`` and ``siamese_model``.

    Args:
        img1_path: path of the first image.
        img2_path: path of the second image.
        threshold: score above which the pair is reported as a match.

    Returns:
        ``("MATCH" | "NO MATCH", score)``; ``(None, None)`` if any step
        raised, in which case the error is printed.
    """
    try:
        unit_embeddings = []
        for path in (img1_path, img2_path):
            # One row of 512 values per image, scaled to unit L2 norm.
            vector = get_face_embedding(path, facenet_sess).reshape(1, 512)
            unit_embeddings.append(vector / np.linalg.norm(vector))

        # The model returns one score per pair; take the only one.
        score = siamese_model.predict(unit_embeddings, verbose=0)[0][0]
        verdict = "MATCH" if score > threshold else "NO MATCH"
        return verdict, float(score)

    except Exception as e:
        print(f"Error processing images: {str(e)}")
        return None, None

# 5. Usage
if __name__ == "__main__":
    # Load the frozen FaceNet graph and open a session on it.
    facenet_graph = load_pb_model("/kaggle/input/facenet-model-1/20180402-114759.pb")
    facenet_sess = tf.compat.v1.Session(graph=facenet_graph)

    # try/finally guarantees the session is released even if a tensor lookup
    # raises; previously it was only closed on the success path.
    try:
        # Tensors read by get_face_embedding() as module-level names.
        input_tensor = facenet_graph.get_tensor_by_name("input:0")
        embeddings_tensor = facenet_graph.get_tensor_by_name("embeddings:0")
        phase_train_tensor = facenet_graph.get_tensor_by_name("phase_train:0")

        # Test on two images taken from the same identity folder.
        img1 = "/kaggle/input/vggface2-hq-cropped/VGGface2_HQ_cropped/VGGface2_HQ_cropped/n000002/0001_01.jpg"
        img2 = "/kaggle/input/vggface2-hq-cropped/VGGface2_HQ_cropped/VGGface2_HQ_cropped/n000002/0002_01.jpg"

        result, score = compare_faces(img1, img2)
        if result:
            print(f"Result: {result} (Score: {score:.4f})")
    finally:
        # Close the session.
        facenet_sess.close()

In [None]:
# 1. Clean installation of all required packages
!pip uninstall -y lz4 python-lz4 mtcnn
!pip install --no-cache-dir lz4==4.3.2 python-lz4==4.0.2 mtcnn==0.1.1
!apt-get install -y liblz4-dev

print("\nPlease RESTART YOUR RUNTIME/KERNEL after this installation!")
print("After restarting, run only the main code (skip this installation cell)")

In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os
import sys

# ========================
# 1. FACE DETECTION
# ========================

class FaceDetector:
    """Haar-cascade face detector that returns a cropped, resized RGB face."""

    def __init__(self):
        # Frontal-face Haar cascade file shipped with OpenCV.
        self.detector = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )

    def detect(self, img_path, target_size=(160, 160)):
        """Detect the most prominent face in an image and return it cropped.

        The face is cropped with a margin and resized; no landmark-based
        alignment is performed.

        Args:
            img_path: path of the image file to read.
            target_size: size passed to ``cv2.resize`` for the returned crop.

        Returns:
            The face crop as an RGB array resized to ``target_size``, or
            ``None`` when no face is found or any step fails (the error is
            printed as a warning).
        """
        try:
            img = cv2.imread(img_path)
            if img is None:
                raise ValueError(f"Could not read image: {img_path}")

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = self.detector.detectMultiScale(gray, 1.3, 5)

            if len(faces) == 0:
                return None

            # When several boxes are returned, keep the one with the largest
            # area rather than whichever box happens to come first.
            x, y, w, h = max(faces, key=lambda box: box[2] * box[3])

            # Add a 25% margin around the box, clamped to the image borders.
            margin = 0.25
            x = max(0, x - int(w * margin / 2))
            y = max(0, y - int(h * margin / 2))
            w = min(img.shape[1] - x, w + int(w * margin))
            h = min(img.shape[0] - y, h + int(h * margin))

            face = img[y:y+h, x:x+w]
            face_rgb = cv2.cvtColor(cv2.resize(face, target_size), cv2.COLOR_BGR2RGB)
            return face_rgb

        except Exception as e:
            print(f"Face detection warning: {str(e)}")
            return None

# ========================
# 2. FACENET INTEGRATION
# ========================

class FaceNetWrapper:
    """Wraps a frozen FaceNet graph and the session used to query it."""

    def __init__(self, pb_path):
        """Load the graph from ``pb_path``, open a session and look up the tensors.

        If a tensor cannot be found, the error and every operation name in the
        graph are printed and the process exits with status 1.
        """
        self.graph = self._load_graph(pb_path)
        self.sess = tf.compat.v1.Session(graph=self.graph)

        try:
            lookup = self.graph.get_tensor_by_name
            self.input_tensor = lookup("input:0")
            self.embeddings_tensor = lookup("embeddings:0")
            self.phase_train = lookup("phase_train:0")
        except Exception as e:
            print(f"Error loading FaceNet tensors: {str(e)}")
            print("Available tensors:")
            # List what the graph actually contains to help fix the names.
            for op in self.graph.get_operations():
                print(op.name)
            sys.exit(1)

    def _load_graph(self, pb_path):
        """Parse the protobuf file at ``pb_path`` and import it into a new graph."""
        with tf.io.gfile.GFile(pb_path, "rb") as pb_file:
            serialized = pb_file.read()
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(serialized)

        loaded_graph = tf.Graph()
        with loaded_graph.as_default():
            # name="" avoids the default "import/" prefix on node names.
            tf.import_graph_def(graph_def, name="")
        return loaded_graph

    def get_embedding(self, face_img):
        """Return the flattened embedding of one face image.

        Args:
            face_img: image array, or ``None``.

        Returns:
            The embedding as a 1-D array, or ``None`` when ``face_img`` is
            ``None`` or the extraction raised (the error is printed).
        """
        if face_img is None:
            return None

        try:
            # Cast to float32 and apply the (x - 127.5) / 128 normalisation.
            batch = (face_img.astype(np.float32) - 127.5) / 128.0
            # A single 3-D image gets a leading batch axis.
            if batch.ndim == 3:
                batch = batch[np.newaxis, ...]

            feed = {self.input_tensor: batch, self.phase_train: False}
            return self.sess.run(self.embeddings_tensor, feed_dict=feed).flatten()
        except Exception as e:
            print(f"Embedding extraction error: {str(e)}")
            return None

# ========================
# 3. FACE COMPARISON
# ========================

class FaceComparisonSystem:
    """Face verification pipeline: detect -> embed -> score two images."""

    def __init__(self, facenet_path, siamese_path=None):
        """Set up the detector, the FaceNet wrapper and, optionally, the Siamese model.

        Args:
            facenet_path: path of the frozen FaceNet graph.
            siamese_path: optional path of a saved Siamese model; when omitted
                (or when loading fails) plain cosine similarity is used.
        """
        self.face_detector = FaceDetector()
        self.facenet = FaceNetWrapper(facenet_path)

        # Load Siamese model if provided
        self.siamese_model = None
        if siamese_path:
            self._load_siamese_model(siamese_path)

    def _load_siamese_model(self, model_path):
        """Load the Siamese model, supplying the function used by its Lambda layer.

        On failure the error is printed and ``self.siamese_model`` is left as
        ``None`` so that ``compare`` falls back to cosine similarity.
        """
        def cosine_similarity(tensors):
            x, y = tensors
            x_norm = tf.norm(x, axis=-1, keepdims=True)
            y_norm = tf.norm(y, axis=-1, keepdims=True)
            return tf.reduce_sum(x * y, axis=-1, keepdims=True) / (x_norm * y_norm + 1e-7)

        try:
            # Use the fully qualified loader: a bare `load_model` is not
            # imported in this cell, so on a fresh kernel it raised NameError,
            # which the except below turned into a silent fallback.
            self.siamese_model = tf.keras.models.load_model(
                model_path,
                custom_objects={'cosine_similarity': cosine_similarity}
            )
        except Exception as e:
            print(f"Error loading Siamese model: {str(e)}")
            self.siamese_model = None

    def compare(self, img1_path, img2_path, threshold=0.5):
        """Compare the faces found in two images.

        Args:
            img1_path: path of the first image.
            img2_path: path of the second image.
            threshold: score above which the pair is reported as a match.
                NOTE(review): the Siamese score comes from a sigmoid while the
                fallback is a raw cosine similarity; confirm that one
                threshold is appropriate for both.

        Returns:
            ``("MATCH" | "NO MATCH", score)``, or ``(None, None)`` when
            detection, embedding extraction or scoring fails.
        """
        try:
            # Detect faces
            face1 = self.face_detector.detect(img1_path)
            face2 = self.face_detector.detect(img2_path)

            if face1 is None or face2 is None:
                print("Face detection failed for one or both images")
                return None, None

            # Get embeddings
            emb1 = self.facenet.get_embedding(face1)
            emb2 = self.facenet.get_embedding(face2)

            if emb1 is None or emb2 is None:
                print("Embedding extraction failed")
                return None, None

            # L2-normalise and reshape to one row per image.
            emb1_norm = (emb1 / np.linalg.norm(emb1)).reshape(1, -1)
            emb2_norm = (emb2 / np.linalg.norm(emb2)).reshape(1, -1)

            # Explicit None check: do not rely on the truthiness of a model object.
            if self.siamese_model is not None:
                similarity = self.siamese_model.predict([emb1_norm, emb2_norm], verbose=0)[0][0]
            else:
                # Fallback to direct cosine similarity of the unit vectors.
                similarity = np.dot(emb1_norm, emb2_norm.T)[0][0]

            return "MATCH" if similarity > threshold else "NO MATCH", float(similarity)

        except Exception as e:
            print(f"Comparison error: {str(e)}")
            return None, None

# ========================
# 4. MAIN EXECUTION
# ========================

if __name__ == "__main__":
    # Test images
    img1 = "/kaggle/input/dttatt/WIN_20250328_01_04_01_Pro.jpg"
    img2 = "/kaggle/input/trieu1/WIN_20241004_16_01_32_Pro.jpg"

    # Verify the inputs BEFORE loading any model, so a bad path fails fast and
    # no TensorFlow session is left open on the exit path.
    for img in [img1, img2]:
        if not os.path.exists(img):
            print(f"Error: Image not found - {img}")
            sys.exit(1)

    # Initialize system; the Siamese model is optional and only used when the
    # checkpoint file exists in the working directory.
    system = FaceComparisonSystem(
        facenet_path="/kaggle/input/facenet-model-1/20180402-114759.pb",
        siamese_path="best_siamese_model.keras" if os.path.exists("best_siamese_model.keras") else None
    )

    # try/finally guarantees the FaceNet session is closed even if the
    # comparison or the reporting below raises.
    try:
        # Compare faces
        result, score = system.compare(img1, img2, threshold=0.9)

        if result:
            print("\n" + "="*50)
            print(f"RESULT: {result}")
            print(f"SIMILARITY SCORE: {score:.4f}")
            print("="*50)
        else:
            print("\nFailed to compare faces. Please check the error messages.")
    finally:
        # Clean up
        system.facenet.sess.close()