In [3]:
# Augmentation
# Facial Recognition System

import os
import cv2
import numpy as np
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, classification_report
from sklearn.preprocessing import LabelEncoder, label_binarize, StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
import time
import gc
import psutil
from collections import defaultdict
import hashlib
import concurrent.futures
import imgaug.augmenters as iaa
from sklearn.utils.class_weight import compute_class_weight

# Augmentation pipeline
def get_augmentation_pipeline():
    return iaa.Sequential([
        iaa.Sometimes(0.7, iaa.Affine(
            scale={"x": (0.7, 1.3), "y": (0.7, 1.3)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-45, 45),
            shear=(-15, 15),
            order=[0, 1],
            mode='reflect'
        )),
        iaa.Sometimes(0.7, iaa.OneOf([
            iaa.Multiply((0.5, 1.5)),
            iaa.LinearContrast((0.5, 1.5)),
            iaa.AddToHueAndSaturation((-30, 30))
        ])),
        iaa.Sometimes(0.5, iaa.AdditiveGaussianNoise(scale=(0, 0.1*255))),
        iaa.Sometimes(0.4, iaa.GaussianBlur(sigma=(0, 1.5))),
        iaa.Sometimes(0.3, iaa.PerspectiveTransform(scale=(0.05, 0.15))),
        iaa.Sometimes(0.4, iaa.Add((-50, 50))),
        iaa.Sometimes(0.5, iaa.CoarseDropout((0.0, 0.05), size_percent=(0.02, 0.25))),
    ])

# FACELOADING class
class FACELOADING:
    def __init__(self, directory, target_size=(160, 160), batch_size=32, max_workers=4):
        self.directory = directory
        self.target_size = target_size
        self.batch_size = batch_size
        self.max_workers = max_workers
        self.detector = None
        self.skipped_images = []
        self.processed_images = 0
        self.total_images = 0
        self.image_hashes = defaultdict(list)
        self.augmentation_pipeline = get_augmentation_pipeline()
        self.output_dir = "/Users/anantasingh/Desktop/advcslocal2/output/processed_images"
        os.makedirs(self.output_dir, exist_ok=True)

    def get_detector(self):
        if self.detector is None:
            self.detector = MTCNN(
                thresholds=[0.6, 0.7, 0.7],
                min_face_size=20,
                device=torch.device('cpu'),
                post_process=False,
                select_largest=True
            )
        return self.detector

    def compute_image_hash(self, img):
        small_img = cv2.resize(img, (32, 32))
        return hashlib.md5(small_img.tobytes()).hexdigest()

    def extract_face(self, filename):
        try:
            img = cv2.imread(filename)
            if img is None:
                self.skipped_images.append((filename, "Failed to load image", (0, 0)))
                return None
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            height, width = img.shape[:2]
            detector = self.get_detector()
            boxes, probs, landmarks = detector.detect(img, landmarks=True)
            if boxes is None or len(boxes) == 0:
                self.skipped_images.append((filename, "No face detected", (height, width)))
                return None
            x1, y1, x2, y2 = map(int, boxes[0])
            x1, x2 = max(0, x1), min(width, x2)
            y1, y2 = max(0, y1), min(height, y2)
            if x1 >= x2 or y1 >= y2:
                self.skipped_images.append((filename, "Invalid bounding box", (height, width)))
                return None
            face = img[y1:y2, x1:x2]
            if face.size == 0 or face.shape[0] == 0 or face.shape[1] == 0:
                self.skipped_images.append((filename, "Empty face after cropping", (height, width)))
                return None
            face = cv2.resize(face, self.target_size)
            img_hash = self.compute_image_hash(face)
            self.image_hashes[img_hash].append(filename)
            self.processed_images += 1
            return face
        except Exception as e:
            self.skipped_images.append((filename, f"Error: {str(e)}", (0, 0)))
            return None

    def augment_face(self, face, num_augmentations=5):
        augmented_faces = []
        for _ in range(num_augmentations):
            augmented = self.augmentation_pipeline(image=face)
            if augmented.mean() > 10:
                augmented_faces.append(augmented)
        return augmented_faces

    def process_image_batch(self, image_paths, class_name, augment=True, num_augmentations=5):
        results = []
        class_output_dir = os.path.join(self.output_dir, class_name)
        original_dir = os.path.join(class_output_dir, "original")
        augmented_dir = os.path.join(class_output_dir, "augmented")
        os.makedirs(original_dir, exist_ok=True)
        os.makedirs(augmented_dir, exist_ok=True)
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_path = {executor.submit(self.extract_face, path): path for path in image_paths}
            for future in concurrent.futures.as_completed(future_to_path):
                path = future_to_path[future]
                filename = os.path.basename(path)
                face = future.result()
                if face is not None:
                    original_path = os.path.join(original_dir, filename)
                    cv2.imwrite(original_path, cv2.cvtColor(face, cv2.COLOR_RGB2BGR))
                    results.append((face, class_name, "original"))
                    if augment:
                        augmented_faces = self.augment_face(face, num_augmentations)
                        for i, aug_face in enumerate(augmented_faces):
                            aug_filename = f"{os.path.splitext(filename)[0]}_aug{i+1}{os.path.splitext(filename)[1]}"
                            aug_path = os.path.join(augmented_dir, aug_filename)
                            cv2.imwrite(aug_path, cv2.cvtColor(aug_face, cv2.COLOR_RGB2BGR))
                            results.append((aug_face, class_name, "augmented"))
        return results

    def load_faces(self, dir, class_name, augment=True, num_augmentations=5):
        image_paths = []
        for filename in os.listdir(dir):
            if not (filename.endswith('.jpg') or filename.endswith('.png') or filename.endswith('.jpeg')):
                continue
            self.total_images += 1
            path = os.path.join(dir, filename)
            image_paths.append(path)
        batch_size = self.batch_size
        face_data = []
        for i in range(0, len(image_paths), batch_size):
            batch_paths = image_paths[i:i+batch_size]
            batch_results = self.process_image_batch(batch_paths, class_name, augment, num_augmentations)
            face_data.extend(batch_results)
        faces = [item[0] for item in face_data]
        self._save_sample_images(face_data, class_name)
        return np.asarray(faces)

    def _save_sample_images(self, face_data, class_name):
        class_output_dir = os.path.join(self.output_dir, class_name)
        os.makedirs(class_output_dir, exist_ok=True)
        originals = [face for face, _, type_ in face_data if type_ == "original"]
        augmented = [face for face, _, type_ in face_data if type_ == "augmented"]
        if not originals or not augmented:
            return
        n_samples = min(3, len(originals))
        plt.figure(figsize=(15, 8))
        for i in range(n_samples):
            plt.subplot(n_samples, 3, i*3 + 1)
            plt.imshow(originals[i])
            plt.title(f"Original {i+1}")
            plt.axis('off')
            aug_indices = [j for j in range(len(augmented)) if j % len(originals) == i]
            if len(aug_indices) >= 2:
                plt.subplot(n_samples, 3, i*3 + 2)
                plt.imshow(augmented[aug_indices[0]])
                plt.title(f"Augmented {i+1}.1")
                plt.axis('off')
                plt.subplot(n_samples, 3, i*3 + 3)
                plt.imshow(augmented[aug_indices[1]])
                plt.title(f"Augmented {i+1}.2")
                plt.axis('off')
        plt.tight_layout()
        plt.savefig(f'{class_output_dir}/comparison.png')
        plt.close()

    def load_classes(self, augment=True, num_augmentations=5):
        X_original, Y_original = [], []
        class_counts = {}
        original_counts = {}
        augmented_counts = {}
        total_classes = sum(1 for item in os.listdir(self.directory) if os.path.isdir(os.path.join(self.directory, item)))
        face_data_per_class = defaultdict(list)
        
        #Original images without augmentation
        for i, sub_dir in enumerate(os.listdir(self.directory)):
            path = os.path.join(self.directory, sub_dir)
            if os.path.isdir(path):
                print(f"Processing class {i+1}/{total_classes}: {sub_dir} (Originals)")
                faces = self.load_faces(path, sub_dir, augment=False)
                if len(faces) == 0:
                    print(f"Warning: No faces found in class {sub_dir}")
                    continue
                class_output_dir = os.path.join(self.output_dir, sub_dir)
                original_dir = os.path.join(class_output_dir, "original")
                os.makedirs(original_dir, exist_ok=True)
                for j, face in enumerate(faces):
                    filename = f"original_{j+1}.jpg"
                    original_path = os.path.join(original_dir, filename)
                    cv2.imwrite(original_path, cv2.cvtColor(face, cv2.COLOR_RGB2BGR))
                    face_data_per_class[sub_dir].append((face, sub_dir, "original"))
                X_original.extend(faces)
                Y_original.extend([sub_dir] * len(faces))
        
        #Split original images into train and test sets
        X_train_orig, X_test_orig, Y_train_orig, Y_test_orig = train_test_split(
            X_original, Y_original, test_size=0.2, random_state=42, stratify=Y_original
        )
        print(f"Original images split: Train={len(X_train_orig)}, Test={len(X_test_orig)}")
        
        # Check for hash overlap between train and test
        train_hashes = [self.compute_image_hash(face) for face in X_train_orig]
        test_hashes = [self.compute_image_hash(face) for face in X_test_orig]
        common_hashes = set(train_hashes).intersection(set(test_hashes))
        if common_hashes:
            print(f"Warning: Found {len(common_hashes)} hashes in both train and test sets!")
            with open("/Users/anantasingh/Desktop/advcslocal2/output/common_hashes.txt", 'w') as f:
                f.write(str(common_hashes))
        
        #Augment train and test sets separately and save augmented images
        X, Y = [], []
        aug_counter = defaultdict(int)  # To track augmented image indices per class
        for split_name, faces, labels in [("Train", X_train_orig, Y_train_orig), ("Test", X_test_orig, Y_test_orig)]:
            print(f"Augmenting {split_name} set...")
            for face, label in zip(faces, labels):
                X.append(face)
                Y.append(label)
                face_data_per_class[label].append((face, label, "original"))  # Ensure originals are in face_data
                if augment:
                    augmented_faces = self.augment_face(face, num_augmentations)
                    class_output_dir = os.path.join(self.output_dir, label)
                    augmented_dir = os.path.join(class_output_dir, "augmented")
                    os.makedirs(augmented_dir, exist_ok=True)
                    for i, aug_face in enumerate(augmented_faces):
                        aug_counter[label] += 1
                        aug_filename = f"augmented_{aug_counter[label]}.jpg"
                        aug_path = os.path.join(augmented_dir, aug_filename)
                        cv2.imwrite(aug_path, cv2.cvtColor(aug_face, cv2.COLOR_RGB2BGR))
                        X.append(aug_face)
                        Y.append(label)
                        face_data_per_class[label].append((aug_face, label, "augmented"))
        
        # Save sample images for each class
        for class_name, face_data in face_data_per_class.items():
            self._save_sample_images(face_data, class_name)
        
        # Compute counts and summaries
        for sub_dir in set(Y):
            class_counts[sub_dir] = sum(1 for label in Y if label == sub_dir)
            original_dir = os.path.join(self.output_dir, sub_dir, "original")
            augmented_dir = os.path.join(self.output_dir, sub_dir, "augmented")
            original_count = len([f for f in os.listdir(original_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
            augmented_count = len([f for f in os.listdir(augmented_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
            original_counts[sub_dir] = original_count
            augmented_counts[sub_dir] = augmented_count
            print(f"Class {sub_dir}:")
            print(f"  - Original images: {original_count}")
            print(f"  - Augmented images: {augmented_count}")
            print(f"  - Total: {class_counts[sub_dir]}")
        
        print(f"\nSummary:")
        print(f"Total original images attempted: {self.total_images}")
        print(f"Original images successfully processed: {self.processed_images}")
        print(f"Images skipped: {len(self.skipped_images)}")
        print(f"Final dataset size (with augmentations): {len(X)}")
        print(f"Class distribution:")
        for cls, count in class_counts.items():
            print(f"  - {cls}: {count} images (Original: {original_counts[cls]}, Augmented: {augmented_counts[cls]})")
        
        # Check for duplicate hashes
        duplicate_hashes = {h: paths for h, paths in self.image_hashes.items() if len(paths) > 1}
        if duplicate_hashes:
            print("Warning: Detected duplicate image hashes (potential data leakage):")
            with open("/Users/anantasingh/Desktop/advcslocal2/output/duplicate_hashes.txt", 'w') as f:
                for h, paths in list(duplicate_hashes.items())[:5]:
                    print(f"  Hash {h}: {paths}")
                    f.write(f"Hash {h}: {paths}\n")
        if self.skipped_images:
            print("Skipped images (first 5):")
            for i, (path, reason, shape) in enumerate(self.skipped_images[:5]):
                print(f"  {i+1}. {path}: {reason} (Shape: {shape})")
            with open("/Users/anantasingh/Desktop/advcslocal2/output/skipped_images.txt", 'w') as f:
                for path, reason, shape in self.skipped_images:
                    f.write(f"{path}: {reason} (Shape: {shape})\n")
        with open("/Users/anantasingh/Desktop/advcslocal2/output/dataset_summary.txt", 'w') as f:
            f.write(f"Total original images attempted: {self.total_images}\n")
            f.write(f"Original images successfully processed: {self.processed_images}\n")
            f.write(f"Images skipped: {len(self.skipped_images)}\n")
            f.write(f"Final dataset size (with augmentations): {len(X)}\n")
            f.write("Class distribution:\n")
            for cls, count in class_counts.items():
                f.write(f"  - {cls}: {count} images (Original: {original_counts[cls]}, Augmented: {augmented_counts[cls]})\n")
        return np.asarray(X), np.asarray(Y)

    def plot_images(self, X, Y, n=10):
        plt.figure(figsize=(15, 3))
        for i in range(min(n, len(X))):
            plt.subplot(1, n, i+1)
            plt.imshow(X[i])
            plt.title(Y[i])
            plt.axis('off')
        plt.savefig("/Users/anantasingh/Desktop/advcslocal2/output/sample_images.png")
        plt.close()

# embedding computation
def get_embedding_batch(faces, embedder, batch_size=32):
    embeddings = []
    for i in tqdm(range(0, len(faces), batch_size), desc="Computing embeddings"):
        batch = faces[i:i+batch_size]
        batch = np.array(batch).astype('float32') / 255.0
        batch_tensor = torch.from_numpy(batch).permute(0, 3, 1, 2).to(torch.device('cpu'))
        with torch.no_grad():
            batch_embeddings = embedder(batch_tensor).cpu().numpy()
        batch_embeddings = batch_embeddings / np.linalg.norm(batch_embeddings, axis=1, keepdims=True)
        embeddings.extend(batch_embeddings)
        if i % (batch_size * 20) == 0 and i > 0:
            np.save("/Users/anantasingh/Desktop/advcslocal2/output/temp_embeddings.npy", np.array(embeddings))
        del batch_tensor
        torch.cuda.empty_cache()
    return np.array(embeddings)

# Main execution
if __name__ == "__main__":
    start_time = time.time()
    dataset_path = "/Users/anantasingh/Desktop/advcslocal2/celebrities"
    faceloading = FACELOADING(dataset_path, batch_size=32, max_workers=4)
    X, Y = faceloading.load_classes(augment=True, num_augmentations=5)
    faceloading.plot_images(X, Y, n=10)
    print(f"Dataset loading time: {time.time() - start_time:.2f} seconds")
    print(f"Total images (with augmentation): {len(X)}")
    start_time = time.time()
    device = torch.device('cpu')
    embedder = InceptionResnetV1(pretrained='vggface2', classify=False).eval().to(device)
    weights_path = "/Users/anantasingh/Desktop/advcslocal2/vggface2_weights.pt"
    if os.path.exists(weights_path):
        try:
            state_dict = torch.load(weights_path, map_location=device)
            state_dict = {k: v for k, v in state_dict.items() if not k.startswith('logits')}
            embedder.load_state_dict(state_dict, strict=False)
            print("Loaded pre-downloaded FaceNet weights")
        except RuntimeError as e:
            print(f"Error loading weights: {e}")
            print("Regenerating weights...")
            embedder = InceptionResnetV1(pretrained='vggface2', classify=False).eval().to(device)
            torch.save(embedder.state_dict(), weights_path)
    else:
        print("Weights not found, downloading...")
        embedder = InceptionResnetV1(pretrained='vggface2', classify=False).eval().to(device)
        torch.save(embedder.state_dict(), weights_path)
    embedding_cache = "/Users/anantasingh/Desktop/advcslocal2/output/faces_embeddings_full.npz"
    print(f"Starting embedding computation for {len(X)} images...")
    EMBEDDED_X = get_embedding_batch(X, embedder, batch_size=32)
    embedding_norms = np.linalg.norm(EMBEDDED_X, axis=1)
    print(f"Embedding norms: mean={np.mean(embedding_norms):.2f}, std={np.std(embedding_norms):.2f}")
    if np.any(embedding_norms < 0.1):
        print("Warning: Some embeddings have low norms")
    np.savez_compressed(embedding_cache, EMBEDDED_X=EMBEDDED_X, Y=Y)
    print(f"Embedding time: {time.time() - start_time:.2f} seconds")
    del embedder
    gc.collect()
    torch.cuda.empty_cache()
    scaler = StandardScaler()
    EMBEDDED_X = scaler.fit_transform(EMBEDDED_X)
    print("Standardized embeddings")
    start_time = time.time()
    encoder = LabelEncoder()
    Y_encoded = encoder.fit_transform(Y)
    print("Encoded labels")
    pca = PCA(n_components=30)
    EMBEDDED_X = pca.fit_transform(EMBEDDED_X)
    print(f"PCA reduced dimensions to: {EMBEDDED_X.shape[1]}")
    print(f"Explained variance ratio: {sum(pca.explained_variance_ratio_):.4f}")
    print(f"PCA time: {time.time() - start_time:.2f} seconds")
    pca_cache = "/Users/anantasingh/Desktop/advcslocal2/output/pca_transformed_full.npz"
    np.savez_compressed(pca_cache, EMBEDDED_X=EMBEDDED_X, Y_encoded=Y_encoded)
    start_time = time.time()
    X_train, X_test, Y_train, Y_test = train_test_split(
        EMBEDDED_X, Y_encoded, test_size=0.2, random_state=42, stratify=Y_encoded
    )
    print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")
    print(f"Data split time: {time.time() - start_time:.2f} seconds")
    gc.collect()
    print(f"Memory usage before training: {psutil.virtual_memory().percent}%")

    # class weights
    class_weights = compute_class_weight('balanced', classes=np.unique(Y_encoded), y=Y_encoded)
    class_weight_dict = dict(enumerate(class_weights))

    # Train SVM
    start_time = time.time()
    print("Starting SVM training...")
    svm = SVC(probability=True, class_weight='balanced')
    svm_param_grid = {
        'C': [0.001, 0.01],
        'kernel': ['rbf', 'linear'],
        'gamma': ['scale', 'auto']
    }
    svm_grid = GridSearchCV(svm, svm_param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)
    svm_grid.fit(X_train, Y_train)
    best_svm = svm_grid.best_estimator_
    print(f"Best SVM parameters: {svm_grid.best_params_}")
    svm_scores = cross_val_score(best_svm, X_train, Y_train, cv=5)
    print(f"SVM Cross-validation accuracy: {svm_scores.mean():.2f} ± {svm_scores.std():.2f}")
    svm_y_pred = best_svm.predict(X_test)
    svm_test_accuracy = best_svm.score(X_test, Y_test)
    print(f"SVM Test accuracy: {svm_test_accuracy:.2f}")
    print("SVM Classification Report:")
    print(classification_report(Y_test, svm_y_pred, target_names=encoder.classes_))
    y_test_bin = label_binarize(Y_test, classes=range(len(encoder.classes_)))
    svm_y_score = best_svm.predict_proba(X_test)
    plt.figure(figsize=(8, 6))
    svm_eers = []
    for i, name in enumerate(encoder.classes_):
        fpr, tpr, thresholds = roc_curve(y_test_bin[:, i], svm_y_score[:, i])
        auc = roc_auc_score(y_test_bin[:, i], svm_y_score[:, i])
        plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')
        fnr = 1 - tpr
        eer_idx = np.nanargmin(np.abs(fpr - fnr))
        eer = fpr[eer_idx]
        svm_eers.append(eer)
        print(f"SVM EER for {name}: {eer:.4f}")
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('SVM ROC Curves')
    plt.legend()
    plt.savefig("/Users/anantasingh/Desktop/advcslocal2/output/svm_roc_curves.png")
    plt.close()
    print(f"SVM Average EER: {np.mean(svm_eers):.4f}")
    print(f"SVM training and evaluation time: {time.time() - start_time:.2f} seconds")

    # Train MLP
    start_time = time.time()
    print("Starting MLP training...")
    mlp = MLPClassifier(max_iter=2000, random_state=42)
    mlp_param_grid = {
        'hidden_layer_sizes': [(100,), (100, 50)],
        'learning_rate_init': [0.001, 0.01],
        'alpha': [0.001, 0.01]
    }
    mlp_grid = GridSearchCV(mlp, mlp_param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)
    mlp_grid.fit(X_train, Y_train)
    best_mlp = mlp_grid.best_estimator_
    print(f"Best MLP parameters: {mlp_grid.best_params_}")
    mlp_scores = cross_val_score(best_mlp, X_train, Y_train, cv=5)
    print(f"MLP Cross-validation accuracy: {mlp_scores.mean():.2f} ± {mlp_scores.std():.2f}")
    mlp_y_pred = best_mlp.predict(X_test)
    mlp_test_accuracy = best_mlp.score(X_test, Y_test)
    print(f"MLP Test accuracy: {mlp_test_accuracy:.2f}")
    print("MLP Classification Report:")
    print(classification_report(Y_test, mlp_y_pred, target_names=encoder.classes_))
    mlp_y_score = best_mlp.predict_proba(X_test)
    plt.figure(figsize=(8, 6))
    mlp_eers = []
    for i, name in enumerate(encoder.classes_):
        fpr, tpr, _ = roc_curve(y_test_bin[:, i], mlp_y_score[:, i])
        auc = roc_auc_score(y_test_bin[:, i], mlp_y_score[:, i])
        plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')
        fnr = 1 - tpr
        eer_idx = np.nanargmin(np.abs(fpr - fnr))
        eer = fpr[eer_idx]
        mlp_eers.append(eer)
        print(f"MLP EER for {name}: {eer:.4f}")
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('MLP ROC Curves')
    plt.legend()
    plt.savefig('/Users/anantasingh/Desktop/advcslocal2/output/mlp_roc.png')
    plt.close()
    print(f"MLP Average EER: {np.mean(mlp_eers):.4f}")
    print(f"MLP training and evaluation time: {time.time() - start_time:.2f} seconds")

    # Trained Random Forest
    start_time = time.time()
    print("Starting Random Forest training...")
    rf = RandomForestClassifier(max_depth=8, min_samples_split=5, class_weight='balanced', random_state=42)
    rf_param_grid = {'n_estimators': [100, 200]}
    rf_grid = GridSearchCV(rf, rf_param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)
    rf_grid.fit(X_train, Y_train)
    best_rf = rf_grid.best_estimator_
    print(f"Best Random Forest parameters: {rf_grid.best_params_}")
    rf_scores = cross_val_score(best_rf, X_train, Y_train, cv=5)
    print(f"Random Forest Cross-validation accuracy: {rf_scores.mean():.2f} ± {rf_scores.std():.2f}")
    rf_y_pred = best_rf.predict(X_test)
    rf_test_accuracy = best_rf.score(X_test, Y_test)
    print(f"Random Forest Test accuracy: {rf_test_accuracy:.2f}")
    print("Random Forest Classification Report:")
    print(classification_report(Y_test, rf_y_pred, target_names=encoder.classes_))
    rf_y_score = best_rf.predict_proba(X_test)
    plt.figure(figsize=(8, 6))
    rf_eers = []
    for i, name in enumerate(encoder.classes_):
        fpr, tpr, _ = roc_curve(y_test_bin[:, i], rf_y_score[:, i])
        auc = roc_auc_score(y_test_bin[:, i], rf_y_score[:, i])
        plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')
        fnr = 1 - tpr
        eer_idx = np.nanargmin(np.abs(fpr - fnr))
        eer = fpr[eer_idx]
        rf_eers.append(eer)
        print(f"Random Forest EER for {name}: {eer:.4f}")
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Random Forest ROC Curves')
    plt.legend()
    plt.savefig('/Users/anantasingh/Desktop/advcslocal2/output/rf_roc.png')
    plt.close()
    print(f"Random Forest Average EER: {np.mean(rf_eers):.4f}")
    print(f"Random Forest training and evaluation time: {time.time() - start_time:.2f} seconds")

    # Save models
    model_path = "/Users/anantasingh/Desktop/advcslocal2/output/face_models_full.pkl"
    with open(model_path, 'wb') as f:
        pickle.dump({
            'svm': best_svm,
            'mlp': best_mlp,
            'rf': best_rf,
            'pca': pca,
            'scaler': scaler,
            'encoder': encoder
        }, f)
    print(f"Models saved to {model_path}")

    # PCA variance
    plt.figure(figsize=(8, 6))
    plt.plot(np.cumsum(pca.explained_variance_ratio_))
    plt.xlabel('Number of Components')
    plt.ylabel('Cumulative Explained Variance')
    plt.title('PCA Explained Variance')
    plt.savefig('/Users/anantasingh/Desktop/advcslocal2/output/pca_variance.png')
    plt.close()

    # Confusion Matrices
    for name, y_pred, path in [
        ('SVM', svm_y_pred, '/Users/anantasingh/Desktop/advcslocal2/output/svm_cm.png'),
        ('MLP', mlp_y_pred, '/Users/anantasingh/Desktop/advcslocal2/output/mlp_cm.png'),
        ('Random Forest', rf_y_pred, '/Users/anantasingh/Desktop/advcslocal2/output/rf_cm.png')
    ]:
        cm = confusion_matrix(Y_test, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', xticklabels=encoder.classes_, yticklabels=encoder.classes_)
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.title(f'{name} Confusion Matrix')
        plt.savefig(path)
        plt.close()

    # t-SNE Visualization
    tsne = TSNE(n_components=2, random_state=42)
    X_tsne = tsne.fit_transform(EMBEDDED_X[:1000])
    plt.figure(figsize=(10, 8))
    for cls in np.unique(Y_encoded):
        idx = Y_encoded[:1000] == cls
        plt.scatter(X_tsne[idx, 0], X_tsne[idx, 1], label=encoder.classes_[cls], alpha=0.5)
    plt.legend()
    plt.title('t-SNE Visualization of Embeddings')
    plt.savefig('/Users/anantasingh/Desktop/advcslocal2/output/tsne.png')
    plt.close()

    # # Embedding Similarity Check
    # from sklearn.metrics.pairwise import cosine_similarity
    # sim_matrix = cosine_similarity(EMBEDDED_X[:1000])
    # intra_class_sim = []
    # inter_class_sim = []
    # for i in range(1000):
    #     for j in range(i + 1, 1000):
    #         if Y_encoded[i] == Y_encoded[j]:
    #             intra_class_sim.append(sim_matrix[i, j])
    #         else:
    #             inter_class_sim.append(sim_matrix[i, j])
    # print(f"Intra-class similarity: mean={np.mean(intra_class_sim):.4f}, std={np.std(intra_class_sim):.4f}")
    # print(f"Inter-class similarity: mean={np.mean(inter_class_sim):.4f}, std={np.std(inter_class_sim):.4f}")


Processing class 1/6: Lindsay_Lohan (Originals)
Processing class 2/6: Tom_Cruise (Originals)
Processing class 3/6: Leonardo_DiCaprio (Originals)
Processing class 4/6: Daniel_Radcliffe (Originals)
Processing class 5/6: Orlando_Bloom (Originals)
Processing class 6/6: Miley_Cyrus (Originals)
Original images split: Train=1333, Test=334
Augmenting Train set...
Augmenting Test set...
Class Miley_Cyrus:
  - Original images: 524
  - Augmented images: 1310
  - Total: 1572
Class Daniel_Radcliffe:
  - Original images: 528
  - Augmented images: 1320
  - Total: 1584
Class Tom_Cruise:
  - Original images: 414
  - Augmented images: 1034
  - Total: 1241
Class Lindsay_Lohan:
  - Original images: 746
  - Augmented images: 1865
  - Total: 2238
Class Leonardo_DiCaprio:
  - Original images: 422
  - Augmented images: 1055
  - Total: 1266
Class Orlando_Bloom:
  - Original images: 700
  - Augmented images: 1750
  - Total: 2100

Summary:
Total original images attempted: 1738
Original images successfully proces

Computing embeddings: 100%|███████████████████| 313/313 [09:17<00:00,  1.78s/it]


Embedding norms: mean=1.00, std=0.00
Embedding time: 558.24 seconds
Standardized embeddings
Encoded labels
PCA reduced dimensions to: 30
Explained variance ratio: 0.8454
PCA time: 0.06 seconds
Training samples: 8000, Test samples: 2001
Data split time: 0.00 seconds
Memory usage before training: 80.1%
Starting SVM training...
Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best SVM parameters: {'C': 0.001, 'gamma': 'scale', 'kernel': 'linear'}
SVM Cross-validation accuracy: 0.86 ± 0.01
SVM Test accuracy: 0.87
SVM Classification Report:
                   precision    recall  f1-score   support

 Daniel_Radcliffe       0.80      0.86      0.83       317
Leonardo_DiCaprio       0.87      0.86      0.87       253
    Lindsay_Lohan       0.87      0.87      0.87       448
      Miley_Cyrus       0.89      0.84      0.86       315
    Orlando_Bloom       0.96      0.88      0.91       420
       Tom_Cruise       0.80      0.90      0.85       248

         accuracy               

  arr = np.array(param_list)


Best MLP parameters: {'alpha': 0.01, 'hidden_layer_sizes': (100, 50), 'learning_rate_init': 0.01}
MLP Cross-validation accuracy: 0.85 ± 0.01
MLP Test accuracy: 0.85
MLP Classification Report:
                   precision    recall  f1-score   support

 Daniel_Radcliffe       0.83      0.82      0.82       317
Leonardo_DiCaprio       0.85      0.83      0.84       253
    Lindsay_Lohan       0.88      0.83      0.86       448
      Miley_Cyrus       0.81      0.83      0.82       315
    Orlando_Bloom       0.86      0.90      0.88       420
       Tom_Cruise       0.84      0.86      0.85       248

         accuracy                           0.85      2001
        macro avg       0.84      0.84      0.84      2001
     weighted avg       0.85      0.85      0.85      2001

MLP EER for Daniel_Radcliffe: 0.0950
MLP EER for Leonardo_DiCaprio: 0.0967
MLP EER for Lindsay_Lohan: 0.0959
MLP EER for Miley_Cyrus: 0.0943
MLP EER for Orlando_Bloom: 0.0677
MLP EER for Tom_Cruise: 0.0696
MLP Avera

In [5]:
#Deepfake Generation InsightFace

import os
import cv2
import numpy as np
import random
import insightface
from insightface.app import FaceAnalysis

# Directory for PubFig dataset
pubfig_dir = "/Users/anantasingh/Desktop/advcslocal2/celebrities"
insightface_output_dir = "/Users/anantasingh/Desktop/advcslocal2/output/deepfakes"

# Initialized InsightFace with CoreML for Metal GPU support, fallback to CPU
app = FaceAnalysis(name='buffalo_l', providers=['CoreMLExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=-1, det_size=(320, 320), det_thresh=0.3)  # ctx_id=-1 for GPU (CoreML/Metal)
swapper = insightface.model_zoo.get_model('/Users/anantasingh/Desktop/advcslocal2/models/inswapper_128.onnx', download=False)

# Function to get a valid image with detected face
def get_valid_image(celeb_dir, max_attempts=10):
    files = [f for f in os.listdir(celeb_dir) if f.endswith('.jpg')]
    random.shuffle(files)
    for i, filename in enumerate(files[:max_attempts]):
        img_path = os.path.join(celeb_dir, filename)
        img = cv2.imread(img_path)
        if img is None:
            print(f"Failed to load: {img_path}")
            continue
        faces = app.get(img)
        if len(faces) > 0:
            return img_path, img, faces[0]
        print(f"No faces detected in: {img_path}")
    print(f"Failed to find a valid image with a face in {celeb_dir} after {max_attempts} attempts")
    return None, None, None

# Generates a single deepfake
def generate_insightface_deepfake(real_img, real_face, target_img, target_face, output_dir, real_name, target_name, pair_name, counter):
    os.makedirs(output_dir, exist_ok=True)
    real_ = real_img.copy()
    # Generate and save the deepfake with real celebrity's face on target body
    real_ = swapper.get(real_, real_face, target_face, paste_back=True)
    real_celeb_name = os.path.basename(os.path.dirname(real_name))
    target_celeb_name = os.path.basename(os.path.dirname(target_name))
    swapped_real_path = os.path.join(output_dir, f"{pair_name}_{real_celeb_name}_swapped_with_{target_celeb_name}_{counter}.jpg")
    cv2.imwrite(swapped_real_path, real_)
    print(f"Saved deepfake: {swapped_real_path}")

# Generates multiple deepfakes for a pair
def generate_multiple_deepfakes(celeb1_dir, celeb2_dir, output_dir, pair_name, num_deepfakes=10):
    celeb1_name = os.path.basename(celeb1_dir)
    celeb2_name = os.path.basename(celeb2_dir)
    celeb1_output_dir = os.path.join(output_dir, celeb1_name)
    celeb2_output_dir = os.path.join(output_dir, celeb2_name)
    os.makedirs(celeb1_output_dir, exist_ok=True)
    os.makedirs(celeb2_output_dir, exist_ok=True)
    successful_deepfakes = 0
    attempts = 0
    max_attempts = num_deepfakes * 2  # Allow extra attempts
    counter = 0  # Unique counter for filenames
    while successful_deepfakes < num_deepfakes and attempts < max_attempts:
        real_path1, real_img1, real_face1 = get_valid_image(celeb1_dir)
        target_path2, target_img2, target_face2 = get_valid_image(celeb2_dir)
        real_path2, real_img2, real_face2 = get_valid_image(celeb2_dir)
        target_path1, target_img1, target_face1 = get_valid_image(celeb1_dir)
        if (real_img1 is None or target_img2 is None or real_img2 is None or target_img1 is None):
            print(f"Attempt {attempts + 1}: Skipping due to invalid images for {pair_name}")
            attempts += 1
            continue
        print(f"Attempt {attempts + 1}: Generating deepfake for {real_path1} -> {target_path2}")
        generate_insightface_deepfake(
            real_img1, real_face1, target_img2, target_face2,
            celeb2_output_dir, real_path1, target_path2, pair_name, counter
        )
        counter += 1
        print(f"Attempt {attempts + 1}: Generating deepfake for {real_path2} -> {target_path1}")
        generate_insightface_deepfake(
            real_img2, real_face2, target_img1, target_face1,
            celeb1_output_dir, real_path2, target_path1, pair_name, counter
        )
        counter += 1
        successful_deepfakes += 1
        attempts += 1
    print(f"Generated {successful_deepfakes} deepfakes for {pair_name}")

# Define all desired pairs
pairs = [
    ("Leonardo_DiCaprio", "Lindsay_Lohan", "leo_lindsay_pair"),
    ("Orlando_Bloom", "Tom_Cruise", "orlando_tom_pair")
]

# Generates deepfakes for all pairs
for celeb1_name, celeb2_name, pair_name in pairs:
    celeb1_dir = os.path.join(pubfig_dir, celeb1_name)
    celeb2_dir = os.path.join(pubfig_dir, celeb2_name)
    if os.path.isdir(celeb1_dir) and os.path.isdir(celeb2_dir):
        print(f"Generating deepfakes for {pair_name} ({celeb1_name} and {celeb2_name})")
        generate_multiple_deepfakes(celeb1_dir, celeb2_dir, insightface_output_dir, pair_name, num_deepfakes=10)
    else:
        print(f"Directory not found for {celeb1_name} or {celeb2_name}")

Applied providers: ['CoreMLExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CoreMLExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CoreMLExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CoreMLExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CoreMLExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CoreMLExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CoreMLExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CoreMLExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/gen



Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
inswapper-shape: [1, 3, 128, 128]
Generating deepfakes for leo_lindsay_pair (Leonardo_DiCaprio and Lindsay_Lohan)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Attempt 1: Generating deepfake for /Users/anantasingh/Desktop/advcslocal2/celebrities/Leonardo_DiCaprio/73.jpg -> /Users/anantasingh/Desktop/advcslocal2/celebrities/Lindsay_Lohan/186.jpg
Saved deepfake: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_0.jpg
Attempt 1: Generating deepfake for /Users/anantasingh/Desktop/advcslocal2/celebrities/Lindsay_Lohan/333.jpg -> /Users/anantasingh/Desktop/advcslocal2/celebrities/Leonardo_DiCaprio/129.jpg
Saved deepfake: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_1.jpg
Attempt 2: Generating deepfake for /Users/anantasingh/Desktop/advcslocal2/celebrities/Leonardo_DiCaprio/123.jpg -> /Users/anantasingh/Desktop/advcslocal2/celebrities/Lindsay_Lohan/122.jpg
Saved deepfake: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swap

In [6]:
#Testing Previously Trained Model with Generated Deepfakes SVM

import pickle
import os
import cv2
import numpy as np
import torch
from facenet_pytorch import InceptionResnetV1
import insightface
from insightface.app import FaceAnalysis
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import seaborn as sns

# Loading the trained SVM model
model_path = "/Users/anantasingh/Desktop/advcslocal2/output/face_models_full.pkl"
with open(model_path, 'rb') as f:
    models = pickle.load(f)
svm = models['svm']
pca = models['pca']
scaler = models['scaler']
encoder = models['encoder']

# Function to extract face and predict
def predict_face(image_path, app):
    img = cv2.imread(image_path)
    if img is None:
        print(f"Failed to load: {image_path}")
        return None
    faces = app.get(img)
    if len(faces) == 0:
        print(f"No face detected in: {image_path}")
        return None
    face = cv2.resize(img, (160, 160))  # Match FR input size
    face = face.astype('float32') / 255.0
    face_tensor = torch.from_numpy(face).permute(2, 0, 1).unsqueeze(0).to(torch.device('cpu'))
    with torch.no_grad():
        embedder = InceptionResnetV1(pretrained='vggface2', classify=False).eval().to(torch.device('cpu'))
        embedding = embedder(face_tensor).cpu().numpy()
    embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
    embedding = scaler.transform(embedding)
    embedding = pca.transform(embedding)
    prediction = svm.predict(embedding)
    return encoder.inverse_transform(prediction)[0]

# Initialized InsightFace for face detection in deepfakes
app = FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
app.prepare(ctx_id=-1, det_size=(320, 320), det_thresh=0.3)

# Directory for deepfake images
deepfake_dir = "/Users/anantasingh/Desktop/advcslocal2/output/deepfakes"

# Test deepfake images
results = []
for celeb1 in os.listdir(deepfake_dir):
    celeb1_dir = os.path.join(deepfake_dir, celeb1)
    if os.path.isdir(celeb1_dir):
        for celeb2 in os.listdir(celeb1_dir):
            if os.path.isfile(os.path.join(celeb1_dir, celeb2)) and celeb2.endswith('.jpg'):
                # true_label = celeb2.split('_swapped_with_')[1].split('.')[0]  # Extract target celeb name
                true_label = celeb2.split('_swapped_with_')[1].split('.')[0]  # Get 'Lindsay_Lohan_0'
                true_label = '_'.join(true_label.split('_')[:-1])  # Remove the counter, get 'Lindsay_Lohan'
                predicted_label = predict_face(os.path.join(celeb1_dir, celeb2), app)
                if predicted_label is not None:
                    results.append({
                        'image_path': os.path.join(celeb1_dir, celeb2),
                        'true_label': true_label,
                        'predicted_label': predicted_label,
                        'correct': true_label == predicted_label
                    })
                    print(f"Image: {os.path.join(celeb1_dir, celeb2)} | True: {true_label} | Predicted: {predicted_label} | Correct: {true_label == predicted_label}")

# Calculate metrics
total_deepfakes = len(results)
correct_predictions = sum(1 for r in results if r['correct'])
misidentification_rate = 1 - (correct_predictions / total_deepfakes) if total_deepfakes > 0 else 0
print(f"Total deepfake images tested: {total_deepfakes}")
print(f"Correct predictions: {correct_predictions}")
print(f"Misidentification rate: {misidentification_rate:.2f}")

# Visualize results
import matplotlib.pyplot as plt
import seaborn as sns

# Confusion Matrix for deepfake misidentification
true_labels = [r['true_label'] for r in results]
pred_labels = [r['predicted_label'] for r in results]
cm = confusion_matrix(true_labels, pred_labels, labels=encoder.classes_)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=encoder.classes_, yticklabels=encoder.classes_, cmap='Blues')
plt.title('Deepfake Misidentification Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig('/Users/anantasingh/Desktop/advcslocal2/output/deepfake_confusion_matrix.png')
plt.close()

# Bar plot of misidentification rate per class
misid_per_class = {}
for label in encoder.classes_:
    true_count = sum(1 for r in results if r['true_label'] == label)
    incorrect_count = sum(1 for r in results if r['true_label'] == label and not r['correct'])
    misid_per_class[label] = incorrect_count / true_count if true_count > 0 else 0
plt.figure(figsize=(10, 6))
plt.bar(misid_per_class.keys(), misid_per_class.values())
plt.title('Misidentification Rate per Class for Deepfakes')
plt.xlabel('Class')
plt.ylabel('Misidentification Rate')
plt.xticks(rotation=45)
plt.savefig('/Users/anantasingh/Desktop/advcslocal2/output/deepfake_misid_rate.png')
plt.close()

# ROC-like curve (approximated using misclassification)
from sklearn.metrics import roc_curve, auc
y_test_bin = label_binarize([r['true_label'] for r in results], classes=encoder.classes_)
y_score = label_binarize([r['predicted_label'] for r in results], classes=encoder.classes_)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(len(encoder.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
plt.figure(figsize=(8, 6))
for i, label in enumerate(encoder.classes_):
    plt.plot(fpr[i], tpr[i], label=f'{label} (AUC = {roc_auc[i]:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.title('ROC Curves for Deepfake Misidentification')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.savefig('/Users/anantasingh/Desktop/advcslocal2/output/deepfake_roc_curves.png')
plt.close()

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/anantasingh/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_18.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_6.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_14.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_16.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_4.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_0.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_12.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_10.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Lindsay_Lohan/leo_lindsay_pair_Leonardo_DiCaprio_swapped_with_Lindsay_Lohan_2.jpg | True: Lindsay_Lohan | Predicted: Lindsay_Lohan | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_8.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_18.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_14.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_6.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_4.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_16.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_12.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_0.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_2.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Tom_Cruise/orlando_tom_pair_Orlando_Bloom_swapped_with_Tom_Cruise_10.jpg | True: Tom_Cruise | Predicted: Tom_Cruise | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_1.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_15.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_17.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_3.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_7.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_13.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_11.jpg | True: Leonardo_DiCaprio | Predicted: Miley_Cyrus | Correct: False


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_5.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_9.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Leonardo_DiCaprio/leo_lindsay_pair_Lindsay_Lohan_swapped_with_Leonardo_DiCaprio_19.jpg | True: Leonardo_DiCaprio | Predicted: Leonardo_DiCaprio | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_7.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_5.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_19.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_1.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_3.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_13.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_11.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_9.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_15.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Image: /Users/anantasingh/Desktop/advcslocal2/output/deepfakes/Orlando_Bloom/orlando_tom_pair_Tom_Cruise_swapped_with_Orlando_Bloom_17.jpg | True: Orlando_Bloom | Predicted: Orlando_Bloom | Correct: True
Total deepfake images tested: 40
Correct predictions: 39
Misidentification rate: 0.03


