In [2]:
#!/usr/bin/env python3
"""
Enhanced Facial Recognition Model Trainer with Dataset Download
Downloads facial datasets and trains recognition models with ontology integration
"""

import os
import cv2
import numpy as np
import pickle
import json
import requests
import zipfile
import tarfile
from datetime import datetime
from pathlib import Path
import logging
from typing import Dict, List, Tuple, Optional
from urllib.parse import urlparse
import shutil

# ML Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Image processing
# scikit-image is optional: it only supplies the HOG descriptor used by the SVM
# pipeline. SKIMAGE_AVAILABLE records whether the import worked so that SVM
# training/prediction can be skipped instead of failing with a NameError.
try:
    from skimage.feature import hog
    SKIMAGE_AVAILABLE = True
except ImportError:
    # print() rather than logger: logging is only configured further down.
    print("Warning: scikit-image not available. Install with: pip install scikit-image")
    SKIMAGE_AVAILABLE = False

# Ontology libraries
# rdflib is optional as well. When it is missing, ONTOLOGY_AVAILABLE is False and
# OntologyManager never loads a graph, so its graph attribute stays None.
try:
    from rdflib import Graph, Namespace, RDF, RDFS, Literal
    from rdflib.namespace import XSD
    ONTOLOGY_AVAILABLE = True
except ImportError:
    print("Warning: rdflib not available. Install with: pip install rdflib")
    ONTOLOGY_AVAILABLE = False

# Configure logging
# Root logger at INFO with "timestamp - level - message" records; every class in
# this file logs through the module-level `logger` created here.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class DatasetDownloader:
    """Builds face datasets on local disk for the trainer.

    Nothing is fetched over the network: ``self.datasets`` is only an
    informational catalogue, and both builders below draw synthetic
    placeholder faces with OpenCV and write them as
    ``<root>/<person>/<image>.jpg``.
    """

    # NumPy's legacy seeding API only accepts integers in [0, 2**32).
    _SEED_MODULUS = 2 ** 32

    def __init__(self, download_dir: str = "datasets"):
        """Create the root directory that generated datasets are written to.

        Args:
            download_dir: Root directory; created (including parents) if missing.
        """
        self.download_dir = Path(download_dir)
        # parents=True so a nested target such as "data/faces" works on first use.
        self.download_dir.mkdir(parents=True, exist_ok=True)

        # Popular facial recognition datasets (catalogue only, never downloaded)
        self.datasets = {
            "lfw": {
                "name": "Labeled Faces in the Wild (LFW)",
                "url": "http://vis-www.cs.umass.edu/lfw/lfw.tgz",
                "description": "Natural face images dataset with 13,233 images of 5,749 people"
            },
            "att": {
                "name": "AT&T Database of Faces (ORL)",
                "url": "https://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html",
                "description": "40 subjects, 10 images each, controlled conditions"
            },
            "yale": {
                "name": "Yale Face Database",
                "url": "http://vision.ucsd.edu/content/yale-face-database",
                "description": "15 subjects, various expressions and lighting"
            }
        }

    @staticmethod
    def _stable_seed(person_name: str, offset: int = 0) -> int:
        """Return a run-independent integer in [0, 2**32) derived from a name.

        The built-in ``hash()`` of a string is salted per interpreter process
        and spans the signed 64-bit range, so it is neither reproducible nor a
        valid NumPy seed. CRC32 of the UTF-8 bytes is both.
        """
        import zlib  # local import: zlib is not among the file-level imports

        checksum = zlib.crc32(person_name.encode("utf-8"))
        return (checksum + offset) % DatasetDownloader._SEED_MODULUS

    @staticmethod
    def _write_image(img_path, img) -> bool:
        """Write ``img`` to ``img_path`` and log a warning if OpenCV reports failure."""
        written = bool(cv2.imwrite(str(img_path), img))
        if not written:
            logger.warning(f"Could not write image: {img_path}")
        return written

    def create_sample_dataset(self, num_people: int = 5, images_per_person: int = 10):
        """Generate a small synthetic dataset under ``<root>/sample_dataset``.

        Args:
            num_people: Number of person directories (``person_01``, ...).
            images_per_person: Number of JPEG images drawn per person.

        Returns:
            The dataset directory as a string.
        """
        sample_dir = self.download_dir / "sample_dataset"
        sample_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"Creating sample dataset with {num_people} people...")

        for person_idx in range(num_people):
            person_name = f"person_{person_idx + 1:02d}"
            person_dir = sample_dir / person_name
            person_dir.mkdir(exist_ok=True)

            # Synthetic placeholder faces; colors are derived from the name.
            for img_idx in range(images_per_person):
                img = self._generate_synthetic_face(person_name, img_idx)
                self._write_image(person_dir / f"{person_name}_{img_idx:03d}.jpg", img)

        logger.info(f"Sample dataset created at: {sample_dir}")
        return str(sample_dir)

    def _generate_synthetic_face(self, person_name: str, img_idx: int) -> np.ndarray:
        """Draw a simple 128x128 cartoon face whose color depends on the person.

        The base color comes from a stable checksum of ``person_name`` so the
        same person gets the same palette on every run; ``img_idx`` shifts the
        color and unseeded uniform noise is added on top.
        """
        img = np.zeros((128, 128, 3), dtype=np.uint8)

        # Stable per-person base color (was hash(), which changes between runs).
        person_hash = self._stable_seed(person_name) % 256
        base_color = (person_hash, (person_hash + 50) % 256, (person_hash + 100) % 256)

        # Shift the color a little for every image of the same person.
        variation = img_idx * 10
        color = tuple((c + variation) % 256 for c in base_color)

        # Face (filled circle)
        cv2.circle(img, (64, 64), 50, color, -1)
        # Eyes (smaller circles)
        cv2.circle(img, (50, 50), 8, (0, 0, 0), -1)
        cv2.circle(img, (78, 50), 8, (0, 0, 0), -1)
        # Nose (triangle)
        pts = np.array([[64, 60], [60, 75], [68, 75]], np.int32)
        cv2.fillPoly(img, [pts], (50, 50, 50))
        # Mouth (half ellipse)
        cv2.ellipse(img, (64, 85), (15, 8), 0, 0, 180, (0, 0, 0), -1)

        # cv2.add saturates at 255 instead of wrapping around like uint8 "+".
        noise = np.random.randint(0, 30, img.shape, dtype=np.uint8)
        return cv2.add(img, noise)

    def download_lfw_subset(self, max_people: int = 20):
        """Create a simulated "LFW subset" under ``<root>/lfw_subset``.

        No download takes place: up to ``max_people`` (at most 20) fixed names
        each get between 5 and 14 synthetic images.

        Returns:
            The dataset directory as a string, or ``None`` if generation failed
            (the error is logged).
        """
        lfw_dir = self.download_dir / "lfw_subset"
        lfw_dir.mkdir(parents=True, exist_ok=True)

        try:
            logger.info("Downloading LFW dataset subset...")

            # For demo purposes a structured synthetic subset is created;
            # a real implementation would download the actual LFW archive.
            person_names = [
                "Abraham_Lincoln", "Albert_Einstein", "Barack_Obama", "Bill_Gates",
                "Elon_Musk", "Hillary_Clinton", "Joe_Biden", "Mark_Zuckerberg",
                "Oprah_Winfrey", "Steve_Jobs", "Taylor_Swift", "Tom_Cruise",
                "Will_Smith", "Angelina_Jolie", "Brad_Pitt", "Jennifer_Lawrence",
                "Leonardo_DiCaprio", "Robert_Downey_Jr", "Scarlett_Johansson", "Chris_Evans"
            ]

            # max(0, ...) keeps a negative argument from slicing from the end.
            for person_name in person_names[:max(0, max_people)]:
                person_dir = lfw_dir / person_name
                person_dir.mkdir(exist_ok=True)

                num_images = int(np.random.randint(5, 15))
                for j in range(num_images):
                    img = self._generate_realistic_face(person_name, j)
                    self._write_image(person_dir / f"{person_name}_{j + 1:04d}.jpg", img)

            logger.info(f"LFW subset created at: {lfw_dir}")
            return str(lfw_dir)

        except Exception as e:
            logger.error(f"Error creating LFW subset: {e}")
            return None

    def _generate_realistic_face(self, person_name: str, img_idx: int) -> np.ndarray:
        """Draw a more detailed 128x128 synthetic face.

        All randomness comes from a private generator seeded with a stable
        checksum of ``person_name`` plus ``img_idx``, so a given (person, index)
        pair always yields the same image and the global NumPy RNG is untouched.
        """
        # The previous np.random.seed(hash(name) + idx) raised ValueError for
        # almost every name because the hash is outside [0, 2**32).
        rng = np.random.RandomState(self._stable_seed(person_name, img_idx))

        # Background in a skin-like tone
        skin_tone = int(rng.randint(180, 255))
        img = np.empty((128, 128, 3), dtype=np.uint8)
        img[:, :] = (skin_tone - 40, skin_tone - 20, skin_tone)

        # Face shape (oval)
        cv2.ellipse(img, (64, 70), (45, 55), 0, 0, 360,
                    (skin_tone - 10, skin_tone + 5, skin_tone), -1)

        # Eyes
        eye_y = 55
        left_eye = (45, eye_y)
        right_eye = (83, eye_y)

        # Eye whites
        cv2.ellipse(img, left_eye, (12, 8), 0, 0, 360, (255, 255, 255), -1)
        cv2.ellipse(img, right_eye, (12, 8), 0, 0, 360, (255, 255, 255), -1)

        # Iris
        iris_color = (int(rng.randint(20, 100)), int(rng.randint(50, 150)), int(rng.randint(20, 100)))
        cv2.circle(img, left_eye, 6, iris_color, -1)
        cv2.circle(img, right_eye, 6, iris_color, -1)

        # Pupils
        cv2.circle(img, left_eye, 3, (0, 0, 0), -1)
        cv2.circle(img, right_eye, 3, (0, 0, 0), -1)

        # Eyebrows
        eyebrow_color = (int(rng.randint(0, 100)), int(rng.randint(0, 80)), int(rng.randint(0, 60)))
        cv2.ellipse(img, (45, 45), (15, 4), 0, 0, 180, eyebrow_color, -1)
        cv2.ellipse(img, (83, 45), (15, 4), 0, 0, 180, eyebrow_color, -1)

        # Nose
        nose_pts = np.array([[64, 65], [60, 75], [64, 78], [68, 75]], np.int32)
        cv2.fillPoly(img, [nose_pts], (skin_tone - 20, skin_tone - 10, skin_tone - 5))

        # Mouth
        mouth_color = (int(rng.randint(100, 200)), int(rng.randint(50, 120)), int(rng.randint(50, 120)))
        cv2.ellipse(img, (64, 90), (12, 6), 0, 0, 360, mouth_color, -1)

        # Hair
        hair_color = (int(rng.randint(0, 100)), int(rng.randint(0, 80)), int(rng.randint(0, 60)))
        cv2.ellipse(img, (64, 30), (50, 25), 0, 0, 180, hair_color, -1)

        # Gaussian pixel noise; widen to int16 so the sum cannot wrap before clipping.
        noise = rng.normal(0, 15, img.shape).astype(np.int16)
        img = np.clip(img.astype(np.int16) + noise, 0, 255).astype(np.uint8)

        # Global brightness factor to simulate lighting variation
        lighting = rng.uniform(0.7, 1.3)
        img = np.clip(img * lighting, 0, 255).astype(np.uint8)

        return img

    def list_available_datasets(self):
        """Print the key, name and description of every catalogue entry."""
        print("\nAvailable datasets:")
        print("=" * 50)
        for key, dataset in self.datasets.items():
            print(f"{key}: {dataset['name']}")
            print(f"   Description: {dataset['description']}")
            print()


class OntologyManager:
    """Keeps an RDF graph describing persons, algorithms and training results.

    ``self.graph`` is ``None`` when rdflib is not installed or loading failed;
    every public method is then a silent no-op. All presence checks use
    ``is None`` because a graph object with zero triples is falsy and must
    still be usable.
    """

    _NAMESPACE_URI = "http://www.semanticweb.org/ontologies/facial-recognition#"

    def __init__(self, ontology_file: str = "ontology.owl"):
        """Remember the ontology path and load it when rdflib is available.

        Args:
            ontology_file: Path of the RDF/XML file to load; if it does not
                exist a minimal in-memory ontology is created instead.
        """
        self.ontology_file = ontology_file
        self.graph = None
        self.namespace = None

        if ONTOLOGY_AVAILABLE:
            self.load_ontology()

    def load_ontology(self):
        """Parse ``self.ontology_file`` or fall back to a basic ontology.

        Any failure is logged and leaves ``self.graph`` set to ``None``.
        """
        try:
            self.graph = Graph()
            if os.path.exists(self.ontology_file):
                self.graph.parse(self.ontology_file, format="xml")
                logger.info(f"Successfully loaded ontology from {self.ontology_file}")
            else:
                # Create basic ontology structure
                self.create_basic_ontology()
                logger.info("Created basic ontology structure")

            self.namespace = Namespace(self._NAMESPACE_URI)
            self.graph.bind("facial", self.namespace)

        except Exception as e:
            logger.error(f"Error loading ontology: {e}")
            self.graph = None

    def create_basic_ontology(self):
        """Replace the graph with a minimal set of classes and properties."""
        self.graph = Graph()
        self.namespace = Namespace(self._NAMESPACE_URI)
        self.graph.bind("facial", self.namespace)

        # Basic classes
        for class_name in ("Person", "Algorithm", "RecognitionResult"):
            self.graph.add((self.namespace[class_name], RDF.type, RDFS.Class))

        # Properties
        for property_name in ("personName", "age", "algorithmName", "accuracy", "confidenceScore"):
            self.graph.add((self.namespace[property_name], RDF.type, RDF.Property))

    def add_person_to_ontology(self, person_id: str, name: str, age: Optional[int] = None):
        """Add a ``Person`` individual named ``person_<person_id>``.

        Args:
            person_id: Identifier used to build the individual's URI.
            name: Stored as the ``personName`` literal.
            age: Optional age; stored when not ``None`` (0 is a valid age).
        """
        if self.graph is None:
            return

        try:
            person_uri = self.namespace[f"person_{person_id}"]
            self.graph.add((person_uri, RDF.type, self.namespace.Person))
            self.graph.add((person_uri, self.namespace.personName, Literal(name)))

            # "if age:" would silently drop an age of 0.
            if age is not None:
                self.graph.add((person_uri, self.namespace.age, Literal(age, datatype=XSD.int)))

            logger.info(f"Added person {name} to ontology")
        except Exception as e:
            logger.error(f"Error adding person to ontology: {e}")

    def add_training_result(self, model_name: str, accuracy: float):
        """Record a timestamped training result and the algorithm's accuracy.

        Args:
            model_name: Used in both the result URI and the algorithm URI.
            accuracy: Stored as ``confidenceScore`` on the result and as
                ``accuracy`` on the algorithm individual.
        """
        if self.graph is None:
            return

        try:
            result_id = f"training_{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            result_uri = self.namespace[result_id]

            self.graph.add((result_uri, RDF.type, self.namespace.RecognitionResult))
            self.graph.add((result_uri, self.namespace.confidenceScore, Literal(accuracy, datatype=XSD.double)))

            # Algorithm individual carrying its latest accuracy
            algorithm_uri = self.namespace[f"algorithm_{model_name}"]
            self.graph.add((algorithm_uri, RDF.type, self.namespace.Algorithm))
            self.graph.add((algorithm_uri, self.namespace.algorithmName, Literal(model_name)))
            self.graph.add((algorithm_uri, self.namespace.accuracy, Literal(accuracy, datatype=XSD.double)))

            logger.info(f"Added training result for {model_name} with accuracy {accuracy:.4f}")
        except Exception as e:
            logger.error(f"Error adding training result to ontology: {e}")

    def save_ontology(self, filename: Optional[str] = None):
        """Serialize the graph as RDF/XML.

        Args:
            filename: Destination path. When omitted, ``_updated`` is inserted
                before the extension of ``self.ontology_file`` so the source
                file is never overwritten, whatever its extension is.
        """
        if self.graph is None:
            return

        try:
            if not filename:
                source = Path(self.ontology_file)
                filename = str(source.with_name(f"{source.stem}_updated{source.suffix}"))
            self.graph.serialize(destination=filename, format="xml")
            logger.info(f"Ontology saved to {filename}")
        except Exception as e:
            logger.error(f"Error saving ontology: {e}")


class FaceProcessor:
    """Finds faces with a Haar cascade and prepares the crops for the models."""

    def __init__(self):
        # Frontal-face cascade file shipped inside the OpenCV package data.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )

        if self.face_cascade.empty():
            logger.error("Failed to load face cascade classifier")

    def detect_and_extract_face(self, image_path: str, target_size: Tuple[int, int] = (128, 128)):
        """Return the largest detected face resized to ``target_size``.

        Returns ``None`` (after logging) when the image cannot be read, when no
        face is found, or when any step raises.
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                logger.warning(f"Could not load image: {image_path}")
                return None

            # The cascade operates on a single-channel image.
            detections = self.face_cascade.detectMultiScale(
                cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30),
            )

            if len(detections) == 0:
                logger.warning(f"No face detected in: {image_path}")
                return None

            # Largest bounding box by area; the first one wins on ties.
            left, top, width, height = max(detections, key=lambda box: box[2] * box[3])

            crop = image[top:top + height, left:left + width]
            return cv2.resize(crop, target_size)

        except Exception as e:
            logger.error(f"Error processing image {image_path}: {e}")
            return None

    def preprocess_image(self, image: np.ndarray) -> np.ndarray:
        """Scale pixel values from [0, 255] to float32 values in [0, 1]."""
        return image.astype(np.float32) / 255.0


class CNNModel:
    """Convolutional Neural Network for face recognition.

    Four Conv2D/BatchNormalization/MaxPooling blocks followed by two dense
    layers with dropout and a softmax output over ``num_classes``.
    """

    def __init__(self, input_shape: Tuple[int, int, int], num_classes: int):
        """Store the model dimensions; the network itself is built lazily.

        Args:
            input_shape: Shape of one input image, passed to the first layer.
            num_classes: Number of output units of the softmax layer.
        """
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.model = None
        self.history = None

    @staticmethod
    def _steps_per_epoch(num_samples: int, batch_size: int) -> int:
        """Number of batches needed to see every sample once (ceiling division).

        Floor division yields 0 steps when there are fewer samples than one
        batch and otherwise drops the trailing partial batch.

        Raises:
            ValueError: If ``num_samples`` or ``batch_size`` is not positive.
        """
        if num_samples < 1:
            raise ValueError("Training set is empty")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        return -(-num_samples // batch_size)

    def build_model(self):
        """Build and compile the network; returns the compiled Keras model."""
        self.model = keras.Sequential([
            # First Convolutional Block
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=self.input_shape),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),

            # Second Convolutional Block
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),

            # Third Convolutional Block
            layers.Conv2D(128, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),

            # Fourth Convolutional Block
            layers.Conv2D(256, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),

            # Flatten and Dense layers
            layers.Flatten(),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(256, activation='relu'),
            layers.Dropout(0.3),
            layers.Dense(self.num_classes, activation='softmax')
        ])

        # Labels are expected one-hot encoded, hence categorical cross-entropy.
        self.model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.001),
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        logger.info("CNN model built successfully")
        return self.model

    def train(self, X_train, y_train, X_val, y_val, epochs=50, batch_size=32):
        """Train with on-the-fly augmentation, early stopping and LR reduction.

        Args:
            X_train, y_train: Training images and one-hot labels.
            X_val, y_val: Validation images and one-hot labels.
            epochs: Maximum number of epochs.
            batch_size: Samples per augmented batch.

        Returns:
            The Keras ``History`` object (also stored in ``self.history``).

        Raises:
            ValueError: If the training set is empty or ``batch_size`` < 1.
        """
        steps = self._steps_per_epoch(len(X_train), batch_size)

        if self.model is None:
            self.build_model()

        # Data augmentation
        train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
            rotation_range=15,
            width_shift_range=0.1,
            height_shift_range=0.1,
            horizontal_flip=True,
            zoom_range=0.1,
            fill_mode='nearest'
        )

        # Both callbacks use their default monitored quantity.
        callbacks = [
            keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
            keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
        ]

        self.history = self.model.fit(
            train_datagen.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=steps,
            epochs=epochs,
            validation_data=(X_val, y_val),
            callbacks=callbacks,
            verbose=1
        )

        return self.history


class FacialRecognitionTrainer:
    """Main class for training facial recognition models.

    Ties together dataset loading, CNN and HOG+SVM training, export of the
    trained artefacts and single-image prediction.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')
    SUPPORTED_MODEL_TYPES = ('cnn', 'svm')

    def __init__(self, ontology_file: str = "ontology.owl"):
        """Create the helper objects used by the training pipeline.

        Args:
            ontology_file: Path handed to ``OntologyManager``.
        """
        self.ontology_manager = OntologyManager(ontology_file)
        self.face_processor = FaceProcessor()
        self.dataset_downloader = DatasetDownloader()
        self.models = {}
        self.label_encoder = LabelEncoder()
        self.class_names = []

    def _ontology_ready(self) -> bool:
        """True when an ontology graph exists (an empty graph still counts)."""
        return self.ontology_manager.graph is not None

    @staticmethod
    def _extract_hog(image: np.ndarray) -> np.ndarray:
        """Compute the HOG descriptor of one preprocessed ([0, 1] float) image."""
        pixels = (image * 255).astype(np.uint8)
        gray = cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY) if pixels.ndim == 3 else pixels
        return hog(
            gray,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            visualize=False,
            feature_vector=True
        )

    @staticmethod
    def _split(features, targets, labels, test_size):
        """Stratified train/test split that falls back to a plain random split.

        Stratification raises ``ValueError`` when a class has a single sample
        or the test share is smaller than the number of classes; in that case a
        warning is logged and an unstratified split with the same seed is used.
        """
        try:
            return train_test_split(
                features, targets, test_size=test_size, random_state=42, stratify=labels
            )
        except ValueError as e:
            logger.warning(f"Stratified split not possible ({e}); using a random split")
            return train_test_split(features, targets, test_size=test_size, random_state=42)

    def load_dataset(self, dataset_path: str) -> Tuple[np.ndarray, np.ndarray]:
        """Load ``<dataset_path>/<person>/<image>`` into arrays.

        Each image goes through face detection and normalisation; images
        without a detected face are skipped. Fits ``self.label_encoder`` and
        fills ``self.class_names``.

        Returns:
            ``(X, y_encoded)``; two empty arrays when the path is not a
            directory or no usable image was found.
        """
        dataset_path = Path(dataset_path)

        if not dataset_path.is_dir():
            logger.error(f"Dataset path {dataset_path} does not exist or is not a directory")
            return np.array([]), np.array([])

        images = []
        labels = []
        person_count = {}

        logger.info(f"Loading dataset from {dataset_path}")

        # Sorted traversal: directory order is filesystem-dependent, and the
        # fixed-seed splits below are only reproducible for a fixed sample order.
        for person_dir in sorted(dataset_path.iterdir()):
            if not person_dir.is_dir():
                continue

            person_name = person_dir.name
            person_count[person_name] = 0

            if self._ontology_ready():
                self.ontology_manager.add_person_to_ontology(
                    person_name.replace(" ", "_"), person_name
                )

            for img_file in sorted(person_dir.glob("*")):
                if img_file.suffix.lower() not in self.IMAGE_EXTENSIONS:
                    continue

                face_image = self.face_processor.detect_and_extract_face(str(img_file))
                if face_image is None:
                    continue

                images.append(self.face_processor.preprocess_image(face_image))
                labels.append(person_name)
                person_count[person_name] += 1

        if not images:
            logger.error("No valid images found in dataset")
            return np.array([]), np.array([])

        X = np.array(images)
        y = np.array(labels)

        # Encode person names as integer class ids
        y_encoded = self.label_encoder.fit_transform(y)
        self.class_names = self.label_encoder.classes_.tolist()

        logger.info(f"Loaded {len(X)} images from {len(self.class_names)} persons")
        for person, count in person_count.items():
            logger.info(f"  {person}: {count} images")

        return X, y_encoded

    def train_cnn_model(self, X, y, test_size=0.2, epochs=50):
        """Train the CNN and register it under ``self.models['cnn']``.

        Args:
            X: Preprocessed images as returned by ``load_dataset``.
            y: Integer class ids as returned by ``load_dataset``.
            test_size: Share of the data held out for validation.
            epochs: Maximum number of training epochs.

        Returns:
            ``(cnn_model, validation_accuracy)``.
        """
        logger.info("Training CNN model...")

        num_classes = len(self.class_names)
        y_categorical = keras.utils.to_categorical(y, num_classes=num_classes)

        X_train, X_val, y_train, y_val = self._split(X, y_categorical, y, test_size)

        logger.info(f"Training set: {len(X_train)} samples")
        logger.info(f"Validation set: {len(X_val)} samples")

        cnn_model = CNNModel(input_shape=X[0].shape, num_classes=num_classes)
        cnn_model.train(X_train, y_train, X_val, y_val, epochs=epochs)

        _, val_accuracy = cnn_model.model.evaluate(X_val, y_val, verbose=0)
        logger.info(f"CNN Validation Accuracy: {val_accuracy:.4f}")

        self.models['cnn'] = cnn_model

        if self._ontology_ready():
            self.ontology_manager.add_training_result("CNN", val_accuracy)

        return cnn_model, val_accuracy

    def train_svm_model(self, X, y, test_size=0.2):
        """Train an RBF SVM on HOG features and register it as ``'svm'``.

        Returns:
            ``(svm_model, accuracy)``, or ``(None, 0)`` when scikit-image is
            not installed.
        """
        if not SKIMAGE_AVAILABLE:
            logger.error("scikit-image not available. Cannot train SVM model.")
            return None, 0

        logger.info("Training SVM model with HOG features...")

        logger.info("Extracting HOG features...")
        X_hog = np.array([self._extract_hog(img) for img in X])

        X_train, X_test, y_train, y_test = self._split(X_hog, y, y, test_size)

        svm_model = SVC(kernel='rbf', probability=True, random_state=42)
        svm_model.fit(X_train, y_train)

        accuracy = accuracy_score(y_test, svm_model.predict(X_test))
        logger.info(f"SVM Accuracy: {accuracy:.4f}")

        self.models['svm'] = svm_model

        if self._ontology_ready():
            self.ontology_manager.add_training_result("SVM", accuracy)

        return svm_model, accuracy

    def export_models(self, export_dir: str = "exported_models"):
        """Write models, label encoder, class names, metadata and ontology.

        Args:
            export_dir: Target directory; created (including parents) if missing.

        Returns:
            The export directory as a string.
        """
        export_path = Path(export_dir)
        export_path.mkdir(parents=True, exist_ok=True)

        logger.info(f"Exporting models to {export_path}")

        if 'cnn' in self.models:
            cnn_path = export_path / "cnn_face_recognition_model.h5"
            self.models['cnn'].model.save(str(cnn_path))
            logger.info(f"CNN model exported to {cnn_path}")

        if 'svm' in self.models:
            svm_path = export_path / "svm_face_recognition_model.pkl"
            joblib.dump(self.models['svm'], str(svm_path))
            logger.info(f"SVM model exported to {svm_path}")

        joblib.dump(self.label_encoder, str(export_path / "label_encoder.pkl"))

        with open(export_path / "class_names.json", 'w', encoding="utf-8") as f:
            json.dump(self.class_names, f, indent=2)

        # Prefer the shape the CNN was really built with over the default size.
        if 'cnn' in self.models:
            input_shape = [int(dim) for dim in self.models['cnn'].input_shape]
        elif self.models:
            input_shape = [128, 128, 3]
        else:
            input_shape = None

        metadata = {
            "export_timestamp": datetime.now().isoformat(),
            "models_exported": list(self.models.keys()),
            "num_classes": len(self.class_names),
            "class_names": self.class_names,
            "input_shape": input_shape
        }

        with open(export_path / "model_metadata.json", 'w', encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

        if self._ontology_ready():
            self.ontology_manager.save_ontology(str(export_path / "ontology_updated.owl"))

        logger.info("Model export completed successfully!")
        return str(export_path)

    def predict(self, image_path: str, model_type: str = 'cnn'):
        """Predict the person shown in ``image_path``.

        Args:
            image_path: Image file to classify.
            model_type: ``'cnn'`` or ``'svm'``.

        Returns:
            A dict with ``person_name``, ``confidence`` and ``model_used``, or
            ``None`` (after logging) when the model is missing or unsupported,
            scikit-image is required but absent, or no face is detected.
        """
        if model_type not in self.models:
            logger.error(f"Model {model_type} not available")
            return None

        # Reject anything this method has no branch for instead of failing
        # later with an unbound result variable.
        if model_type not in self.SUPPORTED_MODEL_TYPES:
            logger.error(f"Prediction with model type {model_type} is not supported")
            return None

        if model_type == 'svm' and not SKIMAGE_AVAILABLE:
            logger.error("scikit-image not available. Cannot predict with SVM model.")
            return None

        face_image = self.face_processor.detect_and_extract_face(image_path)
        if face_image is None:
            logger.error("No face detected in image")
            return None

        processed_image = self.face_processor.preprocess_image(face_image)

        if model_type == 'cnn':
            image_batch = np.expand_dims(processed_image, axis=0)
            predictions = self.models['cnn'].model.predict(image_batch, verbose=0)
            confidence = float(np.max(predictions))
            predicted_class = int(np.argmax(predictions))
        else:
            svm_model = self.models['svm']
            features_batch = self._extract_hog(processed_image).reshape(1, -1)
            probabilities = svm_model.predict_proba(features_batch)[0]
            confidence = float(np.max(probabilities))
            # Probability columns follow svm_model.classes_, which can be a
            # subset of all labels; map the column back to the encoded label.
            predicted_class = int(svm_model.classes_[int(np.argmax(probabilities))])

        return {
            'person_name': self.class_names[predicted_class],
            'confidence': confidence,
            'model_used': model_type
        }


def _prompt_int(prompt: str, default: int, minimum: int = 1) -> int:
    """Ask for an integer; fall back to ``default`` on empty or invalid input."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"Invalid number '{raw}'. Using default {default}.")
        return default
    if value < minimum:
        print(f"Value must be at least {minimum}. Using default {default}.")
        return default
    return value


def main():
    """Interactive entry point: pick a dataset, train, export and optionally test.

    All choices are read with ``input()``. Numeric answers that are empty,
    non-numeric or below 1 fall back to the advertised default instead of
    raising.
    """
    print("Enhanced Facial Recognition Model Trainer")
    print("=" * 60)

    # Initialize trainer
    trainer = FacialRecognitionTrainer("ontology.owl")

    # Dataset options
    print("\nDataset Options:")
    print("1. Use existing dataset directory")
    print("2. Create sample synthetic dataset")
    print("3. Download LFW subset (simulated)")
    print("4. List available datasets")

    choice = input("\nEnter your choice (1-4): ").strip()

    dataset_path = None

    if choice == "1":
        dataset_path = input("Enter dataset path (or press Enter for 'dataset'): ").strip() or "dataset"

    elif choice == "2":
        num_people = _prompt_int("Number of people (default 5): ", 5)
        images_per_person = _prompt_int("Images per person (default 10): ", 10)
        dataset_path = trainer.dataset_downloader.create_sample_dataset(num_people, images_per_person)

    elif choice == "3":
        max_people = _prompt_int("Maximum number of people (default 20): ", 20)
        dataset_path = trainer.dataset_downloader.download_lfw_subset(max_people)

    elif choice == "4":
        trainer.dataset_downloader.list_available_datasets()
        return

    else:
        print("Invalid choice. Using default dataset directory.")
        dataset_path = "dataset"

    if not dataset_path:
        print("Error: Could not set up dataset.")
        return

    print(f"\nUsing dataset: {dataset_path}")
    print("Expected structure:")
    print("dataset/")
    print("├── person1/")
    print("│   ├── img1.jpg")
    print("│   └── img2.jpg")
    print("├── person2/")
    print("│   ├── img1.jpg")
    print("│   └── img2.jpg")
    print("└── ...")

    # Load dataset
    X, y = trainer.load_dataset(dataset_path)

    if len(X) == 0:
        print("No data loaded. Please check your dataset structure.")
        return

    # Training options
    print(f"\nTraining models with {len(X)} images from {len(trainer.class_names)} persons...")

    train_cnn = input("Train CNN model? (y/n, default y): ").strip().lower() != 'n'
    train_svm = input("Train SVM model? (y/n, default y): ").strip().lower() != 'n' and SKIMAGE_AVAILABLE

    if not train_svm and not SKIMAGE_AVAILABLE:
        print("Note: SVM training disabled - scikit-image not available")

    if not train_cnn and not train_svm:
        print("No model selected for training. Nothing to do.")
        return

    # Epochs only matter for the CNN, so only ask when it will be trained.
    epochs = _prompt_int("Number of epochs for CNN (default 30): ", 30) if train_cnn else 0

    # Train models
    cnn_accuracy = 0
    svm_accuracy = 0

    if train_cnn:
        _, cnn_accuracy = trainer.train_cnn_model(X, y, epochs=epochs)

    if train_svm:
        _, svm_accuracy = trainer.train_svm_model(X, y)

    # Export models
    export_path = trainer.export_models()

    print("\n" + "=" * 60)
    print("TRAINING COMPLETED!")
    print("=" * 60)
    if train_cnn:
        print(f"CNN Accuracy: {cnn_accuracy:.4f}")
    if train_svm:
        print(f"SVM Accuracy: {svm_accuracy:.4f}")
    print(f"Models exported to: {export_path}")

    # List only the files that were really written.
    exported_files = []
    if train_cnn:
        exported_files.append("cnn_face_recognition_model.h5")
    if train_svm:
        exported_files.append("svm_face_recognition_model.pkl")
    exported_files += ["label_encoder.pkl", "class_names.json", "model_metadata.json"]
    if trainer.ontology_manager.graph is not None:
        exported_files.append("ontology_updated.owl")

    print("\nExported files:")
    for index, file_name in enumerate(exported_files):
        connector = "└──" if index == len(exported_files) - 1 else "├──"
        print(f"{connector} {file_name}")

    # Test prediction
    test_image = input("\nEnter test image path (or press Enter to skip): ").strip()
    if test_image and not os.path.exists(test_image):
        print(f"Test image not found, skipping prediction: {test_image}")
    elif test_image:
        print(f"\nTesting prediction on: {test_image}")

        for enabled, model_type, label in ((train_cnn, 'cnn', "CNN"), (train_svm, 'svm', "SVM")):
            if not enabled:
                continue
            result = trainer.predict(test_image, model_type)
            if result:
                print(f"{label} Prediction: {result['person_name']} (confidence: {result['confidence']:.3f})")

    print("\nTraining completed successfully!")
    print("To use the trained models, load them using:")
    if train_cnn:
        print(f"- CNN: tf.keras.models.load_model('{export_path}/cnn_face_recognition_model.h5')")
    if train_svm:
        print(f"- SVM: joblib.load('{export_path}/svm_face_recognition_model.pkl')")


if __name__ == "__main__":
    # Install required packages
    required_packages = [
        "tensorflow>=2.8.0",
        "opencv-python>=4.5.0",
        "scikit-learn>=1.0.0",
        "scikit-image>=0.19.0",
        "rdflib>=6.0.0",
        "joblib>=1.1.0"
    ]
    
    print("Required packages:")
    for package in required_packages:
        print(f"  pip install {package}")
    print("\nRun: pip install " + " ".join(required_packages))
    print()
    
    main()

ModuleNotFoundError: No module named 'cv2'

In [3]:
pip install cv3

[1;31merror[0m: [1mexternally-managed-environment[0m

[31m×[0m This environment is externally managed
[31m╰─>[0m To install Python packages system-wide, try apt install
[31m   [0m python3-xyz, where xyz is the package you are trying to
[31m   [0m install.
[31m   [0m 
[31m   [0m If you wish to install a non-Debian-packaged Python package,
[31m   [0m create a virtual environment using python3 -m venv path/to/venv.
[31m   [0m Then use path/to/venv/bin/python and path/to/venv/bin/pip. Make
[31m   [0m sure you have python3-full installed.
[31m   [0m 
[31m   [0m If you wish to install a non-Debian packaged Python application,
[31m   [0m it may be easiest to use pipx install xyz, which will manage a
[31m   [0m virtual environment for you. Make sure you have pipx installed.
[31m   [0m 
[31m   [0m See /usr/share/doc/python3.12/README.venv for more information.

[1;35mnote[0m: If you believe this is a mistake, please contact your Python installation or OS dist