In [27]:
import os
import numpy as np
import cv2
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import joblib

def load_dataset(path):
    """Load images with enhanced error handling"""
    images, labels = [], []
    min_images_per_person = 5  # Minimum images required per person
    
    for filename in sorted(os.listdir(path)):
        if filename.lower().endswith(('.jpg', '.jpeg')):
            try:
                person_id = int(filename.split('-')[0])
                img_path = os.path.join(path, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                
                if img is not None:
                    img = cv2.resize(img, (128, 128))
                    images.append(img)
                    labels.append(person_id)
            except:
                continue
    
    # Filter out people with too few images
    unique, counts = np.unique(labels, return_counts=True)
    valid_labels = unique[counts >= min_images_per_person]
    filtered_images = []
    filtered_labels = []
    
    for img, label in zip(images, labels):
        if label in valid_labels:
            filtered_images.append(img)
            filtered_labels.append(label)
    
    print(f"\nLoaded {len(filtered_images)} images for {len(valid_labels)} people")
    print(f"Images per person: {len(filtered_images)/len(valid_labels):.1f}")
    return np.array(filtered_images), np.array(filtered_labels)

def augment_images(images, labels):
    """Create additional training samples"""
    datagen = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1
    )
    
    augmented_images = []
    augmented_labels = []
    
    for img, label in zip(images, labels):
        img = img.reshape(128, 128, 1)
        # Generate 3 augmented versions
        for _ in range(3):
            transformed = datagen.random_transform(img)
            augmented_images.append(transformed[:,:,0])
            augmented_labels.append(label)
    
    return np.vstack([images, augmented_images]), np.concatenate([labels, augmented_labels])

def extract_features(images):
    """Enhanced feature extraction"""
    # CNN Features
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)
    
    rgb_images = np.repeat(images[..., np.newaxis], 3, axis=-1)
    cnn_features = model.predict(rgb_images, batch_size=16, verbose=0).reshape(len(images), -1)
    
    # LBP Features
    lbp_features = []
    for img in images:
        lbp = local_binary_pattern((img * 255).astype(np.uint8), 24, 3, method='uniform')
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 26))
        lbp_features.append(hist / (hist.sum() + 1e-6))
    
    return cnn_features, np.array(lbp_features)

def train_and_evaluate(X, y):
    """Improved training with class weighting"""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Calculate class weights
    unique, counts = np.unique(y_train, return_counts=True)
    class_weights = {cls: len(y_train)/(len(unique)*count) for cls, count in zip(unique, counts)}
    
    svm = SVC(
        C=10, 
        gamma=0.01, 
        kernel='rbf', 
        class_weight=class_weights,  # Balance classes
        probability=True
    )
    svm.fit(X_train, y_train)
    
    print("\nEvaluation Results:")
    y_pred = svm.predict(X_test)
    print(classification_report(y_test, y_pred, zero_division=0))
    
    return svm

def main():
    try:
        # 1. Load and filter data
        dataset_path = r"C:\Users\Selim\Downloads\originalimages_part1"
        images, labels = load_dataset(dataset_path)
        
        # 2. Augment data
        images, labels = augment_images(images, labels)
        images = preprocess_images(images)
        
        # 3. Feature extraction
        cnn_feat, lbp_feat = extract_features(images)
        X = np.hstack((
            StandardScaler().fit_transform(cnn_feat),
            StandardScaler().fit_transform(lbp_feat)
        ))
        
        # 4. Reduce dimensions
        X_reduced = PCA(n_components=100).fit_transform(X)
        
        # 5. Train and save
        model = train_and_evaluate(X_reduced, labels)
        joblib.dump(model, 'improved_face_model.pkl')
        print("\nImproved model saved successfully!")
        
    except Exception as e:
        print(f"\nError: {str(e)}")

if __name__ == "__main__":
    main()


Loaded 700 images for 50 people
Images per person: 14.0

Evaluation Results:
              precision    recall  f1-score   support

           1       1.00      0.29      0.44         7
           2       0.00      0.00      0.00         8
           3       1.00      0.18      0.31        11
           4       1.00      0.08      0.15        12
           5       0.00      0.00      0.00         9
           6       1.00      0.22      0.36         9
           7       1.00      0.08      0.14        13
           8       1.00      0.10      0.18        10
           9       1.00      0.10      0.18        10
          10       1.00      0.10      0.18        10
          11       1.00      0.08      0.15        12
          12       0.00      0.00      0.00        12
          13       1.00      0.50      0.67         6
          14       1.00      0.18      0.31        11
          15       1.00      0.20      0.33        10
          16       1.00      0.25      0.40        16
   

In [28]:
import os
import numpy as np
import cv2
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import joblib

def load_and_filter_dataset(path, min_images=10):
    """Load images and ensure minimum samples per class"""
    images, labels = [], []
    
    # Count images per person first
    person_counts = {}
    for filename in os.listdir(path):
        if filename.lower().endswith(('.jpg', '.jpeg')):
            try:
                person_id = int(filename.split('-')[0])
                person_counts[person_id] = person_counts.get(person_id, 0) + 1
            except:
                continue
    
    # Only keep persons with enough images
    valid_persons = {pid for pid, count in person_counts.items() if count >= min_images}
    
    # Load images for valid persons only
    for filename in sorted(os.listdir(path)):
        if filename.lower().endswith(('.jpg', '.jpeg')):
            try:
                person_id = int(filename.split('-')[0])
                if person_id in valid_persons:
                    img = cv2.imread(os.path.join(path, filename), cv2.IMREAD_GRAYSCALE)
                    if img is not None:
                        img = cv2.resize(img, (128, 128))
                        images.append(img)
                        labels.append(person_id)
            except:
                continue
    
    print(f"\nLoaded {len(images)} images for {len(valid_persons)} people")
    print(f"Average images per person: {len(images)/len(valid_persons):.1f}")
    
    if len(valid_persons) < 2:
        raise ValueError("Not enough valid persons for training (need at least 2)")
    
    return np.array(images), np.array(labels)

def augment_data(images, labels, augment_factor=4):
    """Generate synthetic training data"""
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.15,
        height_shift_range=0.15,
        zoom_range=0.15,
        brightness_range=[0.8, 1.2]
    )
    
    augmented_images = []
    augmented_labels = []
    
    for img, label in zip(images, labels):
        img = img.reshape(128, 128, 1)
        # Generate augmented versions
        for _ in range(augment_factor):
            transformed = datagen.random_transform(img)
            augmented_images.append(transformed[:,:,0])
            augmented_labels.append(label)
    
    # Combine original and augmented data
    all_images = np.concatenate([images, np.array(augmented_images)])
    all_labels = np.concatenate([labels, np.array(augmented_labels)])
    
    return all_images, all_labels

def extract_enhanced_features(images):
    """Improved feature extraction with normalization"""
    # CNN Features
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)
    
    rgb_images = np.repeat(images[..., np.newaxis], 3, axis=-1)
    cnn_features = model.predict(rgb_images, batch_size=32, verbose=0)
    cnn_features = cnn_features.reshape(cnn_features.shape[0], -1)
    
    # Enhanced LBP Features
    lbp_features = []
    for img in images:
        img_uint8 = (img * 255).astype(np.uint8)
        lbp = local_binary_pattern(img_uint8, 24, 3, method='uniform')
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 27))  # Extra bin
        hist = hist.astype("float32") / (hist.sum() + 1e-6)
        lbp_features.append(hist)
    
    return cnn_features, np.array(lbp_features)

def train_optimized_model(X, y):
    """Enhanced training with balanced classes"""
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)
    
    # Balance class weights
    unique, counts = np.unique(y_train, return_counts=True)
    class_weights = {u: sum(counts)/(len(counts)*c) for u, c in zip(unique, counts)}
    
    # Optimized SVM
    svm = SVC(
        C=5,  # Reduced regularization
        gamma='scale',
        kernel='rbf',
        class_weight=class_weights,
        probability=True,
        decision_function_shape='ovr'  # Better for multi-class
    )
    
    svm.fit(X_train, y_train)
    
    # Enhanced evaluation
    print("\nDetailed Classification Report:")
    y_pred = svm.predict(X_test)
    print(classification_report(y_test, y_pred, zero_division=0, digits=3))
    
    return svm

def main():
    try:
        # 1. Load and filter data
        dataset_path = r"C:\Users\Selim\Downloads\originalimages_part1"
        print("Loading and filtering dataset...")
        images, labels = load_and_filter_dataset(dataset_path, min_images=10)
        
        # 2. Augment data
        print("Augmenting dataset...")
        images, labels = augment_data(images, labels)
        
        # 3. Preprocess
        print("Preprocessing images...")
        images = images / 255.0  # Normalization
        for i in range(len(images)):
            images[i] = cv2.equalizeHist((images[i] * 255).astype(np.uint8)) / 255.0
        
        # 4. Feature extraction
        print("Extracting features...")
        cnn_feat, lbp_feat = extract_enhanced_features(images)
        X = np.hstack((
            StandardScaler().fit_transform(cnn_feat),
            StandardScaler().fit_transform(lbp_feat)
        ))
        
        # 5. Dimensionality reduction
        print("Reducing dimensions...")
        X_reduced = PCA(n_components=0.95, svd_solver='full').fit_transform(X)  # Keep 95% variance
        
        # 6. Train and save
        print("Training model...")
        model = train_optimized_model(X_reduced, labels)
        joblib.dump(model, 'optimized_face_model.pkl')
        print("\nOptimized model saved successfully!")
        
    except Exception as e:
        print(f"\nError: {str(e)}")
        print("Common fixes:")
        print("- Check your images are named like '1-01.jpg', '1-02.jpg'")
        print("- Ensure you have at least 10 images per person")
        print("- Verify the folder contains only face images")

if __name__ == "__main__":
    main()

Loading and filtering dataset...

Loaded 700 images for 50 people
Average images per person: 14.0
Augmenting dataset...
Preprocessing images...
Extracting features...
Reducing dimensions...
Training model...

Detailed Classification Report:
              precision    recall  f1-score   support

           1      0.867     0.929     0.897        14
           2      1.000     0.857     0.923        14
           3      0.824     1.000     0.903        14
           4      0.917     0.786     0.846        14
           5      1.000     0.786     0.880        14
           6      1.000     0.857     0.923        14
           7      0.647     0.786     0.710        14
           8      0.923     0.857     0.889        14
           9      0.875     1.000     0.933        14
          10      1.000     0.929     0.963        14
          11      1.000     1.000     1.000        14
          12      1.000     0.857     0.923        14
          13      1.000     0.857     0.923        14
  