In [31]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import os
from sklearn.preprocessing import StandardScaler
import warnings
# Required for feature extraction:
# !pip install opencv-python numpy pandas matplotlib scikit-learn

warnings.filterwarnings('ignore')

class ImageProcessor:
    """
    Processes facial images: loads, displays, augments, and extracts features
    """
    
    def __init__(self, base_path='multimodal-auth-system/media/images'):
        # Adjusted base path to match common repo structure
        self.base_path = Path(base_path)
        self.members = []
        self.all_features = []
        
    def load_images_for_member(self, member_name):
        """Load all three images for a team member"""
        member_path = self.base_path / member_name
        images = {}
        
        image_types = ['neutral', 'smiling', 'surprised']
        
        for img_type in image_types:
            # Try common image extensions
            for ext in ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']:
                img_path = member_path / f"{img_type}{ext}"
                if img_path.exists():
                    # cv2.imread loads BGR, but we convert to RGB for matplotlib/processing
                    img = cv2.imread(str(img_path))
                    if img is not None:
                        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                        images[img_type] = img_rgb
                        print(f"✓ Loaded {member_name}/{img_type}{ext}")
                    break
        
        return images
    
    def display_member_images(self, member_name, images):
        """Show a member's original images side by side in one figure.

        The row always has at least three panels and grows when more images
        are supplied. Panels that receive no image are left blank.

        Args:
            member_name: Used in the figure title.
            images: Mapping of image type to an image accepted by ``imshow``;
                entries are drawn in the mapping's iteration order.
        """
        entries = list(images.items())
        # Never fewer than three columns so the usual layout is unchanged,
        # but wide enough that extra images do not index past the grid.
        n_cols = max(3, len(entries))
        fig, axes = plt.subplots(1, n_cols, figsize=(5 * n_cols, 5))
        fig.suptitle(f'{member_name} - Original Images', fontsize=16, fontweight='bold')

        for idx, ax in enumerate(axes):
            if idx < len(entries):
                img_type, img = entries[idx]
                ax.imshow(img)
                ax.set_title(img_type.capitalize(), fontsize=12)
            # Hide ticks and frame on every panel, including unused ones, so a
            # member with missing images does not get empty axis boxes.
            ax.axis('off')

        plt.tight_layout()
        plt.show()
    
    def augment_image(self, image, aug_type):
        """Apply specific augmentation to an image"""
        if aug_type == 'rotation':
            # Rotate 15 degrees
            h, w = image.shape[:2]
            center = (w // 2, h // 2)
            matrix = cv2.getRotationMatrix2D(center, 15, 1.0)
            augmented = cv2.warpAffine(image, matrix, (w, h))
            
        elif aug_type == 'flip':
            # Horizontal flip
            augmented = cv2.flip(image, 1)
            
        elif aug_type == 'grayscale':
            # Convert to grayscale then back to RGB for consistency in plotting
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            augmented = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
            
        elif aug_type == 'brightness':
            # Increase brightness
            augmented = cv2.convertScaleAbs(image, alpha=1.2, beta=30)
            
        elif aug_type == 'blur':
            # Apply Gaussian blur
            augmented = cv2.GaussianBlur(image, (5, 5), 0)
            
        else:
            augmented = image.copy()
        
        return augmented
    
    def display_augmentations(self, member_name, img_type, original_img):
        """Show the original image next to each of its five augmentations.

        The figure is a 2x3 grid filled row by row: the original first, then
        the rotation, flip, grayscale, brightness and blur variants produced
        by ``augment_image``.

        Args:
            member_name: Used in the figure title.
            img_type: Image type label; capitalised for the figure title.
            original_img: Image to display and to augment.
        """
        augmentations = ['rotation', 'flip', 'grayscale', 'brightness', 'blur']

        # The figure is drawn unconditionally, like display_member_images.
        # Probing the private ``plt._show`` attribute is avoided on purpose:
        # it is not public Matplotlib API, and whenever it is missing the
        # augmentation figure would be skipped on every call.
        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        fig.suptitle(f'{member_name} - {img_type.capitalize()} Image Augmentations',
                     fontsize=16, fontweight='bold')

        # One (title, image) pair per panel, in row-major display order.
        panels = [('Original', original_img)]
        panels.extend(
            (aug_type.capitalize(), self.augment_image(original_img, aug_type))
            for aug_type in augmentations
        )

        for ax, (title, panel_img) in zip(axes.flat, panels):
            ax.imshow(panel_img)
            ax.set_title(title, fontsize=12)
            ax.axis('off')

        plt.tight_layout()
        plt.show()
    
    def extract_features(self, image, member_name, img_type, aug_type='original'):
        """Build one feature record (a flat dict) for an RGB image.

        The record starts with three metadata entries (``member_name``,
        ``image_type``, ``augmentation``) followed by, in this order:

        * 32-bin normalised histograms for the red, green and blue channels
        * a 32-bin normalised histogram of the grayscale image
        * mean/std of the grayscale image and the mean of each RGB channel
        * ``edge_density``: share of non-zero pixels in a Canny(100, 200) map
        * ``texture_contrast`` (grayscale std, same value as ``std_intensity``)
          and ``texture_entropy`` (from ``_calculate_entropy``)
        * mean hue, saturation and value of the HSV conversion

        Args:
            image: Image in RGB channel order.
            member_name: Stored under ``member_name``.
            img_type: Stored under ``image_type``.
            aug_type: Stored under ``augmentation``.

        Returns:
            dict of feature name to value; insertion order is as listed above.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

        def normalised_hist(source, channel):
            # 32 equal-width bins over [0, 256), scaled to sum to 1.
            counts = cv2.calcHist([source], [channel], None, [32], [0, 256])
            return counts.flatten() / counts.sum()

        # Metadata columns come first.
        features = {
            'member_name': member_name,
            'image_type': img_type,
            'augmentation': aug_type,
        }

        # 1. Per-channel colour histograms.
        for channel, color in enumerate(['red', 'green', 'blue']):
            features.update(
                (f'{color}_hist_{bin_idx}', share)
                for bin_idx, share in enumerate(normalised_hist(image, channel))
            )

        # 2. Grayscale histogram.
        features.update(
            (f'gray_hist_{bin_idx}', share)
            for bin_idx, share in enumerate(normalised_hist(gray, 0))
        )

        # 3. Intensity statistics.
        features.update({
            'mean_intensity': np.mean(gray),
            'std_intensity': np.std(gray),
            'mean_red': np.mean(image[:, :, 0]),
            'mean_green': np.mean(image[:, :, 1]),
            'mean_blue': np.mean(image[:, :, 2]),
        })

        # 4. Fraction of pixels marked as edges.
        edge_map = cv2.Canny(gray, 100, 200)
        features['edge_density'] = np.sum(edge_map > 0) / edge_map.size

        # 5. Texture approximations.
        features.update({
            'texture_contrast': np.std(gray),
            'texture_entropy': self._calculate_entropy(gray),
        })

        # 6. HSV channel means.
        features.update({
            'mean_hue': np.mean(hsv[:, :, 0]),
            'mean_saturation': np.mean(hsv[:, :, 1]),
            'mean_value': np.mean(hsv[:, :, 2]),
        })

        return features
    
    def _calculate_entropy(self, image):
        """Return the entropy, in bits, of the image's 256-bin histogram.

        The histogram is taken over channel 0 of ``image`` with one bin per
        value in [0, 256) and normalised to probabilities.
        """
        counts = cv2.calcHist([image], [0], None, [256], [0, 256])
        probabilities = counts.flatten() / counts.sum()
        # Empty bins are dropped so log2 is never evaluated at zero.
        occupied = probabilities[probabilities > 0]
        return -np.sum(occupied * np.log2(occupied))
    
    def process_all_members(self, member_names):
        """Process images for all team members"""
        self.members = member_names
        
        for member in member_names:
            print(f"\n{'='*60}")
            print(f"Processing images for: {member}")
            print('='*60)
            
            # Load images
            images = self.load_images_for_member(member)
            
            if not images:
                print(f"⚠ No images found for {member}. Check paths/extensions.")
                continue
            
            # Display original images (Assignment requirement)
            self.display_member_images(member, images)
            
            # Process each image type
            for img_type, img in images.items():
                print(f"\n--- Processing {img_type} image ---")
                
                # Display augmentations (Assignment requirement)
                self.display_augmentations(member, img_type, img)
                
                # Extract features from original
                features = self.extract_features(img, member, img_type, 'original')
                self.all_features.append(features)
                
                # Extract features from augmented versions
                augmentations = ['rotation', 'flip', 'grayscale', 'brightness', 'blur']
                for aug_type in augmentations:
                    aug_img = self.augment_image(img, aug_type)
                    features = self.extract_features(aug_img, member, img_type, aug_type)
                    self.all_features.append(features)
                
                print(f"✓ Extracted features for {img_type} (original + 5 augmentations)")
    
    def save_features(self, output_path='data/image_features.csv'):
        """Save all extracted features to CSV"""
        if not self.all_features:
            print("⚠ No features to save!")
            return
        
        df = pd.DataFrame(self.all_features)
        
        # Create output directory if it doesn't exist
        output_file = Path(output_path)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        
        # Save to CSV
        df.to_csv(output_path, index=False)
        print(f"\n✓ Saved {len(df)} feature vectors to {output_path}")
        print(f"  Shape: {df.shape}")
        print(f"  Columns: {df.shape[1]}")
        
        return df
    
    def display_feature_summary(self):
        """Display summary of extracted features"""
        if not self.all_features:
            print("⚠ No features extracted yet!")
            return
        
        df = pd.DataFrame(self.all_features)
        
        print("\n" + "="*60)
        print("FEATURE EXTRACTION SUMMARY")
        print("="*60)
        print(f"\nTotal samples: {len(df)}")
        print(f"Total features per sample: {df.shape[1] - 3}")  # Exclude metadata columns
        print(f"\nSamples per member:")
        print(df['member_name'].value_counts())
        print(f"\nSamples per image type:")
        print(df['image_type'].value_counts())
        print(f"\nSamples per augmentation:")
        print(df['augmentation'].value_counts())