In [1]:
import os
from pathlib import Path
from PIL import Image
import time

# --- Configuration ---
# Input tree: the folder holding the 'train' and 'test' sub-folders
# (this is what Data_split.py writes out).
SPLIT_ROOT_DIR = "Fingerprint_Split_Dataset"

# Output tree: where the resized copies of the images are written.
PREPROCESSED_ROOT_DIR = "Fingerprint_Preprocessed_Dataset"

# Size handed to PIL's Image.resize for every output image.
TARGET_SIZE = (96, 96)

# Only files whose lower-cased name ends with one of these suffixes are processed.
IMAGE_EXTENSIONS = (
    '.tif',
    '.png',
    '.jpg',
    '.jpeg',
)
# ---------------------

def preprocess_and_save_data(split_dir, dest_dir, target_size, extensions):
    """
    Resize every image under ``split_dir`` and mirror the tree into ``dest_dir``.

    The directory layout below ``split_dir`` is recreated below ``dest_dir``
    (empty directories included). Each file whose lower-cased name ends with
    one of ``extensions`` is opened with PIL, converted to mode 'L'
    (single-channel grayscale) if it is not already, resized to
    ``target_size`` with bicubic resampling, and saved under the same relative
    path and file name. Files with other suffixes are ignored.

    Processing is best-effort: an image that fails to load, convert, resize or
    save is reported on stdout and skipped; the run continues.

    Args:
        split_dir: Root of the source tree to walk.
        dest_dir: Root of the output tree; created if missing.
        target_size: ``(width, height)`` passed to ``Image.resize``.
        extensions: A suffix string or an iterable of suffix strings
            (e.g. ``('.png', '.jpg')``); matching is case-insensitive.

    Returns:
        None. Progress and a summary are printed to stdout.
    """
    print(f"Starting physical preprocessing to resize images to {target_size}...")
    start_time = time.time()

    # Normalise the suffixes once: filenames are lower-cased before matching,
    # so the suffixes must be lower-case too, and str.endswith needs a tuple.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    dest_root = Path(dest_dir)
    dest_root.mkdir(parents=True, exist_ok=True)
    dest_resolved = dest_root.resolve()

    processed_count = 0
    failed_count = 0

    # os.walk is used to traverse the directory structure recursively
    for root, dirs, files in os.walk(split_dir):
        root_path = Path(root)

        # If the destination lives inside the source tree, do not descend into
        # it: otherwise the walk would keep visiting directories it has just
        # created. Pruning `dirs` in place is how a top-down os.walk is steered.
        dirs[:] = [d for d in dirs if (root_path / d).resolve() != dest_resolved]

        # Replicate the current directory (e.g. 'train/person_id') under dest_dir.
        target_dir = dest_root / root_path.relative_to(split_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        for filename in files:
            if not filename.lower().endswith(suffixes):
                continue

            source_path = root_path / filename
            dest_path = target_dir / filename

            try:
                # The context manager releases the underlying file handle even
                # when conversion or resizing raises.
                with Image.open(source_path) as img:
                    # Convert to grayscale only when the image is not already 'L'.
                    gray = img if img.mode == 'L' else img.convert('L')
                    # resize() returns a new, fully loaded image that does not
                    # depend on the source file staying open.
                    resized_img = gray.resize(target_size, Image.Resampling.BICUBIC)

                # Save after the source is closed so that writing to the same
                # path as the source cannot collide with an open handle.
                resized_img.save(dest_path)
            except Exception as e:
                # Deliberately broad: one unreadable image must not abort the run.
                failed_count += 1
                print(f"Error processing {source_path}: {e}")
            else:
                processed_count += 1

    end_time = time.time()

    print("-" * 60)
    print(f"Preprocessing complete! Took {end_time - start_time:.2f} seconds.")
    print(f"Total images processed and saved: {processed_count}")
    if failed_count:
        print(f"Images skipped due to errors: {failed_count}")
    # Report the destination that was actually used, not a module-level constant.
    print(f"New dataset saved at: '{dest_dir}'")
    print("This dataset is now ready for direct loading into any ML framework.")

# Kick off preprocessing with the settings from the configuration section.
preprocess_and_save_data(
    split_dir=SPLIT_ROOT_DIR,
    dest_dir=PREPROCESSED_ROOT_DIR,
    target_size=TARGET_SIZE,
    extensions=IMAGE_EXTENSIONS,
)


Starting physical preprocessing to resize images to (96, 96)...
------------------------------------------------------------
Preprocessing complete! Took 38.17 seconds.
Total images processed and saved: 9600
New dataset saved at: 'Fingerprint_Preprocessed_Dataset'
This dataset is now ready for direct loading into any ML framework.
