In [None]:
# Dependencies: pip install torch torchvision pillow numpy
import os
from PIL import Image
import torch
import torch.nn as nn
from torchvision import models
import torchvision.transforms as transforms
import numpy as np
import re  # Import regular expression module for extracting numbers


# Pretrained VGG-19, truncated and globally max-pooled into a feature extractor
class VGG19Extractor(nn.Module):
    """Truncated ImageNet-pretrained VGG-19 followed by global max pooling.

    The first ``num_layers`` modules of torchvision's ``vgg19().features`` are
    kept. Every output channel is then reduced to its spatial maximum, so each
    input image yields one flat vector with one entry per channel.

    NOTE: with the default ``num_layers=26`` the last retained module is
    ``features[25]``, which in torchvision's VGG-19 layout is the conv4_4
    convolution (without its ReLU) -- not conv5_1, as an earlier comment in
    this file stated. conv5_1 is ``features[28]``; pass ``num_layers=29`` (or
    ``30`` to include its ReLU) to extract it. The default stays at 26 so that
    feature files produced earlier remain reproducible.

    Args:
        num_layers: Number of leading modules of ``vgg19().features`` to keep.

    Raises:
        ValueError: If ``num_layers`` is not between 1 and the number of
            modules in ``vgg19().features``.
    """

    def __init__(self, num_layers=26):
        super().__init__()

        # torchvision >= 0.13 selects weights through an enum and deprecates
        # `pretrained=`; fall back to the old flag on older installations.
        weights_enum = getattr(models, "VGG19_Weights", None)
        if weights_enum is not None:
            vgg19 = models.vgg19(weights=weights_enum.IMAGENET1K_V1)
        else:
            vgg19 = models.vgg19(pretrained=True)

        layers = list(vgg19.features.children())
        if not 1 <= num_layers <= len(layers):
            raise ValueError(
                f"num_layers must be between 1 and {len(layers)}, got {num_layers}"
            )

        self.features = nn.Sequential(*layers[:num_layers])  # Truncated convolutional stack
        self.max_pool = nn.AdaptiveMaxPool2d((1, 1))  # Global max over the spatial dimensions

    def forward(self, x):
        """Return a ``(batch, channels)`` tensor of globally max-pooled features."""
        x = self.features(x)  # (batch, channels, H, W) feature maps
        x = self.max_pool(x)  # (batch, channels, 1, 1)
        return x.flatten(1)  # Collapse everything after the batch axis


# Load an image from disk and turn it into a normalized model input
def preprocess_image(image_path, device):
    """Load one image file and convert it to a normalized input batch.

    The image is forced to RGB, resized to 224x224, converted to a tensor,
    normalized per channel with the mean/std constants below, given a leading
    batch dimension, and moved to ``device``.

    Args:
        image_path: Path of the image file to open with PIL.
        device: Device the resulting tensor is moved to.

    Returns:
        A tensor of shape ``(1, 3, 224, 224)`` located on ``device``.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize image to 224x224
        transforms.ToTensor(),  # Convert to Tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
    ])

    # Close the file handle deterministically instead of relying on garbage
    # collection; convert() returns an independent in-memory copy, so the
    # image stays usable after the `with` block ends.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    input_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    return input_tensor.to(device)  # Move input tensor to specified device


# Sort filenames in natural (numeric-aware) order
def sorted_alphanumeric(data):
    """Return the strings in ``data`` sorted in natural order.

    Runs of ASCII digits are compared as integers and everything else is
    compared case-insensitively, so ``"img2.png"`` sorts before
    ``"img10.png"``.

    Args:
        data: Iterable of strings (e.g. filenames).

    Returns:
        A new list containing the items of ``data`` in natural order.
    """
    digit_run = re.compile(r"([0-9]+)")

    def natural_key(name):
        # Splitting on a capturing group alternates text, digits, text, ...
        # so odd positions are exactly the [0-9]+ runs. Deciding by position
        # (rather than str.isdigit) keeps characters such as '²', which
        # isdigit() accepts but int() rejects, from crashing the sort, and
        # guarantees ints and strings are only ever compared like-with-like.
        return [
            int(chunk) if index % 2 else chunk.lower()
            for index, chunk in enumerate(digit_run.split(name))
        ]

    return sorted(data, key=natural_key)


# Extract one feature vector per image file in a folder
def process_images_in_folder(folder_path, model, device):
    """Run ``model`` on every image in ``folder_path`` and collect the outputs.

    Entries of the folder are visited in natural (numeric-aware) filename
    order. A file counts as an image when its name ends in .png, .jpg or
    .jpeg; the comparison is case-insensitive so files such as ``IMG_1.JPG``
    are not silently skipped. A file that fails to load or to pass through
    the model is reported on stdout and skipped, so the returned list may
    contain fewer entries than there are image files.

    Args:
        folder_path: Directory containing the images.
        model: Callable applied to each preprocessed image tensor.
        device: Device the input tensors are placed on.

    Returns:
        A list of NumPy arrays, one per successfully processed image, in
        processing order.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    features_list = []  # Store features for each image

    for filename in sorted_alphanumeric(os.listdir(folder_path)):
        if not filename.lower().endswith(image_extensions):
            continue  # Process only image files

        image_path = os.path.join(folder_path, filename)
        try:
            input_tensor = preprocess_image(image_path, device)

            # Inference only: disable gradient tracking to save memory
            with torch.no_grad():
                features = model(input_tensor)

            # Drop singleton dimensions, move back to CPU, convert to NumPy
            features_numpy = features.squeeze().cpu().numpy()
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch
            print(f"Error processing {filename}: {e}")
        else:
            features_list.append(features_numpy)
            print(f"Processed: {filename}")

    return features_list


# Main program
if __name__ == "__main__":
    # Folder holding the input images; the feature file is written here as well
    folder_path = "C:/Users/12152/Desktop/data/sepia4/vision_white_after/segmentation_histogram_equalization_test"  # Replace with your image folder path

    # Detect device (GPU or CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Create model instance and move to specified device
    model = VGG19Extractor().to(device)
    model.eval()  # Set to evaluation mode

    # Batch process images
    features_list = process_images_in_folder(folder_path, model, device)

    if features_list:
        # Stack the per-image vectors into one array with one row per image
        # (512 columns with the default extractor)
        features_array = np.array(features_list)

        # Save next to the images; derive the path from folder_path so the
        # two locations cannot drift apart
        output_path = os.path.join(folder_path, "features.npy")
        np.save(output_path, features_array)
        print("Features saved to 'features.npy'")
        print("Feature extraction completed. Number of images processed:", len(features_list))
    else:
        # Nothing was extracted: do not overwrite an existing features.npy
        # with an empty array or claim that features were saved
        print("No images were processed.")