# In [7]:
import os
import cv2
import numpy as np
import pywt
from skimage import io, color, feature
from scipy import fftpack, stats
from skimage.measure import shannon_entropy

def _summary_stats(values, prefix):
    """Return mean/median/std/max/min of *values*, keyed as ``<prefix>_<stat>``."""
    # Insertion order matters: it becomes the column order of the output table.
    return {
        prefix + '_mean': np.mean(values),
        prefix + '_median': np.median(values),
        prefix + '_std': np.std(values),
        prefix + '_max': np.max(values),
        prefix + '_min': np.min(values),
    }


def _to_grayscale(image):
    """Return a 2-D floating-point grayscale version of *image*.

    Accepts single-channel (2-D), RGB and RGBA arrays. Raises ``ValueError``
    for any other layout.
    """
    from skimage.util import img_as_float

    if image.ndim == 2:
        # Already grayscale. Recent scikit-image releases reject 2-D input to
        # rgb2gray, so only apply the float conversion that rgb2gray would
        # have applied to an RGB image.
        return img_as_float(image)

    if image.ndim == 3 and image.shape[-1] == 4:
        # RGBA: discard the alpha channel (it is not blended into the colours).
        image = image[..., :3]

    if image.ndim == 3 and image.shape[-1] == 3:
        return color.rgb2gray(image)

    raise ValueError(
        'Unsupported image shape {!r}; expected grayscale, RGB or RGBA'.format(
            image.shape
        )
    )


def extract_features(image_path, size=(400, 400)):
    """Extract spatial, FFT, Haar-wavelet and DCT statistics from one image.

    The image is read from disk, resized, converted to grayscale and then
    summarised by a fixed set of scalar features.

    Args:
        image_path: Path of the image file to read.
        size: Target ``(width, height)`` passed to ``cv2.resize``. Defaults
            to ``(400, 400)``.

    Returns:
        A dict mapping feature name to a scalar value, in this order:
        ``mean``, ``median``, ``std_dev``, ``max``, ``min``, ``entropy``,
        ``edges_mean``, then ``<p>_mean``, ``<p>_median``, ``<p>_std``,
        ``<p>_max``, ``<p>_min`` for each ``<p>`` in ``fft``, ``haar``, ``dct``.

    Raises:
        ValueError: If the loaded image is not grayscale, RGB or RGBA.
        Errors raised by the image reader or by OpenCV are propagated.
    """
    image = io.imread(image_path)
    resized_image = cv2.resize(image, tuple(size))
    gray_image = _to_grayscale(resized_image)

    # Spatial-domain statistics of the grayscale pixels.
    features = {
        'mean': np.mean(gray_image),
        'median': np.median(gray_image),
        'std_dev': np.std(gray_image),
        'max': np.max(gray_image),
        'min': np.min(gray_image),
    }
    features['entropy'] = shannon_entropy(gray_image)

    # Mean of the Canny edge map; high_threshold is left at the library default.
    edges = feature.canny(gray_image, sigma=1, low_threshold=0.2)
    features['edges_mean'] = np.mean(edges)

    # Frequency domain: magnitude of the centred 2-D FFT.
    fft_magnitude = np.abs(fftpack.fftshift(fftpack.fft2(gray_image)))
    features.update(_summary_stats(fft_magnitude, 'fft'))

    # Single-level Haar wavelet transform; only the approximation band is
    # summarised, the detail bands are ignored.
    approximation, _details = pywt.dwt2(gray_image, 'haar')
    features.update(_summary_stats(approximation, 'haar'))

    # 2-D DCT built from two orthonormal 1-D DCTs, one along each axis.
    dct_image = fftpack.dct(
        fftpack.dct(gray_image.T, norm='ortho').T, norm='ortho'
    )
    features.update(_summary_stats(dct_image, 'dct'))

    return features

# Root folder holding one sub-folder per class; the sub-folder name is used
# as the label of every image inside it.
image_folder = 'val_data'

# One feature dictionary per successfully processed image.
all_features = []

# os.listdir returns entries in arbitrary order, so both listings are sorted
# to make the row order of the output CSV reproducible between runs.
for category_folder in sorted(os.listdir(image_folder)):
    category_path = os.path.join(image_folder, category_folder)

    # Only directories are class folders; ignore stray files at the top level.
    if not os.path.isdir(category_path):
        continue

    for image_file in sorted(os.listdir(category_path)):
        image_path = os.path.join(category_path, image_file)

        # Nested directories are not images.
        if not os.path.isfile(image_path):
            continue

        try:
            image_features = extract_features(image_path)
        except (OSError, ValueError, cv2.error) as exc:
            # One unreadable or unsupported file must not abort the whole
            # run; report it and carry on with the remaining images.
            print('Skipping {}: {}'.format(image_path, exc))
            continue

        image_features['label'] = category_folder  # Add the class label
        all_features.append(image_features)

# Convert the list of dictionaries to a DataFrame (one row per image).
import pandas as pd
df_features = pd.DataFrame(all_features)

# Save to CSV for further use.
df_features.to_csv('image_val.csv', index=False)
