In [None]:
# Install required packages
!pip install scikit-image opencv-python-headless tqdm

import os
import numpy as np
import cv2
from google.colab import drive
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
from scipy.stats import skew, kurtosis
import zipfile
from tqdm import tqdm
import concurrent.futures
import threading
from concurrent.futures import ThreadPoolExecutor



In [None]:
# Mount Google Drive at /content/drive so that files stored there (the
# dataset archive is read from under this path) are reachable through the
# local filesystem.
drive.mount('/content/drive')

In [None]:
# Root directory that is later walked recursively to collect image files.
# NOTE(review): the zip-extraction cell assigns extract_path as well, so this
# cell presumably only matters when extraction is skipped - confirm.
extract_path = '/content/'

In [None]:
# Specify the path to your zip file in Google Drive
zip_path = '/content/drive/MyDrive/Adultered_Project/Augemented_Data.zip'

# Extract into a dedicated directory rather than '/content/' itself.
# '/content/' also contains the Google Drive mount point ('/content/drive'),
# so a recursive search of extract_path would otherwise pick up every
# unrelated image stored on Drive in addition to the dataset.
extract_path = '/content/augmented_data/'
os.makedirs(extract_path, exist_ok=True)

print("Extracting zip file...")
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [None]:
def calculate_glcm_features(img_gray):
    """Return GLCM texture descriptors of a grayscale image.

    A single co-occurrence matrix is built for one offset (distance 1,
    angle 0) with 256 gray levels, symmetric and normalised, and three
    properties are read from it.

    Args:
        img_gray: grayscale image handed to ``graycomatrix``
            (assumed to be a 2-D integer image with values below 256,
            matching ``levels=256`` - TODO confirm against callers).

    Returns:
        dict with the keys 'Contrast', 'Energy' and 'Homogeneity'.
    """
    cooccurrence = graycomatrix(
        img_gray,
        distances=[1],
        angles=[0],
        levels=256,
        symmetric=True,
        normed=True,
    )

    # (output key, graycoprops property name); order defines the key order.
    wanted_properties = (
        ('Contrast', 'contrast'),
        ('Energy', 'energy'),
        ('Homogeneity', 'homogeneity'),
    )

    # Only one distance and one angle are requested, so element [0][0] is
    # the single value of each property.
    return {
        feature_name: graycoprops(cooccurrence, prop_name)[0][0]
        for feature_name, prop_name in wanted_properties
    }

In [None]:
def calculate_color_stats(img_rgb):
    """Return the per-channel mean and standard deviation of an image.

    The statistics are reduced over the first two axes, so one value per
    entry of the last axis is produced; the first three entries are
    reported as R, G and B.

    Args:
        img_rgb: array indexed as (row, column, channel) with at least
            three channels.

    Returns:
        dict with the keys 'Mean_R', 'Mean_G', 'Mean_B', 'Std_R', 'Std_G'
        and 'Std_B', in that order.
    """
    spatial_axes = (0, 1)
    channel_means = np.mean(img_rgb, axis=spatial_axes)
    channel_stds = np.std(img_rgb, axis=spatial_axes)

    mean_keys = ('Mean_R', 'Mean_G', 'Mean_B')
    std_keys = ('Std_R', 'Std_G', 'Std_B')

    stats = {key: channel_means[idx] for idx, key in enumerate(mean_keys)}
    stats.update({key: channel_stds[idx] for idx, key in enumerate(std_keys)})
    return stats

In [None]:
def calculate_intensity_stats(intensity):
    """Return variance, skewness and kurtosis of all intensity values.

    The input array is collapsed to one dimension first, so the statistics
    describe the pooled values of the whole array.

    Args:
        intensity: NumPy array of intensity values (any shape).

    Returns:
        dict with the keys 'var_intensity', 'skewness' and 'kurtosis'.
    """
    values = intensity.ravel()

    variance = np.var(values)
    skewness = skew(values)
    excess_kurtosis = kurtosis(values)

    return {
        'var_intensity': variance,
        'skewness': skewness,
        'kurtosis': excess_kurtosis,
    }

In [None]:
def calculate_color_histogram(img):
    """Return three entries of a normalised joint colour histogram.

    A joint histogram over channels 0, 1 and 2 is computed with 8 bins per
    channel over the range [0, 256), normalised with ``cv2.normalize``
    (default arguments), and flattened.  Only the first three entries of
    the flattened histogram are returned.

    Args:
        img: three-channel image passed to ``cv2.calcHist``.

    Returns:
        dict with the keys 'Color_Hist_1', 'Color_Hist_2' and
        'Color_Hist_3'.
    """
    channels = [0, 1, 2]
    bins_per_channel = [8, 8, 8]
    value_ranges = [0, 256, 0, 256, 0, 256]

    hist = cv2.calcHist([img], channels, None, bins_per_channel, value_ranges)
    normalized = cv2.normalize(hist, hist)
    flat_hist = normalized.flatten()

    # NOTE(review): just 3 of the 8*8*8 bins are kept - confirm this is the
    # intended feature set.
    feature_names = ('Color_Hist_1', 'Color_Hist_2', 'Color_Hist_3')
    return {name: flat_hist[idx] for idx, name in enumerate(feature_names)}

In [None]:
def extract_features(image_path):
    """Compute the full feature record for one image file.

    The image is read with OpenCV, converted once to RGB and to grayscale,
    and the texture, colour, intensity and histogram features are merged
    into a single dict together with the image size and its path.

    Args:
        image_path: path of the image file to read.

    Returns:
        dict of features (including 'Height', 'Width' and 'Image_Path'),
        or None when the image cannot be read or any step raises; in that
        case the error is printed instead of propagated.
    """
    try:
        bgr = cv2.imread(image_path)
        if bgr is None:
            raise ValueError(f"Failed to read image: {image_path}")

        n_rows, n_cols = bgr.shape[:2]

        # Colour conversions are done once and shared by the extractors.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

        # Merge order fixes the key (and later column) order.
        return {
            **calculate_glcm_features(gray),
            **calculate_color_stats(rgb),
            **calculate_intensity_stats(gray),
            **calculate_color_histogram(bgr),
            'Height': n_rows,
            'Width': n_cols,
            'Image_Path': image_path,
        }

    except Exception as err:
        # Best effort: report the failing file and let the caller skip it.
        print(f"Error processing {image_path}: {str(err)}")
        return None

In [None]:
def process_images_in_batches(image_paths, batch_size=16):
    """Extract features for many images using a pool of worker threads.

    Every path is submitted to ``extract_features``; a progress bar
    advances as tasks finish.  The returned list follows the order of
    ``image_paths`` (not the order in which threads happen to finish), so
    repeated runs over the same paths produce identically ordered results.

    Args:
        image_paths: iterable of image file paths.
        batch_size: number of worker threads in the pool (passed as
            ``max_workers``).

    Returns:
        list of feature dicts, one per successfully processed image;
        images for which ``extract_features`` returned None are omitted.
    """
    image_paths = list(image_paths)
    ordered_results = [None] * len(image_paths)

    with ThreadPoolExecutor(max_workers=batch_size) as executor:
        # Remember each task's input position so results can be slotted
        # back in input order regardless of completion order.
        position_of = {
            executor.submit(extract_features, image_path): position
            for position, image_path in enumerate(image_paths)
        }

        # Show progress bar
        for future in tqdm(concurrent.futures.as_completed(position_of),
                           total=len(position_of),
                           desc="Processing images"):
            ordered_results[position_of[future]] = future.result()

    return [result for result in ordered_results if result is not None]

In [None]:
# Get list of all image files
print("Collecting image paths...")

IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
# The Drive mount lives under /content; when extract_path is /content the walk
# must not descend into it, otherwise unrelated Drive images are collected.
EXCLUDED_DIR_PATHS = {os.path.normpath('/content/drive')}
# macOS archive metadata: '__MACOSX' folders and '._*' resource-fork files
# carry image extensions but are not decodable images.
EXCLUDED_DIR_NAMES = {'__MACOSX'}

image_paths = []
for root, dirs, files in os.walk(extract_path):
    # Prune in place so os.walk skips the excluded sub-trees entirely;
    # sorting makes the traversal (and thus the path list) deterministic.
    dirs[:] = sorted(
        d for d in dirs
        if d not in EXCLUDED_DIR_NAMES
        and os.path.normpath(os.path.join(root, d)) not in EXCLUDED_DIR_PATHS
    )
    for file in sorted(files):
        if file.startswith('._'):
            continue
        if file.lower().endswith(IMAGE_EXTENSIONS):
            image_paths.append(os.path.join(root, file))

print(f"Found {len(image_paths)} images")

# Process images
print("Processing images...")
all_features = process_images_in_batches(image_paths)


Collecting image paths...
Found 10043 images
Processing images...


Processing images:   0%|          | 41/10043 [00:01<04:34, 36.38it/s]

Error processing /content/drive/MyDrive/dc.png: Failed to read image: /content/drive/MyDrive/dc.png


Processing images:   1%|          | 60/10043 [00:01<02:54, 57.23it/s]

Error processing /content/drive/MyDrive/IMG_20190522_132854.jpg: Failed to read image: /content/drive/MyDrive/IMG_20190522_132854.jpg
Error processing /content/drive/MyDrive/IMG_20190522_132844.jpg: Failed to read image: /content/drive/MyDrive/IMG_20190522_132844.jpg
Error processing /content/drive/MyDrive/IMG_20190522_131757.jpg: Failed to read image: /content/drive/MyDrive/IMG_20190522_131757.jpg
Error processing /content/drive/MyDrive/IMG_20190522_133452.jpg: Failed to read image: /content/drive/MyDrive/IMG_20190522_133452.jpg


Processing images:  92%|█████████▏| 9285/10043 [02:47<00:28, 26.23it/s]

Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i7.jpg: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i7.jpg


Processing images:  93%|█████████▎| 9292/10043 [02:47<00:29, 25.40it/s]

Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i6.jpg: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i6.jpg
Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i10.jpg: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i10.jpg
Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i8.jpg: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i8.jpg
Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i13.png: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR D

Processing images:  93%|█████████▎| 9296/10043 [02:48<00:30, 24.88it/s]

Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._sphotos.jpg: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._sphotos.jpg


Processing images:  93%|█████████▎| 9307/10043 [02:48<00:26, 27.65it/s]

Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i12.jpg: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i12.jpg
Error processing /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i1.jpg: Failed to read image: /content/drive/Othercomputers/My Laptop/Desktop/Files/TOUR DEMO/__MACOSX/TOUR DEMO/Photos Collections/._i1.jpg


  'skewness': skew(flat_intensity),
  'kurtosis': kurtosis(flat_intensity)
Processing images: 100%|██████████| 10043/10043 [03:11<00:00, 52.34it/s]


In [None]:
# Create DataFrame (one row per successfully processed image)
print("Creating DataFrame...")
df = pd.DataFrame(all_features)

# Save to CSV
output_path = '/content/drive/MyDrive/image_features.csv'
print(f"Saving results to {output_path}")
df.to_csv(output_path, index=False)
print("Done!")

# Print some statistics
# Image_Path identifies the row and is not a feature.  Count the remaining
# columns explicitly so an empty result reports 0 features instead of -1.
feature_columns = [col for col in df.columns if col != 'Image_Path']
print("\nSummary:")
print(f"Total images processed: {len(df)}")
print(f"Features extracted per image: {len(feature_columns)}")
print("\nFeature preview:")
print(df.head())

Creating DataFrame...
Saving results to /content/drive/MyDrive/image_features.csv
Done!

Summary:
Total images processed: 10027
Features extracted per image: 17

Feature preview:
     Contrast    Energy  Homogeneity      Mean_R      Mean_G      Mean_B  \
0  124.146354  0.240403     0.782862  159.728081  152.245370  149.369045   
1  124.146354  0.240403     0.782862  159.728081  152.245370  149.369045   
2  268.666494  0.017711     0.126736  175.717906  171.354129  167.148680   
3  129.316790  0.128459     0.745999  180.562294  181.007283  177.877115   
4  167.648185  0.144648     0.634382  154.494425  151.131818  147.306783   

       Std_R      Std_G      Std_B  var_intensity  skewness   kurtosis  \
0  21.193808  21.032794  21.022642     443.528294 -3.630939  16.965012   
1  21.193808  21.032794  21.022642     443.528294 -3.630939  16.965012   
2  36.006704  35.993231  35.850221    1294.975955 -1.646531   3.839517   
3  25.824347  26.230856  26.429890     681.557886 -2.600118   8.0597