# 1. Import libraries

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage import filters, measure

# 2. Initialize the path configuration for the datasets below
1. Ulcer cell
2. Healthy skin cell
3. Testset (External) that comprises only ulcer cells

In [2]:
# === PATH CONFIGURATION ===
# Each dataset lives in its own sub-folder of DATA_ROOT. DATA_ROOT is a
# relative path, so it is resolved against the current working directory.
DATA_ROOT = "Raw_Dataset"
ULCER_DIR, HEALTHY_DIR, TESTSET_DIR = (
    os.path.join(DATA_ROOT, subfolder)
    for subfolder in ("Ulcer", "Healthy", "TestSet")
)

# 3. FUNCTION 1: IMAGE STANDARDIZATION

In [3]:

def standardize_image(path, size=(256, 256)):
    """Load an image, resize it to a fixed size, and median-filter it.

    Args:
        path: Location of the image file, passed straight to ``cv2.imread``.
        size: Target size handed to ``cv2.resize``; defaults to ``(256, 256)``.

    Returns:
        The resized, median-blurred image, or ``None`` when ``cv2.imread``
        returns ``None`` for ``path``.
    """
    loaded = cv2.imread(path)
    if loaded is not None:
        # Bring every image to the same dimensions first, then smooth it with
        # a median filter (aperture size 5) to suppress noise while keeping
        # edges reasonably sharp.
        return cv2.medianBlur(cv2.resize(loaded, size), 5)
    return None

# 4. FUNCTION 2: COLOR SPACE TRANSFORMATION

In [4]:
def get_clinical_color_channels(img_bgr):
    """Convert a BGR image to LAB and return its L and A channels.

    Args:
        img_bgr: Image passed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2LAB``.

    Returns:
        Tuple ``(l_chan, a_chan)``: the lightness channel and the red-green
        axis channel. The B (yellow-blue) channel is split off but discarded.
    """
    lab_image = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    # Unpack all three channels so the split arity is still checked; only the
    # first two are used by the rest of the pipeline.
    lightness, red_green, _yellow_blue = cv2.split(lab_image)
    return lightness, red_green

# 5. FUNCTION 3: MORPHOLOGICAL SEGMENTATION

In [5]:
def segment_wound_region(l_channel):
    """Find the largest dark region of a lightness channel via Otsu's threshold.

    Pixels strictly below the Otsu threshold are treated as candidate wound
    pixels (ulcers typically appear darker than the surrounding skin). The
    resulting mask is split into connected components and the component with
    the largest area is returned.

    Args:
        l_channel: Lightness channel to segment.

    Returns:
        The region-properties object with the largest ``area``, or ``None``
        when no pixel falls below the threshold or when segmentation raises
        an ordinary exception.
    """
    try:
        otsu_level = filters.threshold_otsu(l_channel)
        dark_mask = l_channel < otsu_level
        candidates = measure.regionprops(measure.label(dark_mask))
        if not candidates:
            return None
        # The biggest connected dark component is taken as the primary ulcer.
        return max(candidates, key=lambda r: r.area)
    except Exception:
        # Best-effort by design: a single image that cannot be segmented must
        # not abort a whole batch run. Catching Exception (rather than using a
        # bare ``except``) lets KeyboardInterrupt and SystemExit propagate, so
        # a long-running loop can still be interrupted by the user.
        return None

# 6. FUNCTION 4: FEATURE ENGINEERING

In [6]:
def calculate_metrics(region, l_chan, a_chan, img_bgr, label):
    """Assemble one feature row from a segmented region and its image.

    Args:
        region: Object exposing ``area``, ``perimeter``, ``solidity`` and
            ``eccentricity`` attributes.
        l_chan: Lightness channel; its mean becomes ``necrosis_index``.
        a_chan: Red-green channel; its mean becomes ``erythema_index``.
        img_bgr: Image whose standard deviation becomes ``color_variance``.
        label: Value stored unchanged under ``ulcer_present``.

    Returns:
        dict with keys ``area``, ``perimeter``, ``solidity``,
        ``eccentricity``, ``erythema_index``, ``necrosis_index``,
        ``color_variance`` and ``ulcer_present``, in that order.
    """
    # Shape descriptors are read directly off the segmented region.
    metrics = {
        shape_name: getattr(region, shape_name)
        for shape_name in ("area", "perimeter", "solidity", "eccentricity")
    }

    # NOTE(review): the colour statistics below are taken over the full arrays
    # passed in, not only over the pixels belonging to ``region`` - confirm
    # that whole-image statistics are what is intended.
    metrics["erythema_index"] = np.mean(a_chan)   # mean of the red-green axis
    metrics["necrosis_index"] = np.mean(l_chan)   # mean lightness
    # Despite the key name, this is a standard deviation, not a variance.
    metrics["color_variance"] = np.std(img_bgr)
    metrics["ulcer_present"] = label
    return metrics

# 7. MAIN PIPELINE
This function runs all of the functions above, in order, on every file in each dataset folder.

In [7]:
def run_preprocessing_pipeline(data_dirs):
    """Extract one feature row per usable image in each labelled folder.

    For every file in every folder the image is standardized, converted to
    its L and A channels, segmented, and turned into a feature dict. Files
    that cannot be loaded, or for which no region is found, are skipped and
    counted in a per-folder summary line.

    Args:
        data_dirs: Iterable of ``(folder, label)`` pairs. ``label`` is passed
            through to ``calculate_metrics`` for every file in ``folder``.

    Returns:
        ``pandas.DataFrame`` with one row per successfully processed file, in
        folder order and, within a folder, in sorted file-name order. The
        frame is empty when nothing could be processed.
    """
    all_rows = []
    for folder, label in data_dirs:
        print(f"\nAnalyzing {folder}...")

        # os.listdir returns entries in arbitrary order; sorting makes the
        # row order of the resulting table reproducible across machines.
        file_names = sorted(os.listdir(folder))
        unreadable = 0
        unsegmented = 0

        for file in tqdm(file_names):
            path = os.path.join(folder, file)

            img = standardize_image(path)
            if img is None:
                # Not an image, or an image that could not be read.
                unreadable += 1
                continue

            l_c, a_c = get_clinical_color_channels(img)
            region = segment_wound_region(l_c)
            if region is None:
                unsegmented += 1
                continue

            all_rows.append(calculate_metrics(region, l_c, a_c, img, label))

        # Surface dropped files so silent data loss does not go unnoticed.
        print(
            f"Skipped {unreadable} unreadable and {unsegmented} unsegmented "
            f"file(s) out of {len(file_names)}."
        )

    return pd.DataFrame(all_rows)

# 8. Execution code
1. Call the pipeline function for all 3 datasets
2. Ulcer cell and healthy skin cell features are extracted into 0_dfu_features.csv
3. Testset (External), which comprises only ulcer cells, is extracted into 1_dfu_features_testset.csv

In [8]:
# Execution
# Each config entry pairs a dataset folder with the value written to the
# "ulcer_present" column for every image in that folder.
DATA_CONFIG = [(ULCER_DIR, 1), (HEALTHY_DIR, 0)]
TEST_DATA_CONFIG = [(TESTSET_DIR, 1)]

# Ulcer + healthy images -> main feature table
df_final = run_preprocessing_pipeline(DATA_CONFIG)
df_final.to_csv("0_dfu_features.csv", index=False)

# External test set -> separate feature table
df_external = run_preprocessing_pipeline(TEST_DATA_CONFIG)
df_external.to_csv("1_dfu_features_testset.csv", index=False)


Analyzing Raw_Dataset\Ulcer...


100%|██████████| 512/512 [00:05<00:00, 86.72it/s] 



Analyzing Raw_Dataset\Healthy...


100%|██████████| 543/543 [00:05<00:00, 101.99it/s]



Analyzing Raw_Dataset\TestSet...


100%|██████████| 167/167 [00:01<00:00, 85.63it/s]
