In [5]:
import os

import numpy as np
from skimage import io, filters, feature
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline

# Load dataset: one sub-directory per class under ROOT_DIR; only TIFF files
# are collected, and each file gets the integer label of its class directory.
ROOT_DIR = r"K:\ALL\coen\laser\exp1\output"
class_map = {"clean": 0, "soiled": 1}

image_paths = []
labels = []

for class_name, label in class_map.items():
    class_dir = os.path.join(ROOT_DIR, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and anything derived from it, such as shuffled CV folds)
    # reproducible across runs and machines.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith((".tif", ".tiff")):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(label)

labels = np.array(labels)
print(f"Total images: {len(image_paths)}")

# Feature extraction functions
def extract_channel_stats(channel):
    """Summarise one channel with seven scalar statistics.

    Returns a list in this fixed order: mean, standard deviation, minimum,
    maximum, median, 10th percentile, 90th percentile.
    """
    reducers = (np.mean, np.std, np.min, np.max, np.median)
    summary = [reduce_fn(channel) for reduce_fn in reducers]
    # Both percentiles are computed in a single call and appended in order.
    summary.extend(np.percentile(channel, [10, 90]))
    return summary

# Describes distribution of brightness values
def extract_histogram(channel, bins=16, value_range=(0, 255)):
    """Return the normalised intensity histogram of ``channel`` as a list.

    Parameters
    ----------
    channel : array-like
        Pixel values of a single channel.
    bins : int
        Number of equal-width histogram bins.
    value_range : tuple
        ``(low, high)`` interval covered by the bins. Values outside the
        interval are ignored by ``np.histogram``. Defaults to ``(0, 255)``.

    Returns
    -------
    list of float
        ``bins`` fractions that sum to 1, or all zeros when no value falls
        inside ``value_range`` (including empty input).
    """
    counts, _ = np.histogram(channel, bins=bins, range=value_range)
    counts = counts.astype(float)
    total = counts.sum()
    # Guard the normalisation: 0/0 would otherwise put NaNs into the features.
    if total > 0:
        counts /= total
    return counts.tolist()

# Describes texture differences
def extract_lbp(channel, P=8, R=1):
    """Return the normalised histogram of uniform local binary patterns.

    Parameters
    ----------
    channel : numpy.ndarray
        Single-channel image. Non-uint8 input is min-max rescaled to 0..255
        and cast to uint8 before the LBP is computed.
    P : int
        Number of neighbour points passed to ``local_binary_pattern``.
    R : int or float
        Neighbourhood radius passed to ``local_binary_pattern``.

    Returns
    -------
    list of float
        Always ``P + 2`` fractions, so every image yields a feature vector of
        the same length.
    """
    # Normalize to 8-bit
    if channel.dtype != np.uint8:
        lowest = channel.min()
        span = channel.max() - lowest
        if span > 0:
            ch = ((channel - lowest) / span * 255).astype(np.uint8)
        else:
            # Constant channel: rescaling would divide by zero, so map it to 0.
            ch = np.zeros(channel.shape, dtype=np.uint8)
    else:
        ch = channel

    lbp = feature.local_binary_pattern(ch, P=P, R=R, method='uniform')
    # The 'uniform' method produces codes 0..P+1 (see scikit-image docs), so
    # use a fixed P + 2 bins. Deriving the bin count from lbp.max() would
    # shorten the histogram whenever the highest code is absent from an image.
    n_bins = P + 2
    lbp_hist, _ = np.histogram(lbp, bins=n_bins, range=(0, n_bins))
    lbp_hist = lbp_hist.astype(float) / np.sum(lbp_hist)
    return lbp_hist.tolist()

# Describes edges
def extract_edges(channel):
    """Return ``[mean, std]`` of the Sobel filter response of ``channel``."""
    sobel_response = filters.sobel(channel)
    edge_mean = np.mean(sobel_response)
    edge_std = np.std(sobel_response)
    return [edge_mean, edge_std]

def extract_features(img, channels='all'):
    """Concatenate the per-channel features of the selected channels.

    ``channels`` selects which planes of ``img`` (indexed on the last axis)
    are used: ``'all'`` takes every channel, ``'rgb'`` takes channels 0-2 and
    ``'laser'`` takes channel 3 onwards. Any other value raises ``ValueError``.

    For each selected channel the basic statistics, intensity histogram, LBP
    histogram and edge statistics are appended in that order, and the result
    is returned as a 1-D NumPy array.
    """
    if channels not in ('all', 'rgb', 'laser'):
        raise ValueError("Invalid channel selection")

    if channels == 'rgb':
        selected = range(3)
    else:
        first = 3 if channels == 'laser' else 0
        selected = range(first, img.shape[2])

    collected = []
    for idx in selected:
        plane = img[:, :, idx]
        collected.extend(extract_channel_stats(plane))
        collected.extend(extract_histogram(plane))
        collected.extend(extract_lbp(plane))
        collected.extend(extract_edges(plane))
    return np.array(collected)

# Build feature matrices for different modalities
X_all = []
X_rgb = []
X_laser = []

print("Extracting features")
for path in image_paths:
    img = io.imread(path)
    if img.ndim != 3 or img.shape[2] != 5:
        raise ValueError(f"Image {path} is not 5-channel")

    # Extract each channel group only once. The 'all' feature vector is the
    # rgb channels (0-2) followed by the laser channels (3-4) in order, so it
    # equals the concatenation of the two partial vectors; recomputing it
    # would process every channel a second time.
    rgb_feats = extract_features(img, channels='rgb')
    laser_feats = extract_features(img, channels='laser')

    X_all.append(np.concatenate([rgb_feats, laser_feats]))
    X_rgb.append(rgb_feats)
    X_laser.append(laser_feats)

X_all = np.array(X_all)
X_rgb = np.array(X_rgb)
X_laser = np.array(X_laser)

print(f"Feature shapes: all={X_all.shape}, rgb={X_rgb.shape}, laser={X_laser.shape}")

# 5-fold cv for the three modalities
def evaluate_svm(X, y, description):
    """Cross-validate a scaled linear SVM and print its scores.

    A ``StandardScaler`` + ``LinearSVC`` pipeline is evaluated with a shuffled,
    stratified 5-fold split (fixed seed). Scaling lives inside the pipeline,
    so it is re-fitted on the training part of every fold.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Class labels aligned with the rows of ``X``.
    description : str
        Label used in the printed summary lines.

    Returns
    -------
    tuple
        ``(scores, bal_scores)``: per-fold accuracy and per-fold balanced
        accuracy arrays.
    """
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('svm', LinearSVC(max_iter=5000, class_weight='balanced', random_state=42)),
    ])

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    # Score both metrics from one set of fitted folds instead of running the
    # whole cross-validation once per metric.
    results = cross_validate(
        pipe, X, y, cv=cv, scoring=('accuracy', 'balanced_accuracy')
    )
    scores = results['test_accuracy']
    bal_scores = results['test_balanced_accuracy']
    print(f"\n{description}: Accuracy = {scores.mean():.3f} ± {scores.std():.3f}")
    print(f"{description}: Balanced Acc = {bal_scores.mean():.3f} ± {bal_scores.std():.3f}")
    return scores, bal_scores

print("Evaluating models")

# Run the same evaluation on each feature set, in order: combined, RGB, laser.
(
    (scores_all, bal_scores_all),
    (scores_rgb, bal_scores_rgb),
    (scores_laser, bal_scores_laser),
) = [
    evaluate_svm(feature_matrix, labels, title)
    for feature_matrix, title in (
        (X_all, "RGB + Laser"),
        (X_rgb, "RGB only"),
        (X_laser, "Laser only"),
    )
]




Total images: 80
Extracting features
Feature shapes: all=(80, 175), rgb=(80, 105), laser=(80, 70)
Evaluating models

RGB + Laser: Accuracy = 0.662 ± 0.064
RGB + Laser: Balanced Acc = 0.558 ± 0.086

RGB only: Accuracy = 0.625 ± 0.079
RGB only: Balanced Acc = 0.500 ± 0.083

Laser only: Accuracy = 0.388 ± 0.061
Laser only: Balanced Acc = 0.292 ± 0.037
