<a href="https://colab.research.google.com/github/AkarshBhatia/LeafC/blob/main/CODE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from zipfile import ZipFile

# Specify the path to your ZIP file
zip_file_path = '/content/Training-20251026T192946Z-1-001.zip'

# Specify the directory where you want to extract the contents
# If not provided, it extracts to the current working directory
extract_to_directory = 'extracted_contents'

with ZipFile(zip_file_path, 'r') as zip_object:
    zip_object.extractall(extract_to_directory)

In [16]:
XYZ = input("ENTER BASEPATH OF ALL 5 CLASSES TRAINING FOLDER")
ERT = input("ENTER PATH OF ANY ONE LEAF IMAGE SAMPLE CHOSEN FROM KNOWN/UNKNOWN FOLDER")

ENTER BASEPATH OF ALL 5 CLASSES TRAINING FOLDER/content/extracted_contents/Training
ENTER PATH OF ANY ONE LEAF IMAGE SAMPLE CHOSEN FROM KNOWN/UNKNOWN FOLDER/content/extracted_contents/Training/jatropha/0006_0026.JPG


In [32]:
import os
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from sklearn.svm import OneClassSVM, SVC
from sklearn.preprocessing import StandardScaler
import warnings

# ============== Noise control ==============
warnings.filterwarnings("ignore", category=UserWarning, module='skimage')
warnings.filterwarnings("ignore", category=RuntimeWarning)

# ============== Feature extraction ==============
def segment_leaf(image_path):
    """Segments the leaf from the background."""
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"FATAL: Image not found at {image_path}. Please check the path.")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary_mask = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        leaf_mask = np.zeros_like(gray, dtype=np.uint8)
        cv2.drawContours(leaf_mask, [largest_contour], -1, 255, cv2.FILLED)
        return leaf_mask, image
    return np.zeros_like(gray, dtype=np.uint8), image

def calculate_morphological_features(leaf_mask):
    """Calculates area, perimeter, aspect ratio, and circularity."""
    contours, _ = cv2.findContours(leaf_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return {'area': 0, 'perimeter': 0, 'aspect_ratio': 0, 'circularity': 0}

    cnt = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    x, y, w, h = cv2.boundingRect(cnt)

    aspect_ratio = float(w) / h if h != 0 else 0
    circularity = (4 * np.pi * area) / (perimeter ** 2) if perimeter != 0 else 0

    return {'area': area, 'perimeter': perimeter, 'aspect_ratio': aspect_ratio, 'circularity': circularity}

def calculate_texture_features(image, leaf_mask):
    """Calculates contrast, correlation, and energy."""
    gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    masked_gray = cv2.bitwise_and(gray_img, gray_img, mask=leaf_mask)
    masked_gray_norm = cv2.normalize(masked_gray, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)

    try:
        glcm = graycomatrix(masked_gray_norm, distances=[1], angles=[0],
                            levels=256, symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        correlation = graycoprops(glcm, 'correlation')[0, 0]
        energy = graycoprops(glcm, 'energy')[0, 0]
    except ValueError:
        return {'contrast': 0, 'correlation': 0, 'energy': 0}

    return {'contrast': contrast, 'correlation': correlation, 'energy': energy}

def extract_all_features(image_path):
    """Combines all feature extraction steps for a single image."""
    try:
        leaf_mask, image = segment_leaf(image_path)
        if np.sum(leaf_mask) == 0:
            print(f"Warning: No leaf detected in {image_path}. Returning zero features.")
            return [0] * 7

        morph = calculate_morphological_features(leaf_mask)
        tex = calculate_texture_features(image, leaf_mask)

        return [
            morph['area'], morph['perimeter'], morph['aspect_ratio'],
            morph['circularity'], tex['contrast'], tex['correlation'], tex['energy']
        ]
    except Exception as e:
        print(f"Error extracting features from {image_path}: {e}")
        return [0] * 7

# ============== Data loading (ONE class per call) ==============
def process_training_data(folder_path):
    """Processes all images inside ONE class folder."""
    class_data = {}
    class_name = os.path.basename(folder_path.rstrip(os.sep))
    class_data[class_name] = []

    print(f"Processing training data for class: {class_name}")

    if not os.path.isdir(folder_path):
        raise FileNotFoundError(f"Error: Folder not found -> {folder_path}")

    image_files = [f for f in os.listdir(folder_path)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    if not image_files:
        raise RuntimeError(f"No images found in {folder_path}")

    for img_file in image_files:
        img_path = os.path.join(folder_path, img_file)
        features = extract_all_features(img_path)
        if sum(features) > 0:
            class_data[class_name].append(features)

    print(f"  -> {len(class_data[class_name])} images processed.")
    return class_data, [class_name]  # return class name as a list

# ============== Training helpers ==============
def train_one_class_models_per_folder(folder_paths, nu=0.3, gamma='auto'):
    """
    Trains One-Class SVMs strictly one class at a time (each on its own folder).
    Returns:
        oc_models: {class_name: OneClassSVM}
        oc_scalers: {class_name: StandardScaler}
        class_names: [class_name, ...] in discovered order
    """
    oc_models, oc_scalers, class_names = {}, {}, []

    print("\nTraining One-Class SVMs (per class)...")
    for class_folder in folder_paths:
        cd, cn = process_training_data(class_folder)  # one class per call
        cname = cn[0]
        X = np.array(cd[cname])

        if X.shape[0] < 2:
            raise RuntimeError(f"Class '{cname}' needs >=2 samples; got {X.shape[0]}.")

        scaler = StandardScaler().fit(X)
        Xs = scaler.transform(X)

        oc = OneClassSVM(kernel='rbf', nu=nu, gamma=gamma).fit(Xs)

        oc_models[cname] = oc
        oc_scalers[cname] = scaler
        class_names.append(cname)
        print(f"  - {cname}: {X.shape[0]} samples")

    return oc_models, oc_scalers, class_names

def collect_all_classes(folder_paths):
    """
    Aggregates features from all class folders into one dict for multi-class training.
    Returns:
        class_data: {class_name: [feature_vec, ...]}
        class_names: [class_name, ...]
    """
    all_class_data, all_class_names = {}, []
    for p in folder_paths:
        cd, cn = process_training_data(p)  # one class
        cname = cn[0]
        all_class_data[cname] = cd[cname]
        all_class_names.append(cname)
    return all_class_data, all_class_names

def train_multiclass_svm(class_data, class_names, kernel='rbf', gamma='auto', ovo_or_ovr='ovo'):
    """
    Trains a single multi-class SVM over all classes.
    SVC is OvO by default; set decision_function_shape='ovr' for OvR.
    Returns: mc_svm, mc_scaler
    """
    X, y = [], []
    name_to_idx = {name: i for i, name in enumerate(class_names)}
    for cname in class_names:
        feats = class_data.get(cname, [])
        for feat in feats:
            X.append(feat)
            y.append(name_to_idx[cname])

    if len(set(y)) < 2:
        raise RuntimeError("Need at least 2 classes with data to train multi-class SVM.")

    X = np.array(X); y = np.array(y)
    mc_scaler = StandardScaler().fit(X)
    Xs = mc_scaler.transform(X)

    decision_shape = 'ovr' if ovo_or_ovr.lower() == 'ovr' else 'ovo'
    mc_svm = SVC(kernel=kernel, gamma=gamma, probability=True,
                 decision_function_shape=decision_shape).fit(Xs, y)

    print("\nMulti-Class SVM trained on:",
          {c: len(class_data[c]) for c in class_names})
    return mc_svm, mc_scaler
# ============== Inference ==============
def classify_with_gate(image_path, oc_models, oc_scalers, mc_svm, mc_scaler, class_names):
    """
    1) Gate with per-class One-Class SVMs (trained one-at-a-time).
    2) If any OC accepts, do final multi-class classification over all classes.
    """
    print(f"\n--- Classifying {os.path.basename(image_path)} ---")
    feats = extract_all_features(image_path)
    if sum(feats) == 0:
        print("Result: Could not extract features from the image.")
        return

    x = np.array(feats).reshape(1, -1)

    print("Step 1: One-Class SVM gate...")
    accepted = []
    for cname, oc in oc_models.items():
        xs = oc_scalers[cname].transform(x)
        if oc.predict(xs)[0] == 1:
            accepted.append(cname)

    if not accepted:
        print("\nResult: UNKNOWN (rejected by all One-Class models).")
        return

    print(f"  Accepted by: {accepted}")

    print("\nStep 2: Multi-Class SVM over all classes...")
    x_mc = mc_scaler.transform(x)
    idx = mc_svm.predict(x_mc)[0]
    final_name = class_names[idx]
    print(f"\n>>> Final Result: {final_name}")

# ============== Main ==============
if __name__ == "__main__":
    # TODO: set this to the parent folder that contains your 5 class subfolders
    base_path = XYZ

    # TODO: make sure these names EXACTLY match your directory names
    class_folder_names = [
        "alstonia",
        "chinar",
        "jatropha",
        "mango",
        "pongamia"
    ]
    folder_paths = [os.path.join(base_path, n) for n in class_folder_names]

    print("Folder check:")


    # TODO: set the path to a test image you want to classify
    TEST_IMAGE_PATH = ERT

    try:
        # One-Class SVMs: train one class at a time (your rule)
        oc_models, oc_scalers, class_names = train_one_class_models_per_folder(
            folder_paths, nu=0.1, gamma='auto'
        )

        # Multi-Class SVM: trained on all classes together
        class_data_mc, class_names_mc = collect_all_classes(folder_paths)

        # Keep order consistent with OC training (reindex if needed)
        if class_names != class_names_mc:
            # Reorder class_data_mc to follow class_names order
            class_data_mc = {name: class_data_mc[name] for name in class_names if name in class_data_mc}

        mc_svm, mc_scaler = train_multiclass_svm(
            class_data_mc, class_names, kernel='rbf', gamma='auto', ovo_or_ovr='ovo'
        )

        # Inference
        classify_with_gate(
            image_path=TEST_IMAGE_PATH,
            oc_models=oc_models,
            oc_scalers=oc_scalers,
            mc_svm=mc_svm,
            mc_scaler=mc_scaler,
            class_names=class_names
        )

    except (FileNotFoundError, NotADirectoryError, RuntimeError) as e:
        print(f"\nAn error occurred: {e}")
        print("Please ensure your paths, folder names, and data are correct and try again.")
    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")

Folder check:

Training One-Class SVMs (per class)...
Processing training data for class: alstonia
  -> 200 images processed.
  - alstonia: 200 samples
Processing training data for class: chinar
  -> 200 images processed.
  - chinar: 200 samples
Processing training data for class: jatropha
  -> 200 images processed.
  - jatropha: 200 samples
Processing training data for class: mango
  -> 200 images processed.
  - mango: 200 samples
Processing training data for class: pongamia
  -> 200 images processed.
  - pongamia: 200 samples
Processing training data for class: alstonia
  -> 200 images processed.
Processing training data for class: chinar
  -> 200 images processed.
Processing training data for class: jatropha
  -> 200 images processed.
Processing training data for class: mango
  -> 200 images processed.
Processing training data for class: pongamia
  -> 200 images processed.

Multi-Class SVM trained on: {'alstonia': 200, 'chinar': 200, 'jatropha': 200, 'mango': 200, 'pongamia': 200}



In [31]:
import os
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from sklearn.svm import OneClassSVM, SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import warnings

# Quiet minor warnings
warnings.filterwarnings("ignore", category=UserWarning, module='skimage')
warnings.filterwarnings("ignore", category=RuntimeWarning)

# ===================== Feature extraction =====================
def segment_leaf(image_path):
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"FATAL: Image not found at {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest = max(contours, key=cv2.contourArea)
        mask = np.zeros_like(gray, dtype=np.uint8)
        cv2.drawContours(mask, [largest], -1, 255, cv2.FILLED)
        return mask, image
    return np.zeros_like(gray, dtype=np.uint8), image

def calculate_morphological_features(mask):
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return {'area': 0, 'perimeter': 0, 'aspect_ratio': 0, 'circularity': 0}
    cnt = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(cnt)
    per = cv2.arcLength(cnt, True)
    x, y, w, h = cv2.boundingRect(cnt)
    ar = float(w) / h if h else 0
    circ = (4 * np.pi * area) / (per ** 2) if per else 0
    return {'area': area, 'perimeter': per, 'aspect_ratio': ar, 'circularity': circ}

def calculate_texture_features(image, mask):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    masked = cv2.bitwise_and(gray, gray, mask=mask)
    norm = cv2.normalize(masked, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    try:
        glcm = graycomatrix(norm, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
        return {
            'contrast':   graycoprops(glcm, 'contrast')[0, 0],
            'correlation':graycoprops(glcm, 'correlation')[0, 0],
            'energy':     graycoprops(glcm, 'energy')[0, 0],
        }
    except ValueError:
        return {'contrast': 0, 'correlation': 0, 'energy': 0}

def extract_all_features(image_path):
    try:
        mask, img = segment_leaf(image_path)
        if np.sum(mask) == 0:
            # print(f"Warning: No leaf detected in {image_path}.")
            return None
        morph = calculate_morphological_features(mask)
        tex = calculate_texture_features(img, mask)
        return [
            morph['area'], morph['perimeter'], morph['aspect_ratio'],
            morph['circularity'], tex['contrast'], tex['correlation'], tex['energy']
        ]
    except Exception as e:
        print(f"Error extracting features from {image_path}: {e}")
        return None

# ===================== Data loading =====================
def load_all_data(parent_dir):
    """Scan parent_dir for class subfolders, build X, y, class_names."""
    X, y, class_names = [], [], []
    if not os.path.isdir(parent_dir):
        raise NotADirectoryError(f"FATAL: '{parent_dir}' not found.")
    class_dirs = sorted([d for d in os.listdir(parent_dir)
                         if os.path.isdir(os.path.join(parent_dir, d))])
    if not class_dirs:
        raise NotADirectoryError(f"FATAL: No class subdirectories in '{parent_dir}'.")
    print("Loading data...")
    for idx, cname in enumerate(class_dirs):
        cdir = os.path.join(parent_dir, cname)
        class_names.append(cname)
        imgs = [os.path.join(cdir, f) for f in os.listdir(cdir)
                if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        print(f"  - {cname}: {len(imgs)} files")
        for p in imgs:
            feats = extract_all_features(p)
            if feats is not None:
                X.append(feats); y.append(idx)
    if not X:
        raise RuntimeError("No valid images found after feature extraction.")
    print(f"Loaded {len(X)} samples from {len(class_names)} classes.")
    return np.array(X), np.array(y), class_names

# ===================== Training =====================
def train_one_class_models(X_train, y_train, class_names, nu=0.3, gamma='auto'):
    """Train OC-SVM per class on that class's subset only."""
    oc_models, oc_scalers = {}, {}
    print("\nTraining One-Class SVMs (per class)...")
    for i, cname in enumerate(class_names):
        Xi = X_train[y_train == i]
        if Xi.shape[0] < 2:
            print(f"  ! Skipping '{cname}' (<2 samples)")
            continue
        scaler = StandardScaler().fit(Xi)
        Xs = scaler.transform(Xi)
        oc = OneClassSVM(kernel='rbf', nu=nu, gamma=gamma).fit(Xs)
        oc_models[cname] = oc
        oc_scalers[cname] = scaler
        print(f"  - {cname}: {Xi.shape[0]} samples")
    if not oc_models:
        raise RuntimeError("No OC models trained; not enough data per class.")
    return oc_models, oc_scalers

def train_multiclass_svm(X_train, y_train, kernel='rbf', gamma='auto'):
    """Train a single multi-class SVM (OvO default)."""
    print("\nTraining Multi-Class SVM...")
    scaler = StandardScaler().fit(X_train)
    Xs = scaler.transform(X_train)
    clf = SVC(kernel=kernel, gamma=gamma, probability=True).fit(Xs, y_train)
    print("Multi-Class SVM trained.")
    return clf, scaler

# ===================== Evaluation (OC gate + Multi) =====================
def evaluate_model(X_data, y_data, oc_models, oc_scalers, mc_svm, mc_scaler, class_names):
    """
    OC gate: if no OC accepts -> UNKNOWN; else final label from multi-class SVM.
    UNKNOWN index = len(class_names) (added only for report).
    """
    print("\nEvaluating (OC gate -> Multi)...")
    UNKNOWN_IDX = len(class_names)
    y_pred = []

    for feats in X_data:
        x = feats.reshape(1, -1)
        accepted = False
        # Gate with all OC models
        for cname, oc in oc_models.items():
            xs = oc_scalers[cname].transform(x)
            if oc.predict(xs)[0] == 1:
                accepted = True
                break
        if not accepted:
            y_pred.append(UNKNOWN_IDX)
            continue
        # Multi-class decision
        x_mc = mc_scaler.transform(x)
        y_pred.append(int(mc_svm.predict(x_mc)[0]))

    labels = list(range(len(class_names))) + [UNKNOWN_IDX]
    names = class_names + ["UNKNOWN"]
    print("\nClassification Report:")
    print(classification_report(y_data, y_pred, labels=labels, target_names=names, zero_division=0))

if __name__ == "__main__":

    base_path = XYZ  # parent folder

    # Five separate class paths
    alstonia_path = os.path.join(base_path, "alstonia ")
    chinar_path   = os.path.join(base_path, "chinar")
    jatropha_path = os.path.join(base_path, "jatropha")
    mango_path    = os.path.join(base_path, "mango")
    pongamia_path  = os.path.join(base_path, "pongamia")  # ensure exact spelling

    # Combined list (useful for sanity checks or per-class ops if needed)
    folder_paths = [alstonia_path, chinar_path, jatropha_path, mango_path, pongamia_path]

    # Sanity print
    print("Folder check:")


    try:
        # Load everything from the parent (multi-class dataset)
        X, y, class_names = load_all_data(base_path)

        # Split for evaluation
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.20, random_state=42, stratify=y
        )

        # Train One-Class models strictly per class
        oc_models, oc_scalers = train_one_class_models(
            X_train, y_train, class_names, nu=0.1, gamma='auto'
        )

        # Train Multi-Class model over all classes together
        mc_svm, mc_scaler = train_multiclass_svm(X_train, y_train, kernel='rbf', gamma='auto')

        # Evaluate on the hold-out test set (UNKNOWN if OC gate rejects)
        evaluate_model(X_test, y_test, oc_models, oc_scalers, mc_svm, mc_scaler, class_names)

    except (FileNotFoundError, NotADirectoryError, RuntimeError) as e:
        print(f"\nAn error occurred: {e}")
    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")

Folder check:
Loading data...
  - .ipynb_checkpoints: 0 files
  - alstonia: 200 files
  - chinar: 200 files
  - jatropha: 200 files
  - mango: 200 files
  - pongamia: 200 files
Loaded 1000 samples from 6 classes.

Training One-Class SVMs (per class)...
  ! Skipping '.ipynb_checkpoints' (<2 samples)
  - alstonia: 160 samples
  - chinar: 160 samples
  - jatropha: 160 samples
  - mango: 160 samples
  - pongamia: 160 samples

Training Multi-Class SVM...
Multi-Class SVM trained.

Evaluating (OC gate -> Multi)...

Classification Report:
                    precision    recall  f1-score   support

.ipynb_checkpoints       0.00      0.00      0.00         0
          alstonia       0.47      0.23      0.31        40
            chinar       0.46      0.65      0.54        40
          jatropha       0.45      0.35      0.39        40
             mango       0.75      0.75      0.75        40
          pongamia       0.68      0.70      0.69        40
           UNKNOWN       0.00      0.00   