<a href="https://colab.research.google.com/github/3791xk/invasive-plants-ml/blob/main/Another_copy_of_LeafDamageConvNext_with_crop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

80-10-10 split

Load the Hawaii invasive species dataset

In [None]:
import os
import time
from tempfile import TemporaryDirectory
from PIL import Image
import pandas as pd
from datasets import load_dataset
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, confusion_matrix
import random

YOLO setup: crop images to the detected leaf as a preprocessing step

In [None]:
# Install the YOLO toolkit (ultralytics), the Hugging Face Hub client and OpenCV; -q keeps pip output quiet.
!pip install ultralytics huggingface_hub opencv-python -q

In [None]:
# yolo setup
USE_YOLO = True
YOLO_CONF_THRESHOLD = 0.55  # detections with a lower confidence are skipped
YOLO_MIN_GREEN = 0.0  # minimum green fraction a crop needs to be eligible
YOLO_PAD_RATIO = 0.05  # padding added per side, as a fraction of box width/height


def crop_with_yolo_most_green(pil_img: Image.Image, cache_key: str | None = None) -> Image.Image:
    """Identity function when YOLO is disabled."""
    return pil_img


if USE_YOLO:
    import cv2
    from ultralytics import YOLO
    from huggingface_hub import hf_hub_download

    print("\n[YOLO] Downloading model from Hugging Face...")

    try:
        # Only the download and model construction are guarded, so that
        # programming errors in the helpers below are not silently swallowed.
        model_path = hf_hub_download(
            repo_id="pedromiguelsanchez/yolo-plant-leaf-detection",
            filename="yolo11x_leaf.pt"
        )
        print(f"[YOLO] Model downloaded to: {model_path}")

        yolo_model = YOLO(model_path)

    except Exception as e:
        # Deliberate best-effort: fall back to the identity crop defined above.
        print(f"[YOLO] Error loading model: {e}")
        print("[YOLO] Disabling YOLO, using original images")
        USE_YOLO = False

    else:
        # cache: key string -> (x1p, y1p, x2p, y2p), or None when no usable
        # box was found (the full image is used in that case)
        crop_cache = {}

        def green_fraction(pil_img: Image.Image) -> float:
            """Compute fraction of 'green' pixels in a PIL RGB image using HSV thresholding.

            Returns 0.0 for an empty image.
            """
            arr = np.array(pil_img.convert("RGB"))
            # Check for emptiness before converting: there is nothing to
            # threshold in a zero-sized crop.
            if arr.size == 0:
                return 0.0

            hsv = cv2.cvtColor(arr, cv2.COLOR_RGB2HSV)

            # Hue 35-85 with saturation/value >= 40 is treated as "green".
            lower_green = np.array([35, 40, 40])
            upper_green = np.array([85, 255, 255])

            mask = cv2.inRange(hsv, lower_green, upper_green)
            return float((mask > 0).mean())

        def _best_green_box(pil_img: Image.Image) -> tuple[int, int, int, int] | None:
            """Return the padded box with the highest green fraction, or None.

            Only detections with confidence >= YOLO_CONF_THRESHOLD whose crop
            reaches YOLO_MIN_GREEN are considered.
            """
            w, h = pil_img.size
            results = yolo_model(pil_img, verbose=False)
            if len(results) == 0 or results[0].boxes is None or len(results[0].boxes) == 0:
                return None

            best_green = -1.0
            best_coords = None

            for box in results[0].boxes:
                conf = float(box.conf.cpu().item())
                if conf < YOLO_CONF_THRESHOLD:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
                pad_w = int((x2 - x1) * YOLO_PAD_RATIO)
                pad_h = int((y2 - y1) * YOLO_PAD_RATIO)

                # Pad the box, clamped to the image bounds.
                coords = (
                    max(0, x1 - pad_w),
                    max(0, y1 - pad_h),
                    min(w, x2 + pad_w),
                    min(h, y2 + pad_h),
                )
                # Skip degenerate boxes (zero width or height after rounding).
                if coords[2] <= coords[0] or coords[3] <= coords[1]:
                    continue

                gf = green_fraction(pil_img.crop(coords))
                if gf >= YOLO_MIN_GREEN and gf > best_green:
                    best_green = gf
                    best_coords = coords

            return best_coords

        def crop_with_yolo_most_green(pil_img: Image.Image, cache_key: str | None = None) -> Image.Image:
            """Run YOLO once per image (cached by cache_key), crop to most-green box, else return original.

            Args:
                pil_img: image to crop.
                cache_key: optional key identifying the image; when given, the
                    chosen box (or the absence of one) is stored in
                    ``crop_cache`` so YOLO is not run again for the same key.

            Returns:
                The crop of ``pil_img`` at the selected box, or ``pil_img``
                itself when no detection qualifies.
            """
            if cache_key is not None and cache_key in crop_cache:
                coords = crop_cache[cache_key]
            else:
                coords = _best_green_box(pil_img)
                if cache_key is not None:
                    # Negative results are cached too, so images without a
                    # usable detection do not trigger YOLO on every epoch.
                    crop_cache[cache_key] = coords

            return pil_img if coords is None else pil_img.crop(coords)

        print("[YOLO] Model loaded successfully!")

print("USE_YOLO =", USE_YOLO)

In [None]:
from datasets import load_dataset

# Login using e.g. `huggingface-cli login` to access this dataset
# Fetch the "full" configuration and its "both" split from the Hugging Face Hub.
ds = load_dataset(
    "imageomics/invasive_plants_hawaii",
    name="full",
    split="both",
)

In [None]:
# Label columns used as the multi-label classification targets.
cols = ['healthy', 'leaf_miner', 'rust', 'other_insect', 'mechanical_damage']

# Convert the dataset to a pandas DataFrame and turn the literal string
# 'nan' into a real missing value.
leaf_df = ds.to_pandas()
leaf_df.replace('nan', np.nan, inplace=True)

# Encode the answers as numeric strings: "Yes" -> '1', "No" -> '0',
# "Maybe" -> '0.5'.
answer_codes = {'Yes': '1', 'No': '0', 'Maybe': '0.5'}
bool_df = leaf_df.replace(
    to_replace=list(answer_codes.keys()),
    value=list(answer_codes.values()),
)
bool_df.head()

ConvNext

Build a list of images with their labels and split it into train/validation/test sets (80/10/10)

In [None]:
# One record per row: the image plus a float label vector ordered like `cols`.
# Missing labels become 0.0; everything else is parsed as a float.
data = [
    {
        'image': row['image'],
        'labels': [0.0 if pd.isna(row[col]) else float(row[col]) for col in cols],
    }
    for _, row in bool_df.iterrows()
]

# Shuffle with a fixed seed so the split is reproducible.
random.seed(43)
random.shuffle(data)

# 80% train / 10% validation / 10% test, cut at the same indices each run.
n = len(data)
train_end = int(0.8*n)
val_end = int(0.9*n)
train_data = data[:train_end]
val_data = data[train_end:val_end]
test_data = data[val_end:]

Custom Dataset class for PyTorch

In [None]:
class PlantDataset(Dataset):
    """Dataset yielding ``(transformed image, label tensor)`` pairs.

    Each element of ``data`` is a dict with an ``'image'`` entry (a dict
    holding encoded ``'bytes'``, a PIL image, or an array accepted by
    ``Image.fromarray``) and a ``'labels'`` entry (a list of floats).
    Images are converted to RGB, cropped with ``crop_with_yolo_most_green``
    and then passed through ``transform``.
    """

    # Counter handing out a distinct id to every instance. The crop cache is
    # shared globally, so keys must not collide between the train, validation
    # and test datasets (a bare index would make image i of one split reuse
    # the crop box computed for image i of another split).
    _next_uid = 0

    def __init__(self, data, transform):
        """Store the records and the transform applied to each cropped image."""
        self.data = data
        self.transform = transform
        self._uid = PlantDataset._next_uid
        PlantDataset._next_uid += 1

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, crop and transform record ``idx``; return ``(image, labels)``."""
        item = self.data[idx]
        img = item['image']

        # If it's a dict with bytes, load from bytes
        if isinstance(img, dict) and 'bytes' in img:
            from io import BytesIO
            img = Image.open(BytesIO(img['bytes']))
        elif not isinstance(img, Image.Image):
            img = Image.fromarray(img)

        img = img.convert('RGB')
        # Namespace the cache key with this instance's id so different
        # datasets never share cached crop boxes.
        img = crop_with_yolo_most_green(img, cache_key=f"{self._uid}:{idx}")
        labels = torch.tensor(item['labels'], dtype=torch.float32)
        return self.transform(img), labels

DataLoaders and model setup

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
#dataloaders
# Per-channel normalisation statistics shared by the train and eval pipelines.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize plus random augmentation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)
])

# Evaluation pipeline: same resize/normalisation but no random augmentation,
# so validation and test metrics are deterministic and reflect the real images.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)
])


train_loader = DataLoader(PlantDataset(train_data, transform), batch_size=32, shuffle=True)
val_loader = DataLoader(PlantDataset(val_data, eval_transform), batch_size=32)
test_loader = DataLoader(PlantDataset(test_data, eval_transform), batch_size=32)

#setup Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
# weight set it used to select.
model = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1)
# Replace the final linear layer so the head outputs one logit per label column.
model.classifier[2] = nn.Linear(model.classifier[2].in_features, len(cols))
model = model.to(device)

# Multi-label setup: an independent sigmoid/BCE term per label.
lossFn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)


Train ConvNeXt

In [None]:
# Number of full passes over the training set, and the per-pass mean losses.
passThroughs = 10
history = {'train': [], 'val': []}

n_train_batches = len(train_loader)
n_val_batches = len(val_loader)

for pass_num in range(passThroughs):
    print(f"\n{'='*50}")
    print(f"Pass {pass_num+1}/{passThroughs}")
    print(f"{'='*50}")

    # --- Training phase: one optimisation step per batch ---
    model.train()
    train_loss = 0
    print("Training...")
    for batch_no, (imgs, labels) in enumerate(train_loader, start=1):
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = lossFn(model(imgs), labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        train_loss += batch_loss

        # Progress report every 10 batches
        if batch_no % 10 == 0:
            print(f"  Batch {batch_no}/{n_train_batches} - Loss: {batch_loss:.4f}")

    # --- Validation phase: accumulate the loss without tracking gradients ---
    model.eval()
    val_loss = 0
    print("Validating...")
    with torch.no_grad():
        for imgs, labels in val_loader:
            batch_out = model(imgs.to(device))
            val_loss += lossFn(batch_out, labels.to(device)).item()

    # Convert the summed batch losses into per-batch means and record them.
    train_loss /= n_train_batches
    val_loss /= n_val_batches
    for phase, value in (('train', train_loss), ('val', val_loss)):
        history[phase].append(value)

    print(f"  Pass {pass_num+1} Complete:")
    print(f"  Train Loss: {train_loss:.4f}")
    print(f"  Val Loss: {val_loss:.4f}")

print("\n Training finished")

Testing and Results

In [None]:
# Evaluate on the test set with a 0.5 decision threshold.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        # Logits -> probabilities -> boolean predictions
        preds = (torch.sigmoid(outputs) > 0.5).cpu().numpy()
        all_preds.append(preds)
        all_labels.append(labels.numpy())

all_preds = np.vstack(all_preds)
all_labels = np.vstack(all_labels)

print("\nTest Accuracy:")
for i, name in enumerate(cols):
    # Labels equal to 0.5 encode "Maybe" answers; a boolean prediction can
    # never equal 0.5, so they are excluded instead of counted as errors.
    clear = all_labels[:, i] != 0.5
    if not clear.any():
        print(f"{name}: No clear labels")
        continue
    acc = (all_preds[clear, i] == all_labels[clear, i]).mean()
    print(f"{name}: {acc:.2%}")

# Plot the per-pass mean training and validation losses
plt.plot(history['train'], label='Train')
plt.plot(history['val'], label='Val')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.savefig('loss.png')
plt.show()


In [None]:
from sklearn.metrics import f1_score

# Evaluate on the test set with a lower (0.35) decision threshold and report
# per-class accuracy and F1, ignoring "Maybe" (0.5) labels.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        preds = (torch.sigmoid(outputs) > 0.35).cpu().numpy()
        all_preds.append(preds)
        all_labels.append(labels.numpy())

all_preds = np.vstack(all_preds)
all_labels = np.vstack(all_labels)

print("\nTest Results:")
print(f"{'Class':<20} {'Accuracy':<12} {'F1 Score':<12}")
print("="*44)

f1_scores = []
for i, name in enumerate(cols):
    # Remove samples where label is 0.5 (Maybe)
    mask = all_labels[:, i] != 0.5
    labels_filtered = all_labels[mask, i]
    preds_filtered = all_preds[mask, i]

    if len(labels_filtered) > 0:
        acc = (preds_filtered == labels_filtered).mean()
        # Pass integer 0/1 arrays so both inputs have the same binary type;
        # zero_division=0 keeps the 0.0 result for classes with no positives
        # without emitting a warning.
        f1 = f1_score(
            labels_filtered.astype(int),
            preds_filtered.astype(int),
            average='binary',
            zero_division=0,
        )
        f1_scores.append(f1)
        print(f"{name:<20} {acc:.2%}          {f1:.4f}")
    else:
        print(f"{name:<20} No clear labels")

# Overall F1 (mean over the classes that had at least one clear label)
print("="*44)
if f1_scores:
    overall_f1 = np.mean(f1_scores)
    print(f"{'Overall F1 (macro)':<20} {overall_f1:.4f}")
else:
    # Avoid np.mean([]) which yields NaN plus a RuntimeWarning.
    overall_f1 = float('nan')
    print(f"{'Overall F1 (macro)':<20} n/a")

# Plot
plt.plot(history['train'], label='Train')
plt.plot(history['val'], label='Val')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.savefig('loss.png')
plt.show()

print("\nDone!")

In [None]:
from sklearn.metrics import roc_auc_score

# ROC-AUC ranks samples by score, so it needs the sigmoid probabilities
# rather than thresholded 0/1 predictions. Collect them from the test set.
model.eval()
prob_batches, label_batches = [], []

with torch.no_grad():
    for imgs, labels in test_loader:
        outputs = model(imgs.to(device))
        prob_batches.append(torch.sigmoid(outputs).cpu().numpy())
        label_batches.append(labels.numpy())

test_probs = np.vstack(prob_batches)
test_labels = np.vstack(label_batches)

roc_aucs = {}

for i, name in enumerate(cols):
    # Drop "Maybe" (0.5) labels instead of letting the int cast turn them
    # into negatives.
    clear = test_labels[:, i] != 0.5
    labels_int = test_labels[clear, i].astype(int)
    preds_prob = test_probs[clear, i]

    # Skip if only one class present (ROC-AUC is undefined in that case)
    if len(np.unique(labels_int)) < 2:
        continue

    auc = roc_auc_score(labels_int, preds_prob)
    roc_aucs[name] = auc
    print(f"{name}: ROC-AUC = {auc:.4f}")

# Macro ROC-AUC
if len(roc_aucs) > 0:
    macro_auc = np.mean(list(roc_aucs.values()))
    print(f"Macro ROC-AUC (valid classes only): {macro_auc:.4f}")
else:
    print("No valid classes to compute ROC-AUC")
