In [1]:
%pip install segmentation-models-pytorch
%pip install albumentations==0.4.6

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting numpy>=1.11.1 (from albumentations==0.4.6)
  Using cached numpy-2.2.6-cp311-cp311-win_amd64.whl.metadata (60 kB)
Using cached numpy-2.2.6-cp311-cp311-win_amd64.whl (12.9 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
Successfully installed numpy-2.2.6
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.2.6 which is incompatible.

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
%pip install "numpy<2.0"

Collecting numpy<2.0
  Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl (15.8 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
Successfully installed numpy-1.26.4
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [38]:
import os
from pathlib import Path
from tqdm import tqdm
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import albumentations as A
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp
from sklearn.model_selection import train_test_split
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [39]:
class cropDataset(Dataset):
  def __init__(self, img_dir, mask_dir, files_list, transform=None):
    self.img_dir = Path(img_dir)
    self.mask_dir = Path(mask_dir)
    self.files = files_list
    self.transform = transform

  def __len__(self):
    return len(self.files)

  def __getitem__(self, idx):
      name = self.files[idx]
      img_path = os.path.join(self.img_dir, name)
      mask_path = os.path.join(self.mask_dir, name)

      img = cv2.imread(img_path)
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

      mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
      mask = (mask > 0).astype(np.float32)

      if self.transform:
          augmented = self.transform(image=img, mask=mask)
          img = augmented['image']     # already Tensor [3, H, W]
          mask = augmented['mask']     # already Tensor [H, W]
          mask = mask.unsqueeze(0)     # make it [1, H, W]

      else:
          img = torch.from_numpy(img.transpose(2, 0, 1)).float()
          mask = torch.from_numpy(mask).unsqueeze(0).float()

      return img, mask


In [40]:
def get_files_with_masks(images_dir, masks_dir):
  images = set([f for f in os.listdir(images_dir) if f.endswith('.png')])
  masks = set([f for f in os.listdir(masks_dir) if f.endswith('.png')])
  return sorted(list(images & masks))

In [41]:
def compute_iou(pred, target, num_classes):
  ious = []
  for cls in range(num_classes):
    pred_inds = pred == cls # boolean mask which is same shape as image for the predicted mask
    target_inds = target == cls
    intersection = (pred_inds & target_inds).sum() # .sum counts how many are true at the same place
    union = (pred_inds | target_inds).sum()

    if union == 0:
      iou = float('nan')
    else:
      iou = intersection/union # percentage overlap

    ious.append(iou)
    valid_ious = [x for x in ious if str(x) != 'nan']
    if len(valid_ious) == 0:
      miou = 0.0
    else:
      miou = np.mean(valid_ious)

    return miou, ious

In [42]:
def visualize(img_tensor, mask_gt, mask_pred, out_path, class_colors=None):
    # Convert image tensor to numpy
    if isinstance(img_tensor, torch.Tensor):  # if its a tensor
        img = img_tensor.detach().cpu().permute(1, 2, 0).numpy()
    else:
        img = img_tensor

    # Normalize & convert to uint8
    img = np.clip(img, 0, 1)
    img = (img * 255).astype(np.uint8)

    # Convert masks to numpy
    if isinstance(mask_gt, torch.Tensor):
        mask_gt = mask_gt.detach().cpu().numpy()
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.detach().cpu().numpy()

    # Fix mask shapes
    mask_gt = np.squeeze(mask_gt)
    mask_pred = np.squeeze(mask_pred)

    # Threshold for binary mask
    mask_pred = (mask_pred > 0.5).astype(np.uint8)
    mask_gt = mask_gt.astype(np.uint8)

    # Prepare overlay
    overlay = img.copy()

    # Define colors (binary)
    if class_colors is None:
        class_colors = {
            0: (0, 0, 0),      # background - black
            1: (0, 255, 0),    # weeds - green
        }

    # Overlay prediction
    for cls, color in class_colors.items():
        mask = (mask_pred == cls)

        # Make sure mask is 2D and boolean
        mask = np.squeeze(mask).astype(bool)

        if mask.sum() == 0:
            continue

        # Blend with transparency
        overlay[mask] = (overlay[mask] * 0.5 + np.array(color) * 0.5).astype(np.uint8)

    # Plot and save
    fig, axs = plt.subplots(1, 3, figsize=(12, 4))
    axs[0].imshow(img)
    axs[0].set_title("Image")
    axs[0].axis("off")

    axs[1].imshow(mask_gt, cmap="gray")
    axs[1].set_title("Ground Truth")
    axs[1].axis("off")

    axs[2].imshow(overlay)
    axs[2].set_title("Prediction Overlay")
    axs[2].axis("off")

    plt.tight_layout()
    plt.savefig(out_path)
    plt.close()


In [43]:
def get_pos_weight(loader):
    total_pos, total_neg = 0, 0
    for _, masks in loader:
        total_pos += masks.sum().item()
        total_neg += (1 - masks).sum().item()
    pos_weight = total_neg / (total_pos + 1e-6)
    print(f"Dynamic pos_weight = {pos_weight:.2f}")
    return torch.tensor([pos_weight])


In [44]:
def train(model, loader, optimizer, loss_fn, device, scaler=None):
    model.train()
    running_loss = 0.0

    for imgs, masks in tqdm(loader, desc='Train', leave=False):
        imgs = imgs.to(device)
        masks = masks.to(device)

        with torch.cuda.amp.autocast(enabled=(scaler is not None)):
            preds = model(imgs)
            loss = loss_fn(preds, masks)

        optimizer.zero_grad()

        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        running_loss += loss.item()


    return running_loss / len(loader)


In [None]:
import numpy as np
import torch
from tqdm import tqdm
import torch.nn.functional as F
import os

@torch.no_grad()
def eval_with_class_iou(model, loader, loss_fn, device, num_classes, save_preds_dir=None, sample_limit=5):
    model.eval()
    val_loss = 0.0

    # Accumulators for "global" IoU across the whole dataset
    # intersection[c] = total pixels where (pred==c and gt==c)
    # union[c]        = total pixels where (pred==c or  gt==c)
    intersection = np.zeros(num_classes, dtype=np.int64)
    union        = np.zeros(num_classes, dtype=np.int64)
    support      = np.zeros(num_classes, dtype=np.int64)  # GT pixel count per class (optional but useful)

    saved = 0

    for imgs, masks in tqdm(loader, desc='Eval', leave=False):
        imgs  = imgs.to(device)
        masks = masks.to(device)  # [B, H, W] (Long)

        preds = model(imgs)       # [B, C, H, W] (logits)
        loss  = loss_fn(preds, masks)
        val_loss += loss.item()

        probs = torch.sigmoid(preds)
        pred_cls = (probs > 0.5).long()

        # Move to numpy
        p = pred_cls.cpu().numpy()
        g = masks.cpu().numpy()

        # Aggregate intersections/unions **per class** over the batch
        # This computes "global IoU" (more stable than averaging per-image IoUs)
        for c in range(num_classes):
            pc = (p == c)
            gc = (g == c)
            inter = np.logical_and(pc, gc).sum()
            uni   = np.logical_or(pc, gc).sum()

            intersection[c] += inter
            union[c]        += uni
            support[c]      += gc.sum()

        # (Optional) save a few visualizations
        if save_preds_dir is not None and saved < sample_limit:
            for i in range(min(p.shape[0], sample_limit - saved)):
                img_np = imgs[i].cpu().permute(1, 2, 0).numpy()
                img_np = (img_np - img_np.min()) / (img_np.max() - img_np.min() + 1e-9)
                out_path = os.path.join(str(save_preds_dir), f'eval_{saved}.png')
                visualize(img_np, g[i], p[i], out_path)  # your visualize() handles numpy inputs
                saved += 1
                if saved >= sample_limit:
                    break

    # Compute mean loss
    avg_loss = val_loss / max(1, len(loader))

    # Per-class IoU (ignore classes with union == 0)
    per_class_iou = []
    for c in range(num_classes):
        if union[c] == 0:
            per_class_iou.append(float('nan'))
        else:
            per_class_iou.append(float(intersection[c]) / float(union[c]))

    # mIoU over classes that actually appear
    valid = [x for x in per_class_iou if not np.isnan(x)]
    avg_miou = float(np.mean(valid)) if len(valid) else 0.0

    return avg_loss, avg_miou, per_class_iou, support


In [51]:
def main():
    img_size = 512
    batch_size = 2
    num_classes = 2
    epochs = 60
    lr = 1e-4
    num_workers = 0
    use_amp = True

    images_dir = 'CoFly-WeedDB/CoFly-WeedDB/images'
    masks_dir = 'CoFly-WeedDB/CoFly-WeedDB/labels'
    os.makedirs('output', exist_ok=True)
    preds_dir = Path('output') / 'preds'
    preds_dir.mkdir(parents=True, exist_ok=True)

    files = get_files_with_masks(images_dir, masks_dir)
    print("Files found:", files)
    train_files, val_files = train_test_split(files, test_size=0.2, random_state=42)
    print(f"Found {len(files)} labeled images. Train: {len(train_files)}, Val: {len(val_files)}")

    train_transform = A.Compose([
        A.Resize(img_size, img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.RandomBrightnessContrast(p=0.4),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    val_transform = A.Compose([
        A.Resize(img_size, img_size),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    

    # Datasets
    train_ds = cropDataset(images_dir, masks_dir, train_files, train_transform)
    val_ds = cropDataset(images_dir, masks_dir, val_files, val_transform)

    # DataLoaders
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)


    
    
    model = smp.Unet(
        encoder_name='resnet34',
        encoder_weights='imagenet',
        in_channels=3,
        classes=1
    )

    dice = smp.losses.DiceLoss(mode='binary')
    focal = smp.losses.FocalLoss(mode='binary')

    def loss_fn(preds, targets):
        bce = F.binary_cross_entropy_with_logits(preds, targets, pos_weight=pos_weight)
        
        # Dice loss (foreground overlap)
        probs = torch.sigmoid(preds)
        smooth = 1e-6
        intersection = (probs * targets).sum()
        dice = 1 - (2. * intersection + smooth) / (probs.sum() + targets.sum() + smooth)
        
        return 0.5 * bce + 0.5 * dice



    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=3
    )
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp and torch.cuda.is_available())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    pos_weight = get_pos_weight(train_loader).to(device)

    best_miou = -1.0
    for epoch in range(1, epochs + 1):
        print(f"\nEpoch {epoch}/{epochs}")
        train_loss = train(model, train_loader, optimizer, loss_fn, device,
                           scaler=scaler if use_amp else None)
        val_loss, val_miou, per_cls_iou, support = eval_with_class_iou(
        model, val_loader, loss_fn, device, num_classes,
        save_preds_dir=preds_dir, sample_limit=5
    )

        print(f"Train loss: {train_loss:.4f} | Val loss: {val_loss:.4f} | Val mIoU: {val_miou:.4f}")
        print("Per-class IoU:")
        for c, (iou, sup) in enumerate(zip(per_cls_iou, support)):
            sup_pct = 100.0 * sup / max(1, support.sum())
            
            print(f"  Class {c}: IoU={0.0 if np.isnan(iou) else iou:.4f}  (support={sup} px, {sup_pct:.2f}% of GT)")

        scheduler.step(val_miou)

        if val_miou > best_miou:
            best_miou = val_miou
            ckpt_path = os.path.join('output', 'best_model.pth')
            torch.save({
                'epoch': epoch,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'miou': best_miou
            }, ckpt_path)
            print(f"Saved best model (mIoU={best_miou:.4f}) to {ckpt_path}")


In [None]:
main()

Files found: ['ID_00048_UAV_dji.phantom.4.pro.hawk.1_[Lat=39.54212427861807,Lon=22.64442951302024,Alt=4.900000095367432]_DATE_03_07_2019_14_38_56.png', 'ID_00049_UAV_dji.phantom.4.pro.hawk.1_[Lat=39.54212238368247,Lon=22.644427100249906,Alt=4.900000095367432]_DATE_03_07_2019_14_38_57.png', 'ID_00050_UAV_dji.phantom.4.pro.hawk.1_[Lat=39.54212050531792,Lon=22.644424707209755,Alt=4.900000095367432]_DATE_03_07_2019_14_38_58.png', 'ID_00051_UAV_dji.phantom.4.pro.hawk.1_[Lat=39.54211671867755,Lon=22.64441993246098,Alt=4.900000095367432]_DATE_03_07_2019_14_38_59.png', 'ID_00052_UAV_dji.phantom.4.pro.hawk.1_[Lat=39.54211477371615,Lon=22.644417506003943,Alt=4.900000095367432]_DATE_03_07_2019_14_38_59.png', 'ID_00053_UAV_dji.phantom.4.pro.hawk.1_[Lat=39.542112844908914,Lon=22.64441509154499,Alt=4.900000095367432]_DATE_03_07_2019_14_39_00.png', 'ID_00054_UAV_dji.phantom.4.pro.hawk.1_[Lat=39.54211093392337,Lon=22.644412730677484,Alt=4.900000095367432]_DATE_03_07_2019_14_39_00.png', 'ID_00055_UAV_d

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and torch.cuda.is_available())


Dynamic pos_weight = 18.02

Epoch 1/60


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                      

Train loss: 0.9890 | Val loss: 0.8570 | Val mIoU: 0.4535
Per-class IoU:
  Class 0: IoU=0.7814  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.1256  (support=435532 px, 4.05% of GT)
Saved best model (mIoU=0.4535) to output\best_model.pth

Epoch 2/60


                                                      

Train loss: 0.8088 | Val loss: 0.7645 | Val mIoU: 0.4482
Per-class IoU:
  Class 0: IoU=0.7607  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.1357  (support=435532 px, 4.05% of GT)

Epoch 3/60


                                                      

Train loss: 0.7271 | Val loss: 0.7357 | Val mIoU: 0.5331
Per-class IoU:
  Class 0: IoU=0.8668  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.1995  (support=435532 px, 4.05% of GT)
Saved best model (mIoU=0.5331) to output\best_model.pth

Epoch 4/60


                                                      

Train loss: 0.6774 | Val loss: 0.6982 | Val mIoU: 0.6134
Per-class IoU:
  Class 0: IoU=0.9298  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.2969  (support=435532 px, 4.05% of GT)
Saved best model (mIoU=0.6134) to output\best_model.pth

Epoch 5/60


                                                      

Train loss: 0.6306 | Val loss: 0.7455 | Val mIoU: 0.6312
Per-class IoU:
  Class 0: IoU=0.9455  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.3170  (support=435532 px, 4.05% of GT)
Saved best model (mIoU=0.6312) to output\best_model.pth

Epoch 6/60


                                                      

Train loss: 0.6217 | Val loss: 0.7233 | Val mIoU: 0.6449
Per-class IoU:
  Class 0: IoU=0.9490  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.3408  (support=435532 px, 4.05% of GT)
Saved best model (mIoU=0.6449) to output\best_model.pth

Epoch 7/60


                                                      

Train loss: 0.6004 | Val loss: 0.6895 | Val mIoU: 0.5160
Per-class IoU:
  Class 0: IoU=0.8455  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.1866  (support=435532 px, 4.05% of GT)

Epoch 8/60


                                                      

Train loss: 0.5965 | Val loss: 0.6235 | Val mIoU: 0.6068
Per-class IoU:
  Class 0: IoU=0.9211  (support=10312372 px, 95.95% of GT)
  Class 1: IoU=0.2924  (support=435532 px, 4.05% of GT)

Epoch 9/60


Train:  74%|███████▍  | 59/80 [00:12<00:04,  4.82it/s]