# Dependencies, image and mask downloads


In [1]:
%pip install --quiet fiftyone pycocotools albumentations

Note: you may need to restart the kernel to use updated packages.


Download the images and split them 70 / 15 / 15 % with FiftyOne

In [2]:
import fiftyone as fo, random, os, warnings
warnings.filterwarnings("ignore")

CLASSES  = ["Banana", "Orange", "Strawberry"]   # Open Images class names to fetch
BUFFERS  = 500                                   # max_samples requested per class
DATA_RAW = "data_raw"                            # FiftyOne zoo download directory
fo.config.dataset_zoo_dir = DATA_RAW

# One FiftyOne dataset per split; overwrite=True replaces a same-named dataset.
train_ds, val_ds, test_ds = (
    fo.Dataset(f"fruit_{split_name}", overwrite=True)
    for split_name in ("train", "val", "test")
)

def grab(cls):
    """Load Open Images V7 ``train`` samples carrying segmentations of one class.

    Args:
        cls: class name passed to the zoo loader as the only entry of ``classes``.

    Returns:
        Whatever ``fo.zoo.load_zoo_dataset`` returns for the request; the
        dataset is named ``"<cls lower-cased>_tmp"`` and capped at ``BUFFERS``
        samples via ``max_samples``.
    """
    zoo_kwargs = dict(
        split="train",
        classes=[cls],
        label_types=["segmentations"],
        only_matching=True,
        max_samples=BUFFERS,
        dataset_name=f"{cls.lower()}_tmp",
    )
    return fo.zoo.load_zoo_dataset("open-images-v7", **zoo_kwargs)

# Dedicated, seeded RNG so the 70/15/15 assignment is identical after
# Restart Kernel -> Run All (the global `random` state is left untouched).
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# Image files already assigned to a split. The same file may be loaded for
# more than one class; without this guard it could end up in two different
# splits (train/test leakage) and its exported mask would be overwritten.
seen_paths = set()

for cls in CLASSES:
    ds = grab(cls)
    ids, paths = ds.values(["id", "filepath"])
    ids = [sid for sid, path in zip(ids, paths) if path not in seen_paths]
    seen_paths.update(paths)
    split_rng.shuffle(ids)

    # 70 / 15 / 15 %
    n = len(ids)
    t, v = int(0.70 * n), int(0.85 * n)
    train_ds.add_samples(ds.select(ids[:t]))
    val_ds.add_samples(ds.select(ids[t:v]))
    test_ds.add_samples(ds.select(ids[v:]))

print("train counts:", train_ds.count_values("ground_truth.detections.label"))
print("val counts:", val_ds.count_values("ground_truth.detections.label"))
print("test counts:", test_ds.count_values("ground_truth.detections.label"))


Downloading split 'train' to 'data_raw/open-images-v7/train' if necessary
Necessary images already downloaded
Existing download of split 'train' is sufficient
Loading 'open-images-v7' split 'train'
 100% |█████████████████| 500/500 [10.2s elapsed, 0s remaining, 45.5 samples/s]      
Dataset 'banana_tmp' created
 100% |█████████████████| 350/350 [1.2s elapsed, 0s remaining, 298.9 samples/s]         
 100% |███████████████████| 75/75 [208.6ms elapsed, 0s remaining, 359.6 samples/s]    
 100% |███████████████████| 75/75 [242.4ms elapsed, 0s remaining, 309.4 samples/s]    
Downloading split 'train' to 'data_raw/open-images-v7/train' if necessary
Necessary images already downloaded
Existing download of split 'train' is sufficient
Loading 'open-images-v7' split 'train'
 100% |█████████████████| 500/500 [32.6s elapsed, 0s remaining, 18.9 samples/s]      
Dataset 'orange_tmp' created
 100% |█████████████████| 350/350 [1.8s elapsed, 0s remaining, 192.2 samples/s]         
 100% |███████████████

Conversion of the object annotations to semantic PNG masks

In [3]:
import fiftyone.utils.labels as fou, shutil

PROC_ROOT   = "seg_data_fruit"
# Pixel value -> class name, numbered from 1 in CLASSES order.
MASK_TARGET = dict(enumerate(CLASSES, start=1))

def export(ds, split):
    """Write one split to ``PROC_ROOT/<split>/{images,masks}``.

    Renders the ``ground_truth`` objects of every sample in ``ds`` into a
    ``sem_mask`` segmentation field (mask files go to the ``masks`` folder),
    then copies each sample's image file into the ``images`` folder.

    Args:
        ds: FiftyOne sample collection to export.
        split: split name, used as the sub-directory under ``PROC_ROOT``.
    """
    split_root = f"{PROC_ROOT}/{split}"
    img_dir = f"{split_root}/images"
    msk_dir = f"{split_root}/masks"
    for folder in (img_dir, msk_dir):
        os.makedirs(folder, exist_ok=True)

    seg_kwargs = dict(
        in_field="ground_truth",
        mask_targets=MASK_TARGET,
        out_field="sem_mask",
        output_dir=msk_dir,
        rel_dir=".",
        overwrite=True,
    )
    fou.objects_to_segmentations(ds, **seg_kwargs)

    for sample in ds:
        src = sample.filepath
        dst = f"{img_dir}/{os.path.basename(src)}"
        shutil.copy2(src, dst)

# Export every split under its own sub-directory of PROC_ROOT.
splits_to_export = {"train": train_ds, "val": val_ds, "test": test_ds}
for name, fo_ds in splits_to_export.items():
    export(fo_ds, name)

print("semantic masks + images written to", PROC_ROOT)

Computing metadata...
 100% |███████████████| 1050/1050 [245.6ms elapsed, 0s remaining, 4.3K samples/s]     
 100% |███████████████| 1050/1050 [16.7s elapsed, 0s remaining, 65.1 samples/s]      
Computing metadata...
 100% |█████████████████| 225/225 [37.8ms elapsed, 0s remaining, 5.9K samples/s] 
 100% |█████████████████| 225/225 [3.4s elapsed, 0s remaining, 64.8 samples/s]      
Computing metadata...
 100% |█████████████████| 225/225 [38.0ms elapsed, 0s remaining, 5.9K samples/s] 
 100% |█████████████████| 225/225 [3.5s elapsed, 0s remaining, 60.6 samples/s]      
semantic masks + images written to seg_data_fruit


Back up the dataset to Google Drive

In [4]:
# from google.colab import drive
# import shutil, os
# drive.mount("/content/drive")

PROJ_DIR = "drive/GMM3"
backup_dir = f"{PROJ_DIR}/seg_data"

# Make sure the project folder exists, then mirror the exported dataset into it
# (dirs_exist_ok=True lets the copy run again over an existing backup).
os.makedirs(PROJ_DIR, exist_ok=True)
shutil.copytree(PROC_ROOT, backup_dir, dirs_exist_ok=True)

print("✓ dataset copied to", PROJ_DIR)

✓ dataset copied to drive/GMM3


Move the downloaded masks and images to Google Drive

# Dataset & DataLoader

In [None]:
import glob, os, cv2, torch, numpy as np
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Root of the exported dataset; SegDataset reads <DATA_ROOT>/<split>/images
# and <DATA_ROOT>/<split>/masks.
DATA_ROOT = "drive/GMM3/seg_data"
# Side length in pixels of the square crop/resize produced by SegDataset.
IMG_SIZE  = 256

class SegDataset(Dataset):
    """Image / semantic-mask pairs for one split under ``DATA_ROOT``.

    Images are the ``*.jpg`` files in ``<DATA_ROOT>/<split>/images``; masks are
    the ``*.png`` files found recursively under ``<DATA_ROOT>/<split>/masks``.
    An image is kept only if a mask with the same file stem exists.

    Args:
        split: sub-directory of ``DATA_ROOT`` to read ("train", "val", "test").
        train: when True apply the random augmentations (random resized crop +
            horizontal flip); when False only resize to ``IMG_SIZE``.

    Raises:
        RuntimeError: if no image/mask pair is found for the split, or (from
            ``__getitem__``) if an image or mask file cannot be decoded.
    """

    def __init__(self, split="train", train=True):
        img_root  = f"{DATA_ROOT}/{split}/images"
        mask_root = f"{DATA_ROOT}/{split}/masks"
        img_paths = sorted(glob.glob(f"{img_root}/*.jpg"))

        # Index masks by file stem so each image can be matched in O(1).
        mask_paths = glob.glob(f"{mask_root}/**/*.png", recursive=True)
        lookup = {os.path.splitext(os.path.basename(p))[0]: p for p in mask_paths}

        self.imgs, self.masks = [], []
        for im in img_paths:
            key = os.path.splitext(os.path.basename(im))[0]
            if key in lookup:
                self.imgs.append(im)
                self.masks.append(lookup[key])

        if not self.imgs:
            raise RuntimeError(f"No pairs found in split '{split}'")

        self.tfm = A.Compose([
            A.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE),
                                scale=(0.8, 1.0), ratio=(0.75, 1.33), p=1.0)
                if train else A.Resize(height=IMG_SIZE, width=IMG_SIZE),
            A.HorizontalFlip(p=0.5) if train else A.NoOp(),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        """Return ``(image / 255.0, mask.long())`` for pair ``idx`` after transforms."""
        img_path, mask_path = self.imgs[idx], self.masks[idx]

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside OpenCV.
        bgr = cv2.imread(img_path)
        if bgr is None:
            raise RuntimeError(f"Could not read image '{img_path}'")
        y = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        if y is None:
            raise RuntimeError(f"Could not read mask '{mask_path}'")

        x = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        t = self.tfm(image=x, mask=y)
        return t["image"]/255.0, t["mask"].long()

# -------------- loaders --------------------------------------------------
# Augmentations are enabled for the training split only.
train_set = SegDataset(split="train", train=True)
val_set   = SegDataset(split="val",   train=False)
test_set  = SegDataset(split="test",  train=False)

print(f"train {len(train_set)} | val {len(val_set)} | test {len(test_set)}")

eval_loader_kwargs = dict(batch_size=4, shuffle=False, num_workers=2)
train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_set,  **eval_loader_kwargs)
test_loader  = DataLoader(test_set, **eval_loader_kwargs)

# sample shape check
x, y = train_set[0]
print("sample shapes:", x.shape, y.shape)


ModuleNotFoundError: No module named 'google.colab'

# Model

In [None]:
import torch, torch.nn as nn, torch.nn.functional as F

class Block(nn.Module):
    """Two stacked (3x3 conv, padding 1) -> BatchNorm -> ReLU stages.

    Args:
        in_c: number of input channels.
        out_c: number of output channels of both convolutions.
    """

    def __init__(self, in_c, out_c):
        super().__init__()
        stages = []
        for stage_in in (in_c, out_c):
            stages.extend([
                nn.Conv2d(stage_in, out_c, 3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True),
            ])
        # Stored as ``conv`` so parameter names remain ``conv.<index>.*``.
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)

class TinyUNet(nn.Module):
    """Small U-Net: three encoder blocks, a bottleneck and three decoder blocks.

    Each encoder level is followed by a 2x max-pool and each decoder level
    starts with a stride-2 transposed convolution whose output is concatenated
    with the matching encoder feature map. The concatenation needs both maps to
    have the same spatial size, which holds when the input height and width
    are multiples of 8.

    Args:
        n_classes: number of output channels of the final 1x1 convolution.
        base: channel width of the first encoder block; deeper levels use
            2x, 4x and 8x this value.
    """

    def __init__(self, n_classes=4, base=32):
        super().__init__()
        c1, c2, c4, c8 = base, base*2, base*4, base*8
        # Encoder
        self.e1 = Block(3,  c1)
        self.e2 = Block(c1, c2)
        self.e3 = Block(c2, c4)
        self.pool = nn.MaxPool2d(2)
        # Bottleneck
        self.mid = Block(c4, c8)
        # Decoder: each Block takes upsampled + skip channels (hence 2x input).
        self.u2  = nn.ConvTranspose2d(c8, c4, 2, 2)
        self.d2  = Block(c8, c4)
        self.u1  = nn.ConvTranspose2d(c4, c2, 2, 2)
        self.d1  = Block(c4, c2)
        self.u0  = nn.ConvTranspose2d(c2, c1, 2, 2)
        self.d0  = Block(c2, c1)
        self.head = nn.Conv2d(c1, n_classes, 1)

    def forward(self, x):
        # Encoder path; keep each level's output for the skip connections.
        skip1 = self.e1(x)
        skip2 = self.e2(self.pool(skip1))
        skip3 = self.e3(self.pool(skip2))
        feat  = self.mid(self.pool(skip3))

        # Decoder path, deepest level first.
        decoder_levels = (
            (self.u2, self.d2, skip3),
            (self.u1, self.d1, skip2),
            (self.u0, self.d0, skip1),
        )
        for upsample, refine, skip in decoder_levels:
            feat = refine(torch.cat([upsample(feat), skip], 1))
        return self.head(feat)

n_classes = 4   # 0-bg + 1-Banana + 2-Orange + 3-Strawberry

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

model = TinyUNet(n_classes).to(device)


# Training loop

In [None]:
import torch.nn.functional as F, torch.optim as optim

def dice_loss(pred, targ, eps=1e-6):
    """Soft Dice loss averaged over samples and classes.

    Args:
        pred: raw class scores; softmax is applied over dim 1 and the
            remaining dims 2 and 3 are summed over.
        targ: integer class indices (dtype accepted by ``F.one_hot``) with one
            dim fewer than ``pred``.
        eps: smoothing term added to numerator and denominator so that a class
            absent from both prediction and target does not divide by zero.

    Returns:
        Scalar tensor ``1 - mean(dice)``.
    """
    # Take the class count from the logits instead of a notebook global, so
    # the function works for any model output width.
    num_classes = pred.shape[1]
    probs = F.softmax(pred, dim=1)
    onehot = F.one_hot(targ, num_classes).permute(0, 3, 1, 2).float()
    inter = (probs * onehot).sum((2, 3))
    union = probs.sum((2, 3)) + onehot.sum((2, 3))
    return 1 - ((2 * inter + eps) / (union + eps)).mean()

def loss_fn(p, t):
    """Equal-weight sum of cross-entropy and Dice loss for logits ``p`` and labels ``t``."""
    ce_term = F.cross_entropy(p, t)
    dice_term = dice_loss(p, t)
    return 0.5*ce_term + 0.5*dice_term

opt = optim.AdamW(model.parameters(), lr=3e-4)
train_loss_curve, val_loss_curve = [], []

for epoch in range(10):
    # ---- train ----
    model.train()
    train_total = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        opt.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        opt.step()
        train_total += loss.item()
    # Mean of the per-batch losses for this epoch.
    tl = train_total / len(train_loader)
    train_loss_curve.append(tl)

    # ---- val ----
    model.eval()
    with torch.no_grad():
        val_total = sum(
            loss_fn(model(x.to(device)), y.to(device)).item()
            for x, y in val_loader
        )
    vl = val_total / len(val_loader)
    val_loss_curve.append(vl)

    print(f"ep {epoch:02d}  train {tl:.3f}  val {vl:.3f}")

ep 00  train 0.824  val 0.718
ep 01  train 0.670  val 0.638


KeyboardInterrupt: 

In [None]:
# ── Cell: plot loss curves -----------------------------------------------
import matplotlib.pyplot as plt

epochs = range(1, len(train_loss_curve) + 1)

fig, ax = plt.subplots(figsize=(7,4))
for curve, curve_label in ((train_loss_curve, "Train loss"),
                           (val_loss_curve, "Val loss")):
    ax.plot(epochs, curve, label=curve_label)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Tiny-U-Net training curves")
ax.legend()
ax.grid(True)
plt.show()

# Testing and evaluation

In [None]:
from sklearn.metrics import f1_score
import numpy as np, torch

@torch.no_grad()
def evaluate(net=None, loader=None, num_classes=None, dev=None):
    """Compute per-class Dice and foreground macro/micro F1 over a loader.

    All arguments are optional; when omitted the notebook globals ``model``,
    ``test_loader``, ``n_classes`` and ``device`` are used, so ``evaluate()``
    behaves as before.

    Args:
        net: model mapping an image batch to per-class scores on dim 1.
        loader: iterable of ``(images, integer_masks)`` batches.
        num_classes: number of classes, class 0 being background.
        dev: device the batches and accumulators are placed on.

    Returns:
        ``(dice, macro, micro)`` where ``dice`` is a float32 numpy array with
        one entry per class (0 for a class absent from both prediction and
        ground truth), and ``macro`` / ``micro`` are floats: the F1 averages
        over classes ``1 .. num_classes-1`` (background excluded).
    """
    net = model if net is None else net
    loader = test_loader if loader is None else loader
    num_classes = n_classes if num_classes is None else num_classes
    dev = device if dev is None else dev

    net.eval()
    # Integer accumulators: float32 cannot represent every pixel count above
    # 2**24, which a few hundred 256x256 masks already exceed.
    inter = torch.zeros(num_classes, dtype=torch.long, device=dev)
    union = torch.zeros(num_classes, dtype=torch.long, device=dev)
    for x, y in loader:
        x, y = x.to(dev), y.to(dev)
        p = net(x).argmax(1)
        for c in range(num_classes):
            pc = (p == c); yc = (y == c)
            inter[c] += (pc & yc).sum()          # true positives
            union[c] += pc.sum() + yc.sum()      # 2*TP + FP + FN

    inter = inter.cpu().double()
    union = union.cpu().double()
    per_class = 2 * inter / union.clamp(min=1)
    dice = per_class.float().numpy()

    # Per-class Dice equals per-class F1 (2TP / (2TP + FP + FN)), so the
    # foreground averages follow from the counts; there is no need to copy
    # every pixel into Python lists.
    fg_inter, fg_union = inter[1:], union[1:]
    if fg_inter.numel() == 0:
        macro = micro = 0.0
    else:
        macro = float(per_class[1:].mean())
        micro = float(2 * fg_inter.sum() / fg_union.sum().clamp(min=1))
    return dice, macro, micro

Dice: {'bg': np.float32(0.895), 'Cat': np.float32(0.625), 'Car': np.float32(0.277), 'Person': np.float32(0.0)}
Macro-F1 0.300 | Micro-F1 0.427


In [None]:
fruit_labels = ["bg","Banana","Orange","Strawberry"]

dice, macroF1, microF1 = evaluate()
# Pair each class name with its Dice score rounded to three decimals.
dice_by_label = {label: score for label, score in zip(fruit_labels, dice.round(3))}
print("Dice:", dice_by_label)
print(f"Macro-F1 {macroF1:.3f} | Micro-F1 {microF1:.3f}")