## Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models.vgg import VGG
from PIL import Image
import numpy as np
import os
from tqdm import tqdm

## Dataset Class

In [2]:
# VOC2012 Dataset Class
class VOCSegmentation(Dataset):
    """Pascal VOC 2012 semantic-segmentation dataset read from a VOC directory tree.

    Image ids are read from ``ImageSets/Segmentation/<split>.txt``; each id is
    resolved to ``JPEGImages/<id>.jpg`` and ``SegmentationClass/<id>.png``.
    """

    def __init__(self, root, split='train', transform=None, target_transform=None):
        """
        Args:
            root: /kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val
            split: 'train', 'val', or 'trainval'
            transform: Transform for input images
            target_transform: Transform for segmentation masks

        Raises:
            FileNotFoundError: if the split file does not exist under ``root``.
        """
        self.root = root
        self.split = split
        self.transform = transform
        self.target_transform = target_transform

        # Read image list, skipping blank lines (e.g. a trailing newline at the
        # end of the file) that would otherwise become an empty image id and
        # fail later when resolved to '.jpg' / '.png'.
        split_file = os.path.join(root, 'ImageSets', 'Segmentation', f'{split}.txt')
        with open(split_file, 'r') as f:
            self.images = [line.strip() for line in f if line.strip()]

        self.img_dir = os.path.join(root, 'JPEGImages')
        self.mask_dir = os.path.join(root, 'SegmentationClass')

    def __len__(self):
        """Return the number of image ids listed in the split file."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load the ``idx``-th (image, mask) pair.

        The image is converted to RGB and passed through ``transform`` when one
        is set. The mask is passed through ``target_transform`` when one is
        set; otherwise its pixel values are returned as a ``torch.long`` tensor.
        """
        img_name = self.images[idx]

        # Load image
        img_path = os.path.join(self.img_dir, f'{img_name}.jpg')
        image = Image.open(img_path).convert('RGB')

        # Load mask (no mode conversion: the stored pixel values are used as labels)
        mask_path = os.path.join(self.mask_dir, f'{img_name}.png')
        mask = Image.open(mask_path)

        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            mask = self.target_transform(mask)
        else:
            mask = torch.from_numpy(np.array(mask)).long()

        return image, mask

In [3]:
ROOT = "/kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val"

# One un-transformed dataset per split file, built in train / val / trainval order.
train_ds, val_ds, trainval_ds = (
    VOCSegmentation(root=ROOT, split=split_name)
    for split_name in ("train", "val", "trainval")
)

# Report how many image ids each split file lists.
print("\n".join(
    f"{caption} {len(dataset)}"
    for caption, dataset in (
        ("Train:", train_ds),
        ("Val:", val_ds),
        ("Train+Val:", trainval_ds),
    )
))


'/kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val'

## VGGNet with intermediate outputs

In [4]:
class VGGNet(VGG):
    """VGG backbone whose forward pass returns the feature map after each stage.

    Args:
        pretrained: when true, copy the weights of the matching torchvision
            model (``models.<model>(pretrained=True)``) into this network.
        model: key into the module-level ``cfg`` / ``ranges`` tables and name
            of the torchvision constructor, e.g. ``'vgg16'``.
        requires_grad: when false, every parameter is frozen.
        remove_fc: when true, the ``classifier`` sub-module is deleted after
            the weights have been loaded.

    Raises:
        KeyError: if ``model`` is not a key of ``cfg`` / ``ranges``.
    """

    def __init__(self, pretrained=True, model='vgg16', requires_grad=True, remove_fc=True):
        super().__init__(make_layers(cfg[model]))
        self.ranges = ranges[model]

        if pretrained:
            # Look the constructor up by name rather than exec()-ing a
            # formatted source string: same effect, no dynamic code execution.
            pretrained_model = getattr(models, model)(pretrained=True)
            self.load_state_dict(pretrained_model.state_dict())

        if not requires_grad:
            for param in super().parameters():
                param.requires_grad = False

        if remove_fc:
            # Must happen after load_state_dict, which expects the classifier keys.
            del self.classifier

    def forward(self, x):
        """Run ``x`` through ``self.features`` and collect one output per range.

        Returns:
            dict mapping ``'x1'`` .. ``'x<k>'`` to the tensor produced after the
            last layer of the corresponding ``self.ranges`` entry.
        """
        output = {}

        # Get output after each maxpool layer
        for idx in range(len(self.ranges)):
            for layer in range(self.ranges[idx][0], self.ranges[idx][1]):
                x = self.features[layer](x)
            output["x%d" % (idx + 1)] = x

        return output

In [5]:
# Per-variant lookup tables for the supported VGG versions.
#
# `ranges[name]` holds five half-open (start, stop) index spans into the
# `features` Sequential built by make_layers(cfg[name]) without batch norm.
# Each span covers one stage and stops right after that stage's max-pool,
# e.g. vgg16 stage 1 is conv, relu, conv, relu, pool -> indices 0..4 -> (0, 5).
ranges = {
    'vgg11': ((0, 3), (3, 6),  (6, 11),  (11, 16), (16, 21)),
    'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)),
    'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)),
    'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37))
}

# `cfg[name]` is the layer recipe consumed by make_layers: an integer adds a
# 3x3 convolution with that many output channels, 'M' adds a 2x2 max-pool.
cfg = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


In [6]:
def make_layers(cfg, batch_norm=False):
    """Build the VGG feature extractor described by ``cfg``.

    Args:
        cfg: sequence whose items are either ``'M'`` (2x2 max-pool, stride 2)
            or an int (3x3 convolution, padding 1, with that many output
            channels, followed by an in-place ReLU).
        batch_norm: when true, insert ``BatchNorm2d`` between each convolution
            and its ReLU.

    Returns:
        ``nn.Sequential`` of the generated layers; the first convolution takes
        3 input channels.
    """
    modules = []
    channels = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels = spec
    return nn.Sequential(*modules)

## FCN Architecture

In [7]:
# FCN32s - No skip connections
class FCN32s(nn.Module):
    """Decoder that upsamples only the deepest backbone feature map.

    ``pretrained_net(x)`` must return a mapping with key ``'x5'`` holding a
    512-channel feature map. Five stride-2 transposed convolutions each double
    the spatial size (32x overall), then a 1x1 convolution produces
    ``n_class`` score maps.
    """

    # (in_channels, out_channels) of the five 2x upsampling stages, in order.
    _STAGES = ((512, 512), (512, 256), (256, 128), (128, 64), (64, 32))

    def __init__(self, pretrained_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)
        # Registers deconv1, bn1, deconv2, bn2, ... deconv5, bn5 in that order.
        for stage, (c_in, c_out) in enumerate(self._STAGES, start=1):
            setattr(self, f'deconv{stage}', nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1))
            setattr(self, f'bn{stage}', nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores of shape (N, n_class, 32*h5, 32*w5)."""
        score = self.pretrained_net(x)['x5']  # (N, 512, h5, w5)

        # Every stage is deconv -> ReLU -> BatchNorm and doubles H and W.
        for stage in range(1, 6):
            upsample = getattr(self, f'deconv{stage}')
            normalize = getattr(self, f'bn{stage}')
            score = normalize(self.relu(upsample(score)))

        return self.classifier(score)


In [11]:
# FCN16s - Skip connection from pool4
class FCN16s(nn.Module):
    """Decoder that fuses the ``'x4'`` backbone feature map into the first stage.

    ``pretrained_net(x)`` must return a mapping with keys ``'x5'`` and
    ``'x4'`` (both 512 channels, ``'x4'`` at twice the spatial size of
    ``'x5'``). The upsampled ``'x5'`` is added to ``'x4'`` before the first
    batch norm; the remaining four stages are plain deconv -> ReLU -> BN.
    """

    # (in_channels, out_channels) of the five 2x upsampling stages, in order.
    _STAGES = ((512, 512), (512, 256), (256, 128), (128, 64), (64, 32))

    def __init__(self, pretrained_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)
        # Registers deconv1, bn1, deconv2, bn2, ... deconv5, bn5 in that order.
        for stage, (c_in, c_out) in enumerate(self._STAGES, start=1):
            setattr(self, f'deconv{stage}', nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1))
            setattr(self, f'bn{stage}', nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores of shape (N, n_class, 32*h5, 32*w5)."""
        features = self.pretrained_net(x)
        deepest = features['x5']  # (N, 512, h5, w5)
        skip = features['x4']     # (N, 512, 2*h5, 2*w5)

        # Stage 1: upsample, add the skip features, then normalise the sum.
        score = self.bn1(self.relu(self.deconv1(deepest)) + skip)

        # Stages 2-5: deconv -> ReLU -> BatchNorm, each doubling H and W.
        for stage in range(2, 6):
            upsample = getattr(self, f'deconv{stage}')
            normalize = getattr(self, f'bn{stage}')
            score = normalize(self.relu(upsample(score)))

        return self.classifier(score)


In [12]:
# FCN8s - Skip connections from pool4 and pool3
class FCN8s(nn.Module):
    """Decoder that fuses the ``'x4'`` and ``'x3'`` backbone feature maps.

    ``pretrained_net(x)`` must return a mapping with keys ``'x5'`` (512
    channels), ``'x4'`` (512 channels, 2x the size of ``'x5'``) and ``'x3'``
    (256 channels, 4x the size of ``'x5'``). In the first two stages the skip
    features are added before batch norm; the last three stages are plain
    deconv -> ReLU -> BN.
    """

    # (in_channels, out_channels) of the five 2x upsampling stages, in order.
    _STAGES = ((512, 512), (512, 256), (256, 128), (128, 64), (64, 32))

    def __init__(self, pretrained_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)
        # Registers deconv1, bn1, deconv2, bn2, ... deconv5, bn5 in that order.
        for stage, (c_in, c_out) in enumerate(self._STAGES, start=1):
            setattr(self, f'deconv{stage}', nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1))
            setattr(self, f'bn{stage}', nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores of shape (N, n_class, 32*h5, 32*w5)."""
        features = self.pretrained_net(x)
        deepest = features['x5']   # (N, 512, h5, w5)
        skip_16 = features['x4']   # (N, 512, 2*h5, 2*w5)
        skip_8 = features['x3']    # (N, 256, 4*h5, 4*w5)

        # Stages 1-2: upsample, add the matching skip features, normalise the sum.
        score = self.bn1(self.relu(self.deconv1(deepest)) + skip_16)
        score = self.bn2(self.relu(self.deconv2(score)) + skip_8)

        # Stages 3-5: deconv -> ReLU -> BatchNorm, each doubling H and W.
        for stage in range(3, 6):
            upsample = getattr(self, f'deconv{stage}')
            normalize = getattr(self, f'bn{stage}')
            score = normalize(self.relu(upsample(score)))

        return self.classifier(score)

In [13]:
# FCNs - Skip connections from all pools
class FCNs(nn.Module):
    """Decoder that adds a backbone skip connection after every upsampling stage.

    ``pretrained_net(x)`` must return a mapping with keys ``'x1'`` .. ``'x5'``
    whose channel counts are 64, 128, 256, 512 and 512. Unlike the other
    variants in this file, the skip features are added *after* batch norm.
    """

    # (in_channels, out_channels) of the five 2x upsampling stages, in order.
    _STAGES = ((512, 512), (512, 256), (256, 128), (128, 64), (64, 32))

    def __init__(self, pretrained_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)
        # Registers deconv1, bn1, deconv2, bn2, ... deconv5, bn5 in that order.
        for stage, (c_in, c_out) in enumerate(self._STAGES, start=1):
            setattr(self, f'deconv{stage}', nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1))
            setattr(self, f'bn{stage}', nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores of shape (N, n_class, 32*h5, 32*w5)."""
        features = self.pretrained_net(x)
        # Deepest map first, then the skips in the order they are consumed.
        score = features['x5']
        skips = (features['x4'], features['x3'], features['x2'], features['x1'], None)

        for stage, skip in zip(range(1, 6), skips):
            upsample = getattr(self, f'deconv{stage}')
            normalize = getattr(self, f'bn{stage}')
            score = normalize(self.relu(upsample(score)))
            if skip is not None:
                score = score + skip  # element-wise add

        return self.classifier(score)


## Dataset Transforms

In [14]:
from torchvision import transforms

# Preprocessing applied to every RGB input image before it reaches the network.
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),          # fixed size so samples batch together; 224 = 7 * 32, so the 32x down/up-sampling round-trips exactly (aspect ratio is not preserved)
    transforms.ToTensor(),
    # NOTE(review): these look like the usual ImageNet channel mean/std that
    # torchvision's pretrained backbones expect -- confirm.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [15]:
# Preprocessing for segmentation masks. Nearest-neighbour resizing keeps every
# pixel an original label id (no blended values); the result is converted to a
# LongTensor of label ids. The interpolation is given as torchvision's
# InterpolationMode enum rather than the PIL integer constant, which
# torchvision accepts only for backward compatibility.
mask_transform = transforms.Compose([
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.NEAREST),
    transforms.Lambda(lambda x: torch.from_numpy(np.array(x)).long())
])


In [16]:
# Dataset root for the transformed train/val datasets (same path as ROOT defined earlier in the notebook).
VOC_ROOT = "/kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val"

## Dataset and DataLoader

In [17]:
# Train and val datasets share the same root and the same image / mask
# preprocessing; only the split file differs.
train_dataset, val_dataset = (
    VOCSegmentation(
        root=VOC_ROOT,
        split=split_name,
        transform=image_transform,
        target_transform=mask_transform,
    )
    for split_name in ('train', 'val')
)


In [18]:
from torch.utils.data import DataLoader

# Both loaders use batches of 4, four worker processes and pinned host memory;
# only the training loader shuffles its samples.
train_loader, val_loader = (
    DataLoader(
        dataset,
        batch_size=4,
        shuffle=reshuffle,
        num_workers=4,
        pin_memory=True,
    )
    for dataset, reshuffle in ((train_dataset, True), (val_dataset, False))
)


In [19]:
# Sanity check: fetch one transformed sample and inspect its shapes and labels.
img, mask = train_dataset[0]
print(img.shape)     # (3, 224, 224)
print(mask.shape)    # (224, 224)
# Distinct label values in this mask; 255 is the value the loss later ignores.
print(torch.unique(mask))


torch.Size([3, 224, 224])
torch.Size([224, 224])
tensor([  0,   1,  15, 255])


## Training Loops

In [20]:
from tqdm import tqdm

def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: network called as ``model(images)``; put into train mode here.
        dataloader: iterable of ``(images, masks)`` batches that supports ``len()``.
        criterion: loss called as ``criterion(outputs, masks)``.
        optimizer: optimizer stepped once per batch.
        device: device both tensors of each batch are moved to.

    Returns:
        Mean of the per-batch loss values (sum divided by ``len(dataloader)``).
    """
    model.train()
    batch_losses = []

    for batch_images, batch_masks in tqdm(dataloader, desc='Training'):
        batch_images = batch_images.to(device)
        batch_masks = batch_masks.to(device)

        optimizer.zero_grad()
        # Model output is (N, C, H, W); ignore_index=255 on the criterion handles void.
        loss = criterion(model(batch_images), batch_masks)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses, 0.0) / len(dataloader)


In [21]:
@torch.no_grad()
def validate_epoch(model, dataloader, criterion, device):
    """Compute the mean per-batch loss over ``dataloader`` without training.

    The model is switched to eval mode and gradients are disabled by the
    decorator. Returns the sum of batch losses divided by ``len(dataloader)``.
    """
    model.eval()
    batch_losses = []

    for batch_images, batch_masks in tqdm(dataloader, desc='Validation'):
        batch_images = batch_images.to(device)
        batch_masks = batch_masks.to(device)

        loss = criterion(model(batch_images), batch_masks)
        batch_losses.append(loss.item())

    return sum(batch_losses, 0.0) / len(dataloader)


## IoU

In [22]:
def fast_hist(pred, label, n_class):
    """Build an ``n_class x n_class`` confusion matrix from integer tensors.

    Entry ``[i, j]`` counts the positions where ``label == i`` and
    ``pred == j``. Positions whose label lies outside ``[0, n_class)`` are
    dropped.
    """
    in_range = (label >= 0) & (label < n_class)
    # Encode each (label, pred) pair as one flat cell index: row * n_class + col.
    flat_index = n_class * label[in_range] + pred[in_range]
    counts = torch.bincount(flat_index, minlength=n_class * n_class)
    return counts.reshape(n_class, n_class)


In [23]:
@torch.no_grad()
def evaluate_iou(model, dataloader, device, num_classes):
    """Compute per-class and mean intersection-over-union over ``dataloader``.

    Args:
        model: network returning ``(N, C, H, W)`` scores; put into eval mode.
        dataloader: iterable of ``(images, masks)`` batches.
        device: device the batches and the confusion matrix live on.
        num_classes: number of classes ``C``.

    Returns:
        ``(iou, mean_iou)``: ``iou`` is a NumPy array of length
        ``num_classes``, with NaN for a class that appears in neither the
        labels nor the predictions; ``mean_iou`` is the mean over the non-NaN
        entries as a Python float.
    """
    model.eval()
    # Accumulate pixel counts as int64: a float32 accumulator cannot represent
    # counts above 2**24 exactly, which a full validation pass exceeds.
    hist = torch.zeros((num_classes, num_classes), dtype=torch.long, device=device)

    for images, masks in tqdm(dataloader, desc='Evaluating IoU'):
        images = images.to(device)
        masks = masks.to(device)

        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)

        # Void pixels (label 255) take no part in the metric.
        valid = masks != 255
        hist += fast_hist(
            preds[valid],
            masks[valid],
            num_classes
        )

    hist = hist.cpu().double()
    intersection = torch.diag(hist)
    # Row sums are ground-truth pixel counts, column sums are predicted counts.
    union = hist.sum(1) + hist.sum(0) - intersection
    # 0 / 0 yields NaN for classes with no pixels at all, so nanmean below
    # skips them instead of counting them as an IoU of 0.
    iou = intersection / union

    mean_iou = torch.nanmean(iou).item()
    return iou.numpy(), mean_iou


In [24]:
    # Hyperparameters
    # NOTE(review): the DataLoaders built earlier in the notebook hard-code
    # batch_size=4 and do not read BATCH_SIZE.
    BATCH_SIZE = 4
    LEARNING_RATE = 1e-4  # passed to the Adam optimizer
    NUM_EPOCHS = 50
    NUM_CLASSES = 21      # background + 20 object classes (see the class-name list in the training loop)
    MODEL_TYPE = 'fcn8s'  # 'fcn32s', 'fcn16s', 'fcn8s', or 'fcns'

In [25]:
    # Device: use the GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'Using device: {device}')

Using device: cuda


In [26]:
# Create VGGNet backbone: pretrained VGG-16 with trainable weights.
# remove_fc keeps its default (True), so the classifier head is dropped.
vgg_model = VGGNet(pretrained=True, model='vgg16', requires_grad=True)



Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


100%|██████████| 528M/528M [00:02<00:00, 195MB/s] 


In [27]:
# Create FCN model
# Maps each MODEL_TYPE string to the decoder class that implements it.
model_dict = {
    'fcn32s': FCN32s,
    'fcn16s': FCN16s,
    'fcn8s': FCN8s,
    'fcns': FCNs
}

# Wrap the shared VGG backbone in the selected decoder and move it to `device`.
# An unknown MODEL_TYPE raises KeyError here.
model = model_dict[MODEL_TYPE](pretrained_net=vgg_model, n_class=NUM_CLASSES).to(device)
print(f'\nUsing {MODEL_TYPE.upper()} architecture')


Using FCN8S architecture


In [28]:
# Loss and optimizer
# Pixels labelled 255 (void) are excluded from the cross-entropy loss.
criterion = nn.CrossEntropyLoss(ignore_index=255)
# model.parameters() includes the backbone, which is a sub-module of `model`.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Halve the learning rate every 20 scheduler steps; the training loop steps once per epoch.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

## Training

In [29]:
# Class names indexed by label id. Defined once here rather than rebuilt on
# every evaluation pass inside the loop.
voc_classes = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

# Run the (slower) mIoU evaluation and checkpointing every EVAL_EVERY epochs.
EVAL_EVERY = 5

# Best validation mIoU seen so far; a checkpoint is written whenever it improves.
best_miou = 0.0

# NUM_EPOCHS is taken from the hyperparameter cell. It is deliberately not
# reassigned here, so changing it there actually takes effect.
for epoch in range(NUM_EPOCHS):
    print(f'\n{"="*60}')
    print(f'Epoch {epoch+1}/{NUM_EPOCHS}')
    print(f'{"="*60}')

    train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss = validate_epoch(model, val_loader, criterion, device)

    print(f'Train Loss: {train_loss:.4f}')
    print(f'Val   Loss: {val_loss:.4f}')

    if (epoch + 1) % EVAL_EVERY == 0:
        print('\nEvaluating mIoU...')
        ious, mean_iou = evaluate_iou(model, val_loader, device, NUM_CLASSES)
        print(f'Mean IoU: {mean_iou*100:.2f}%')

        print('\nPer-class IoU:')
        for cls, iou in zip(voc_classes, ious):
            print(f'{cls:15s}: {iou*100:5.2f}%')

        # Keep only the weights of the best-scoring evaluation.
        if mean_iou > best_miou:
            best_miou = mean_iou
            torch.save(model.state_dict(), f'fcn_{MODEL_TYPE}_best.pth')
            print(f'✓ Model saved! Best mIoU: {best_miou*100:.2f}%')

    # One scheduler step per epoch, after that epoch's optimizer updates.
    scheduler.step()



Epoch 1/50


Training: 100%|██████████| 366/366 [00:33<00:00, 10.84it/s]
Validation: 100%|██████████| 363/363 [00:11<00:00, 31.12it/s]


Train Loss: 2.4259
Val   Loss: 1.9698

Epoch 2/50


Training: 100%|██████████| 366/366 [00:33<00:00, 11.01it/s]
Validation: 100%|██████████| 363/363 [00:11<00:00, 30.50it/s]


Train Loss: 1.7582
Val   Loss: 1.5420

Epoch 3/50


Training: 100%|██████████| 366/366 [00:33<00:00, 10.82it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 29.93it/s]


Train Loss: 1.3979
Val   Loss: 1.2585

Epoch 4/50


Training: 100%|██████████| 366/366 [00:34<00:00, 10.66it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 29.44it/s]


Train Loss: 1.1594
Val   Loss: 1.2603

Epoch 5/50


Training: 100%|██████████| 366/366 [00:34<00:00, 10.52it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 29.15it/s]


Train Loss: 1.0443
Val   Loss: 1.0086

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.31it/s]


Mean IoU: 9.84%

Per-class IoU:
background     : 86.00%
aeroplane      :  1.68%
bicycle        :  0.01%
bird           :  0.34%
boat           :  0.00%
bottle         :  0.00%
bus            : 17.31%
car            :  7.50%
cat            : 28.60%
chair          :  0.59%
cow            :  0.03%
diningtable    :  0.01%
dog            :  0.18%
horse          :  0.39%
motorbike      :  3.94%
person         : 54.36%
pottedplant    :  0.00%
sheep          :  0.01%
sofa           :  0.00%
train          :  4.77%
tvmonitor      :  1.01%
✓ Model saved! Best mIoU: 9.84%

Epoch 6/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.39it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.93it/s]


Train Loss: 0.9450
Val   Loss: 1.1215

Epoch 7/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.37it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.88it/s]


Train Loss: 0.8506
Val   Loss: 0.9186

Epoch 8/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.34it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.82it/s]


Train Loss: 0.7781
Val   Loss: 0.9257

Epoch 9/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.74it/s]


Train Loss: 0.7219
Val   Loss: 0.8788

Epoch 10/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.62it/s]


Train Loss: 0.6652
Val   Loss: 0.9499

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.51it/s]


Mean IoU: 14.63%

Per-class IoU:
background     : 88.02%
aeroplane      :  7.37%
bicycle        :  0.01%
bird           :  6.80%
boat           :  1.86%
bottle         :  0.00%
bus            : 32.53%
car            : 28.69%
cat            : 31.67%
chair          :  1.17%
cow            :  0.14%
diningtable    :  8.41%
dog            :  4.31%
horse          :  2.19%
motorbike      : 14.62%
person         : 58.64%
pottedplant    :  0.08%
sheep          :  2.32%
sofa           :  0.57%
train          :  4.96%
tvmonitor      : 12.78%
✓ Model saved! Best mIoU: 14.63%

Epoch 11/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.81it/s]


Train Loss: 0.6243
Val   Loss: 0.9320

Epoch 12/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.73it/s]


Train Loss: 0.5666
Val   Loss: 0.8662

Epoch 13/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.27it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.62it/s]


Train Loss: 0.5109
Val   Loss: 0.7913

Epoch 14/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.78it/s]


Train Loss: 0.4686
Val   Loss: 0.8065

Epoch 15/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.72it/s]


Train Loss: 0.4183
Val   Loss: 0.7754

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.47it/s]


Mean IoU: 24.30%

Per-class IoU:
background     : 88.55%
aeroplane      : 37.18%
bicycle        :  0.00%
bird           : 27.20%
boat           : 11.63%
bottle         :  0.05%
bus            : 41.01%
car            : 37.95%
cat            : 40.79%
chair          :  8.87%
cow            :  1.46%
diningtable    : 20.02%
dog            : 12.77%
horse          :  8.22%
motorbike      : 32.04%
person         : 60.16%
pottedplant    :  2.56%
sheep          : 21.34%
sofa           : 14.15%
train          : 12.18%
tvmonitor      : 32.14%
✓ Model saved! Best mIoU: 24.30%

Epoch 16/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.69it/s]


Train Loss: 0.4211
Val   Loss: 0.7843

Epoch 17/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.72it/s]


Train Loss: 0.3581
Val   Loss: 0.7478

Epoch 18/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.72it/s]


Train Loss: 0.3096
Val   Loss: 0.7298

Epoch 19/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.73it/s]


Train Loss: 0.2653
Val   Loss: 0.7312

Epoch 20/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.27it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.69it/s]


Train Loss: 0.2246
Val   Loss: 0.7299

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.45it/s]


Mean IoU: 34.21%

Per-class IoU:
background     : 88.47%
aeroplane      : 58.47%
bicycle        :  0.00%
bird           : 17.89%
boat           : 27.81%
bottle         : 23.63%
bus            : 55.53%
car            : 52.56%
cat            : 40.66%
chair          :  7.24%
cow            : 24.03%
diningtable    : 20.38%
dog            : 35.14%
horse          : 18.76%
motorbike      : 37.83%
person         : 63.87%
pottedplant    : 20.17%
sheep          : 30.68%
sofa           : 16.19%
train          : 38.49%
tvmonitor      : 40.65%
✓ Model saved! Best mIoU: 34.21%

Epoch 21/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.74it/s]


Train Loss: 0.1879
Val   Loss: 0.7077

Epoch 22/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.72it/s]


Train Loss: 0.1532
Val   Loss: 0.7092

Epoch 23/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.70it/s]


Train Loss: 0.1334
Val   Loss: 0.7040

Epoch 24/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.72it/s]


Train Loss: 0.1242
Val   Loss: 0.6979

Epoch 25/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.75it/s]


Train Loss: 0.1131
Val   Loss: 0.7321

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.54it/s]


Mean IoU: 37.90%

Per-class IoU:
background     : 88.58%
aeroplane      : 61.79%
bicycle        :  5.75%
bird           : 28.58%
boat           : 30.27%
bottle         : 26.65%
bus            : 55.21%
car            : 53.34%
cat            : 51.36%
chair          : 10.15%
cow            : 28.43%
diningtable    : 18.19%
dog            : 39.11%
horse          : 28.39%
motorbike      : 45.09%
person         : 64.78%
pottedplant    : 19.88%
sheep          : 40.55%
sofa           : 17.07%
train          : 42.34%
tvmonitor      : 40.34%
✓ Model saved! Best mIoU: 37.90%

Epoch 26/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.77it/s]


Train Loss: 0.1047
Val   Loss: 0.7468

Epoch 27/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.81it/s]


Train Loss: 0.0991
Val   Loss: 0.8080

Epoch 28/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.81it/s]


Train Loss: 0.1128
Val   Loss: 0.8158

Epoch 29/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.76it/s]


Train Loss: 0.0966
Val   Loss: 0.7475

Epoch 30/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.73it/s]


Train Loss: 0.0737
Val   Loss: 0.7424

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.46it/s]


Mean IoU: 39.21%

Per-class IoU:
background     : 88.60%
aeroplane      : 61.19%
bicycle        : 27.37%
bird           : 31.56%
boat           : 27.72%
bottle         : 29.26%
bus            : 54.36%
car            : 45.02%
cat            : 50.99%
chair          :  9.80%
cow            : 30.86%
diningtable    : 21.44%
dog            : 39.66%
horse          : 26.66%
motorbike      : 47.45%
person         : 65.74%
pottedplant    : 22.11%
sheep          : 41.93%
sofa           : 19.70%
train          : 39.24%
tvmonitor      : 42.82%
✓ Model saved! Best mIoU: 39.21%

Epoch 31/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.27it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.81it/s]


Train Loss: 0.0667
Val   Loss: 0.7567

Epoch 32/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.77it/s]


Train Loss: 0.0604
Val   Loss: 0.7577

Epoch 33/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.72it/s]


Train Loss: 0.0571
Val   Loss: 0.7813

Epoch 34/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.77it/s]


Train Loss: 0.0564
Val   Loss: 0.7367

Epoch 35/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.61it/s]


Train Loss: 0.0653
Val   Loss: 0.8088

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.47it/s]


Mean IoU: 36.91%

Per-class IoU:
background     : 88.28%
aeroplane      : 60.76%
bicycle        : 28.21%
bird           : 26.13%
boat           : 27.62%
bottle         : 26.95%
bus            : 54.66%
car            : 51.46%
cat            : 39.39%
chair          : 12.38%
cow            : 16.79%
diningtable    : 20.62%
dog            : 39.09%
horse          : 24.05%
motorbike      : 43.61%
person         : 64.20%
pottedplant    : 19.38%
sheep          : 36.59%
sofa           : 12.40%
train          : 41.77%
tvmonitor      : 40.82%

Epoch 36/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.76it/s]


Train Loss: 0.0720
Val   Loss: 0.7797

Epoch 37/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.81it/s]


Train Loss: 0.0469
Val   Loss: 0.8001

Epoch 38/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.78it/s]


Train Loss: 0.0382
Val   Loss: 0.7675

Epoch 39/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.81it/s]


Train Loss: 0.0392
Val   Loss: 0.8019

Epoch 40/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.80it/s]


Train Loss: 0.0373
Val   Loss: 0.8084

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.50it/s]


Mean IoU: 40.89%

Per-class IoU:
background     : 88.78%
aeroplane      : 63.77%
bicycle        : 34.16%
bird           : 31.59%
boat           : 34.09%
bottle         : 32.14%
bus            : 57.99%
car            : 55.30%
cat            : 45.65%
chair          : 11.94%
cow            : 29.24%
diningtable    : 21.57%
dog            : 37.67%
horse          : 28.49%
motorbike      : 48.56%
person         : 65.47%
pottedplant    : 26.45%
sheep          : 39.17%
sofa           : 17.48%
train          : 43.77%
tvmonitor      : 45.47%
✓ Model saved! Best mIoU: 40.89%

Epoch 41/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.69it/s]


Train Loss: 0.0316
Val   Loss: 0.8245

Epoch 42/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.73it/s]


Train Loss: 0.0288
Val   Loss: 0.8273

Epoch 43/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.80it/s]


Train Loss: 0.0283
Val   Loss: 0.8322

Epoch 44/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.77it/s]


Train Loss: 0.0258
Val   Loss: 0.8701

Epoch 45/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.31it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.69it/s]


Train Loss: 0.0243
Val   Loss: 0.8255

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.45it/s]


Mean IoU: 41.36%

Per-class IoU:
background     : 88.83%
aeroplane      : 63.20%
bicycle        : 27.25%
bird           : 37.26%
boat           : 34.28%
bottle         : 30.84%
bus            : 58.12%
car            : 56.40%
cat            : 52.07%
chair          : 12.35%
cow            : 24.89%
diningtable    : 24.32%
dog            : 40.21%
horse          : 29.82%
motorbike      : 45.67%
person         : 65.19%
pottedplant    : 24.25%
sheep          : 44.76%
sofa           : 18.02%
train          : 44.38%
tvmonitor      : 46.35%
✓ Model saved! Best mIoU: 41.36%

Epoch 46/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.82it/s]


Train Loss: 0.0235
Val   Loss: 0.8524

Epoch 47/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.28it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.70it/s]


Train Loss: 0.0240
Val   Loss: 0.8418

Epoch 48/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.29it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.79it/s]


Train Loss: 0.0218
Val   Loss: 0.8544

Epoch 49/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.81it/s]


Train Loss: 0.0218
Val   Loss: 0.8991

Epoch 50/50


Training: 100%|██████████| 366/366 [00:35<00:00, 10.30it/s]
Validation: 100%|██████████| 363/363 [00:12<00:00, 28.76it/s]


Train Loss: 0.0208
Val   Loss: 0.8420

Evaluating mIoU...


Evaluating IoU: 100%|██████████| 363/363 [00:12<00:00, 28.55it/s]


Mean IoU: 41.43%

Per-class IoU:
background     : 88.81%
aeroplane      : 62.28%
bicycle        : 29.98%
bird           : 37.50%
boat           : 33.08%
bottle         : 32.29%
bus            : 57.63%
car            : 56.13%
cat            : 51.68%
chair          : 13.28%
cow            : 28.17%
diningtable    : 24.73%
dog            : 40.29%
horse          : 29.62%
motorbike      : 48.12%
person         : 65.04%
pottedplant    : 26.27%
sheep          : 38.25%
sofa           : 17.27%
train          : 43.80%
tvmonitor      : 45.75%
✓ Model saved! Best mIoU: 41.43%
