In [2]:
!pip install torchvision
!apt-get update && apt-get install libgl1

Get:1 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
Get:2 https://packages.cloud.google.com/apt gcsfuse-focal InRelease [1225 B]   
Get:3 https://packages.cloud.google.com/apt cloud-sdk InRelease [6361 B]       
Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease [1581 B]
Hit:5 http://archive.ubuntu.com/ubuntu focal InRelease
Get:6 https://packages.cloud.google.com/apt google-fast-socket InRelease [5015 B]
Get:7 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]
Get:8 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [3549 kB]
Get:9 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1197 kB]
Get:10 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [29.8 kB]
Get:11 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [3490 kB]
Get:12 https://packages.cloud.google.com/apt cloud-sdk/main amd64 Packages [627 kB]
Hit:13 http://

In [1]:
import torch
import torch.optim as optim
import numpy as np
import argparse
from pathlib import Path
from sklearn.model_selection import KFold
import math 
import kornia
import logging
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import random
import os
import optuna
import cv2
import albumentations as A
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from collections import OrderedDict
from torchvision._internally_replaced_utils import load_state_dict_from_url

In [3]:
# from utils.manual_fcn import load_fcn_resnet
# from utils.manual_unet import UNet
# from utils.manual_dlplus import DLv3plus
# 

In [4]:
class UAVDatasetPatches(Dataset):
    """Dataset of (image, mask) patch pairs that are read from disk on access.

    The i-th image in ``img_list`` is paired with the i-th mask in ``msk_list``.
    """

    def __init__(self, img_list, msk_list, transform=None):
        '''
        img_list: list of image Paths to load
        msk_list: list of mask Paths to load (same length and order as img_list)
        transform: optional callable invoked as transform(image=..., mask=...)
                   that returns a mapping with the keys "image" and "mask"
        raises AssertionError if both lists differ in length
        '''
        self.transform = transform
        self.img_list= img_list
        self.msk_list= msk_list
        assert len(self.img_list) == len(self.msk_list), "Image and Mask Patches have different lengths."

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """Load pair ``idx`` and return ``(image, mask)``.

        The image is converted from OpenCV's BGR channel order to RGB and the
        mask is read as a single-channel grayscale array. If a transform was
        given, its "image" and "mask" results are returned instead.

        Raises FileNotFoundError if either file cannot be read.
        """
        image = cv2.imread(str(self.img_list[idx]))
        # cv2.imread signals a missing/unreadable file by returning None instead
        # of raising; fail here with the offending path rather than deep inside
        # cvtColor or the transform.
        if image is None:
            raise FileNotFoundError(f"Could not read image patch: {self.img_list[idx]}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Grayscale read yields an 8-bit single-channel array (not float32);
        # any dtype conversion has to happen in the transform or downstream.
        mask = cv2.imread(str(self.msk_list[idx]), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask patch: {self.msk_list[idx]}")
        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]
        return image, mask

In [5]:
def get_calculated_means_stds_per_fold(fold):
    """Look up the precomputed normalisation statistics of one fold.

    fold: index into the table below (four folds are stored; the usual list
          indexing rules apply, so an out-of-range index raises IndexError).
    returns: tuple ``(means, stds)``, each a list of three floats
             (NOTE(review): presumably one value per RGB channel — confirm).
    """
    # One (means, stds) entry per fold, in fold order.
    per_fold_stats = [
        (
            [0.4895940504368177, 0.4747875829353402, 0.42545172025367883],
            [0.1329905783602554, 0.130645279821384, 0.12234299715980072],
        ),
        (
            [0.4909516814094245, 0.47507395584447076, 0.4252166750637278],
            [0.12910633924968123, 0.12635436744763892, 0.1180632138245313],
        ),
        (
            [0.4863172918463077, 0.4720067749001233, 0.42307293323046524],
            [0.1329739900037901, 0.1304754029316029, 0.12181500603654097],
        ),
        (
            [0.48556443799258586, 0.471592906257259, 0.42337851381822833],
            [0.1335583288658572, 0.1313047051909438, 0.12297522870807812],
        ),
    ]
    fold_means, fold_stds = per_fold_stats[fold]
    return fold_means, fold_stds

In [6]:
from torch import nn
from torch.nn import functional as F

class FCNHead(nn.Sequential):
    """Segmentation head: 3x3 conv -> BatchNorm -> ReLU -> Dropout -> 1x1 conv."""

    def __init__(self, in_ch, out_ch):
        """
        in_ch: number of channels of the incoming feature map
        out_ch: number of output channels of the final 1x1 convolution
        The hidden width is in_ch // 4.
        Caution: Dropout (p=0.1) is kept here although the original
        implementation does not use it.
        """
        hidden_ch = in_ch // 4
        super().__init__(
            nn.Conv2d(in_ch, hidden_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(hidden_ch),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv2d(hidden_ch, out_ch, kernel_size=1),
        )

class FCNtv(nn.Module):
    """
    FCN network without skip connections.
    If there is no dilation in the backbone, the model will be equal to FCN32s.
    The interpolation will make [B, C, 16, 16] --> [B, C, 256, 256]
    If a dilation of x4 is used in the backbone,
    then the model will be equal to FCN8s
    without any skip connections between intermediate layers
    The interpolation will make [B, C, 64, 64] --> [B, C, 256, 256]
    Currently implemented is only bilinear interpolation.

    encoder_name: name of the backbone architecture (stored and logged only)
    backbone: module whose forward returns a mapping containing "layer4"
    head: module applied to the "layer4" feature map
    num_classes: accepted for signature parity with FCNskip; not used here,
                 the number of output channels is determined by ``head``
    n_upsample, b_bilinear, replace_stride_with_dilation: stored and logged
                 only; forward always resizes bilinearly to the input size
    """
    def __init__(self, encoder_name, backbone, head, num_classes=3, n_upsample=32, b_bilinear=True, replace_stride_with_dilation=False):
        super().__init__()
        # Keep the configuration that was actually requested. The encoder name
        # used to be hard-coded to "resnet50" regardless of the argument.
        self.encoder_name = encoder_name
        self.n_upsample = n_upsample
        self.b_bilinear = b_bilinear
        self.replace_stride_with_dilation = replace_stride_with_dilation
        self.backbone = backbone
        self.head = head
        print(f"using test {encoder_name}, {n_upsample}x upsampling with replace strides {replace_stride_with_dilation} and bilinear {b_bilinear}")


    def forward(self, x):
        """Run backbone and head, then resize the result to the spatial size of ``x``."""
        input_shape = x.shape[-2:]
        x = self.backbone(x)["layer4"]
        x = self.head(x)

        # One-shot bilinear upsampling back to the input resolution.
        x = F.interpolate(x, size=input_shape, mode="bilinear", align_corners=False)

        return x


class FCNskip(nn.Module):
    """FCN with additive skip connections from intermediate backbone stages.

    The head scores the "layer4" feature map. Depending on ``n_upsample``:
      * 32: the scores are resized directly to the input size (no skip),
      * 16: the scores are upsampled 2x and summed with a 1x1 projection
            of "layer3",
      *  8: as for 16, then upsampled 2x again and summed with a 1x1
            projection of "layer2".
    Each sum passes through ``self.bn``; the final resize to the input size is
    always bilinear.

    encoder_name: "resnet18"/"resnet34" select the 256/128-channel skip
                  projections, any other name the 1024/512-channel ones
    backbone: module whose forward returns a mapping with "layer2", "layer3"
              and "layer4"
    head: module mapping "layer4" to ``num_classes`` channels
    b_bilinear: if True the intermediate 2x upsampling is bilinear, otherwise
                a transposed convolution (kernel 4, stride 2) is used
    replace_stride_with_dilation: stored and logged only

    NOTE(review): a single BatchNorm (``self.bn``) and a single transposed
    convolution are shared by both fusion steps of the 8x path. This is kept
    as-is so existing checkpoints keep loading.
    """

    def __init__(self, encoder_name, backbone, head, num_classes=3, n_upsample=32, b_bilinear=True, replace_stride_with_dilation=False):
        super().__init__()
        self.n_upsample = n_upsample
        self.encoder_name = encoder_name
        self.replace_stride_with_dilation = replace_stride_with_dilation
        self.b_bilinear=b_bilinear
        print(f"using {encoder_name}, {n_upsample}x upsampling with replace strides {replace_stride_with_dilation} and bilinear {b_bilinear}")

        self.bn = nn.BatchNorm2d(num_features=num_classes)
        # 1x1 projections that bring a skip feature map down to num_classes
        # channels; which ones are used depends on the encoder (see _score_skip).
        self.onebyone128 = nn.Conv2d(128, num_classes, kernel_size=1)
        self.onebyone256 = nn.Conv2d(256, num_classes, kernel_size=1)
        self.onebyone512 = nn.Conv2d(512, num_classes, kernel_size=1)
        self.onebyone1024 = nn.Conv2d(1024, num_classes, kernel_size=1)
        if not b_bilinear:
            # kernel 4 / stride 2 / padding 1 doubles the spatial size exactly
            self.convTranspose = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1, bias=False)
        self.backbone = backbone


        self.head = head

    def _upsample2x(self, x):
        """Double the spatial resolution (bilinear or learned, per ``b_bilinear``)."""
        if self.b_bilinear:
            return F.interpolate(x, scale_factor=2.0, mode="bilinear", align_corners=False)
        return self.convTranspose(x)

    def _score_skip(self, features, layer_name):
        """Project the ``layer_name`` ("layer3" or "layer2") skip feature map to class scores."""
        shallow = self.encoder_name in ["resnet18", "resnet34"]
        if layer_name == "layer3":
            projection = self.onebyone256 if shallow else self.onebyone1024
        else:
            projection = self.onebyone128 if shallow else self.onebyone512
        return projection(features[layer_name])

    def forward(self, x):
        """Return class scores at the spatial size of ``x``.

        Raises NotImplementedError if ``n_upsample`` is not 8, 16 or 32.
        """
        # Reject unsupported settings before spending a backbone pass on them.
        if self.n_upsample not in (8, 16, 32):
            raise NotImplementedError(f"Upsampling of {self.n_upsample} is not implemented. Use either, 8, 16 or 32")

        input_shape = x.shape[-2:]
        features = self.backbone(x)

        # all upsampling/fusion works on the num_classes score maps
        scores = self.head(features["layer4"])

        if self.n_upsample in (8, 16):
            # upsample layer4 scores 2x to the resolution of layer3 and sum both
            scores = self.bn(self._upsample2x(scores) + self._score_skip(features, "layer3"))
        if self.n_upsample == 8:
            # upsample the fused scores 2x to the resolution of layer2 and sum both
            scores = self.bn(self._upsample2x(scores) + self._score_skip(features, "layer2"))

        # final upsampling (x32, x16 or x8) straight to the input size;
        # this step is fixed bilinear, as in the original paper
        return F.interpolate(scores, size=input_shape, mode="bilinear", align_corners=False)



def load_fcn_resnet(encoder_name, num_classes=3, pretrained = False, replace_stride_with_dilation=False, n_upsample=32, b_bilinear=True):
    """
    Constructs a Fully-Convolutional Network model with a ResNet backbone.

    encoder_name: backbone name; "resnet18"/"resnet34" get a 512-channel head
                  input, every other name a 2048-channel one
    num_classes: number of output channels of the head
    pretrained: forwarded to load_resnet
    replace_stride_with_dilation: if truthy, a dilated backbone is loaded and
                  only n_upsample == 8 is supported (FCNtv, no skip
                  connections needed)
    n_upsample: without dilation, 8 or 16 build an FCNskip and 32 an FCNtv
    b_bilinear: forwarded to the FCN wrapper

    Raises NotImplementedError for an unsupported combination of
    replace_stride_with_dilation and n_upsample.
    """
    # Pick the wrapper first so that an unsupported configuration fails before
    # any backbone is constructed (and, with pretrained=True, possibly fetched).
    if replace_stride_with_dilation:
        # with dilated convolutions in the feature extractor only 8x is
        # available --> no skip connections needed
        if n_upsample != 8:
            raise NotImplementedError(f"upsampling of {n_upsample} not implemented when using dilation instead of stride")
        fcn_cls = FCNtv
    elif n_upsample in [8, 16]:
        fcn_cls = FCNskip
    elif n_upsample == 32:
        fcn_cls = FCNtv
    else:
        raise NotImplementedError(f"upsampling of {n_upsample} not implemented when not using dilation")

    # Downstream code receives a real bool, whatever truthy value was passed.
    use_dilation = bool(replace_stride_with_dilation)

    if encoder_name in ["resnet18", "resnet34"]:
        head = FCNHead(512, num_classes)
    else:
        head = FCNHead(2048, num_classes)

    backbone = load_resnet(
        encoder_name=encoder_name,
        num_classes=num_classes,
        pretrained=pretrained,
        replace_stride_with_dilation=use_dilation
        )

    fcn = fcn_cls(encoder_name, backbone, head, num_classes=num_classes, n_upsample=n_upsample, b_bilinear=b_bilinear, replace_stride_with_dilation=use_dilation)

    return fcn

In [7]:
# Download URL of the pretrained checkpoint (.pth) for each supported ResNet
# variant, keyed by encoder name.
# NOTE(review): presumably consumed via load_state_dict_from_url (imported at
# the top of the notebook); the consumer is not part of this cell — confirm.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-f37072fd.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-b627a593.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-0676ba61.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-63fe2227.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-394f9c45.pth',
    }

def replace_strides_with_dilation(module, dilation_rate):
    """Patch Conv2d modules replacing strides with dilation

    Every nn.Conv2d found in ``module`` (including ``module`` itself) is
    modified in place: stride becomes 1, dilation becomes ``dilation_rate`` and
    the padding is set to ``(k // 2) * dilation_rate`` per spatial dimension so
    that odd-sized kernels preserve the spatial size of their input.

    module: nn.Module to patch
    dilation_rate: dilation applied along both spatial dimensions
    returns: None
    """
    for mod in module.modules():
        if isinstance(mod, nn.Conv2d):
            mod.stride = (1, 1)
            mod.dilation = (dilation_rate, dilation_rate)
            # Pad each dimension from its own kernel extent; using the height
            # for both would mis-pad non-square kernels.
            kh, kw = mod.kernel_size
            mod.padding = ((kh // 2) * dilation_rate, (kw // 2) * dilation_rate)

In [8]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by BatchNorm.

    in_ch / out_ch: channels entering and leaving the block
    stride, padding, dilation: applied to the first convolution only; the
        second convolution always uses stride 1, padding 1, dilation 1
    downsample: optional module applied to the block input to produce the
        shortcut; if None the input itself is used
    """

    def __init__(self, in_ch, out_ch, stride=1, padding=1, dilation=1, downsample=None):
        super().__init__()
        first_conv_cfg = dict(kernel_size=3, stride=stride, padding=padding, dilation=dilation, bias=False)
        self.conv1 = nn.Conv2d(in_ch, out_ch, **first_conv_cfg)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU(inplace=True)
        second_conv_cfg = dict(kernel_size=3, stride=1, padding=1, dilation=1, bias=False)
        self.conv2 = nn.Conv2d(out_ch, out_ch, **second_conv_cfg)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(residual_branch(x) + shortcut(x))."""
        # residual branch: conv-bn-relu, conv-bn
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        # shortcut: identity unless a projection module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, each with BatchNorm.

    in_ch / mid_ch / out_ch: channels entering the block, inside the 3x3
        convolution, and leaving the block
    stride, padding, dilation: applied to the 3x3 convolution only
    downsample: optional module applied to the block input to produce the
        shortcut; if None the input itself is used
    """

    def __init__(self, in_ch, mid_ch, out_ch, stride=1, padding=1, dilation=1, downsample=None):
        super().__init__()
        # 1x1 reduce
        self.conv1 = nn.Conv2d(in_ch, mid_ch, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_ch)
        # 3x3 spatial convolution (the only one that may stride/dilate)
        spatial_cfg = dict(kernel_size=3, stride=stride, padding=padding, dilation=dilation, bias=False)
        self.conv2 = nn.Conv2d(mid_ch, mid_ch, **spatial_cfg)
        self.bn2 = nn.BatchNorm2d(mid_ch)
        # 1x1 expand
        self.conv3 = nn.Conv2d(mid_ch, out_ch, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(residual_branch(x) + shortcut(x))."""
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))

        # shortcut: identity unless a projection module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)

In [9]:

class ResNet18(nn.Module):
    """ResNet-18 style feature extractor built from BasicBlock (2, 2, 2, 2 blocks).

    No classification layer is created; ``forward`` returns the intermediate
    feature maps. ``num_classes`` is only stored on the instance.
    With ``output_stride`` other than 32 the late stages are converted from
    strided to dilated convolutions (see ``make_dilated``).
    """

    def __init__(self, num_classes=3, output_stride=32):
        super().__init__()
        self.num_classes = num_classes

        # stem: 7x7 stride-2 convolution followed by a stride-2 max pooling
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # first stage keeps resolution and width, so no projection shortcut
        self.layer1 = nn.Sequential(
            BasicBlock(in_ch=64, out_ch=64, stride=1, downsample=None),
            BasicBlock(in_ch=64, out_ch=64, stride=1, downsample=None),
        )
        # remaining stages halve the resolution and double the width
        self.layer2 = self._strided_stage(64, 128)
        self.layer3 = self._strided_stage(128, 256)
        self.layer4 = self._strided_stage(256, 512)

        if output_stride != 32:
            self.make_dilated(output_stride=output_stride)

    @staticmethod
    def _strided_stage(in_ch, out_ch):
        """Two-block stage whose first block strides by 2 and uses a 1x1 conv + BatchNorm shortcut."""
        shortcut = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(out_ch),
        )
        return nn.Sequential(
            BasicBlock(in_ch=in_ch, out_ch=out_ch, stride=2, padding=1, dilation=1, downsample=shortcut),
            BasicBlock(in_ch=out_ch, out_ch=out_ch, stride=1, padding=1, dilation=1, downsample=None),
        )

    def forward(self, x):
        """Return an OrderedDict with the outputs "layer0" (stem before pooling) to "layer4"."""
        features = OrderedDict()
        x = self.relu(self.bn1(self.conv1(x)))
        features["layer0"] = x
        x = self.maxpool(x)
        for stage_name in ("layer1", "layer2", "layer3", "layer4"):
            x = getattr(self, stage_name)(x)
            features[stage_name] = x
        return features

    def get_stages(self):
        """List the network as six consecutive stages (index 0 is an identity)."""
        stem = nn.Sequential(self.conv1, self.bn1, self.relu)
        pooled_layer1 = nn.Sequential(self.maxpool, self.layer1)
        return [nn.Identity(), stem, pooled_layer1, self.layer2, self.layer3, self.layer4]

    def make_dilated(self, output_stride):
        """Replace strides by dilation in the late stages.

        output_stride 16 patches stage 5 (layer4) with dilation 2;
        output_stride 8 patches stage 4 (layer3) with dilation 2 and
        stage 5 (layer4) with dilation 4. Any other value raises ValueError.
        """
        print("making dilated model")
        if output_stride == 16:
            plan = [(5, 2)]
        elif output_stride == 8:
            plan = [(4, 2), (5, 4)]
        else:
            raise ValueError("Output stride should be 16 or 8, got {}.".format(output_stride))

        stages = self.get_stages()
        for stage_indx, dilation_rate in plan:
            replace_strides_with_dilation(module=stages[stage_indx], dilation_rate=dilation_rate)

class ResNet34(nn.Module):
    """ResNet-34 style feature extractor built from BasicBlock (3, 4, 6, 3 blocks).

    No classification layer is created; ``forward`` returns the intermediate
    feature maps. ``num_classes`` is only stored on the instance.
    With ``output_stride`` other than 32 the late stages are converted from
    strided to dilated convolutions (see ``make_dilated``).
    """

    def __init__(self, num_classes=3, output_stride=32):
        super().__init__()
        self.num_classes = num_classes

        # stem: 7x7 stride-2 convolution followed by a stride-2 max pooling
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # first stage keeps resolution and width, so no projection shortcut
        self.layer1 = nn.Sequential(
            *[BasicBlock(in_ch=64, out_ch=64, stride=1, downsample=None) for _ in range(3)]
        )
        # remaining stages halve the resolution and double the width
        self.layer2 = self._strided_stage(64, 128, num_blocks=4)
        self.layer3 = self._strided_stage(128, 256, num_blocks=6)
        self.layer4 = self._strided_stage(256, 512, num_blocks=3)

        if output_stride != 32:
            self.make_dilated(output_stride=output_stride)

    @staticmethod
    def _strided_stage(in_ch, out_ch, num_blocks):
        """Stage of ``num_blocks`` blocks; the first strides by 2 and uses a 1x1 conv + BatchNorm shortcut."""
        shortcut = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(out_ch),
        )
        blocks = [BasicBlock(in_ch=in_ch, out_ch=out_ch, stride=2, padding=1, dilation=1, downsample=shortcut)]
        for _ in range(num_blocks - 1):
            blocks.append(BasicBlock(in_ch=out_ch, out_ch=out_ch, stride=1, padding=1, dilation=1, downsample=None))
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return an OrderedDict with the outputs "layer0" (stem before pooling) to "layer4"."""
        features = OrderedDict()
        x = self.relu(self.bn1(self.conv1(x)))
        features["layer0"] = x
        x = self.maxpool(x)
        for stage_name in ("layer1", "layer2", "layer3", "layer4"):
            x = getattr(self, stage_name)(x)
            features[stage_name] = x
        return features

    def get_stages(self):
        """List the network as six consecutive stages (index 0 is an identity)."""
        stem = nn.Sequential(self.conv1, self.bn1, self.relu)
        pooled_layer1 = nn.Sequential(self.maxpool, self.layer1)
        return [nn.Identity(), stem, pooled_layer1, self.layer2, self.layer3, self.layer4]

    def make_dilated(self, output_stride):
        """Replace strides by dilation in the late stages.

        output_stride 16 patches stage 5 (layer4) with dilation 2;
        output_stride 8 patches stage 4 (layer3) with dilation 2 and
        stage 5 (layer4) with dilation 4. Any other value raises ValueError.
        """
        print("making dilated model")
        if output_stride == 16:
            plan = [(5, 2)]
        elif output_stride == 8:
            plan = [(4, 2), (5, 4)]
        else:
            raise ValueError("Output stride should be 16 or 8, got {}.".format(output_stride))

        stages = self.get_stages()
        for stage_indx, dilation_rate in plan:
            replace_strides_with_dilation(module=stages[stage_indx], dilation_rate=dilation_rate)

class ResNet50(nn.Module):
    """ResNet-50 style feature extractor built from Bottleneck (3, 4, 6, 3 blocks).

    No classification layer is created; ``forward`` returns the intermediate
    feature maps. ``num_classes`` is only stored on the instance.
    With ``output_stride`` other than 32 the late stages are converted from
    strided to dilated convolutions (see ``make_dilated``).

    TODO: check if we can simplify the replace_stride_with_dilation if statement
    """

    def __init__(self, num_classes=3, output_stride=32):
        super().__init__()
        self.num_classes = num_classes

        # stem: 7x7 stride-2 convolution followed by a stride-2 max pooling
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # every stage opens with a projection shortcut because the width
        # changes; only stages 2-4 also stride
        self.layer1 = self._bottleneck_stage(64, 64, 256, num_blocks=3, stride=1)
        self.layer2 = self._bottleneck_stage(256, 128, 512, num_blocks=4, stride=2)
        self.layer3 = self._bottleneck_stage(512, 256, 1024, num_blocks=6, stride=2)
        self.layer4 = self._bottleneck_stage(1024, 512, 2048, num_blocks=3, stride=2)

        if output_stride != 32:
            self.make_dilated(output_stride=output_stride)

    @staticmethod
    def _bottleneck_stage(in_ch, mid_ch, out_ch, num_blocks, stride):
        """Stage of ``num_blocks`` bottlenecks; the first applies ``stride`` and a 1x1 conv + BatchNorm shortcut."""
        shortcut = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_ch),
        )
        blocks = [Bottleneck(in_ch=in_ch, mid_ch=mid_ch, out_ch=out_ch, stride=stride, padding=1, dilation=1, downsample=shortcut)]
        for _ in range(num_blocks - 1):
            blocks.append(Bottleneck(in_ch=out_ch, mid_ch=mid_ch, out_ch=out_ch, stride=1, padding=1, dilation=1, downsample=None))
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return an OrderedDict with the outputs "layer0" (stem before pooling) to "layer4"."""
        features = OrderedDict()
        x = self.relu(self.bn1(self.conv1(x)))
        features["layer0"] = x
        x = self.maxpool(x)
        for stage_name in ("layer1", "layer2", "layer3", "layer4"):
            x = getattr(self, stage_name)(x)
            features[stage_name] = x

        return features

    def get_stages(self):
        """List the network as six consecutive stages (index 0 is an identity)."""
        stem = nn.Sequential(self.conv1, self.bn1, self.relu)
        pooled_layer1 = nn.Sequential(self.maxpool, self.layer1)
        return [nn.Identity(), stem, pooled_layer1, self.layer2, self.layer3, self.layer4]

    def make_dilated(self, output_stride):
        """Replace strides by dilation in the late stages.

        output_stride 16 patches stage 5 (layer4) with dilation 2;
        output_stride 8 patches stage 4 (layer3) with dilation 2 and
        stage 5 (layer4) with dilation 4. Any other value raises ValueError.
        """
        print("making dilated model")
        if output_stride == 16:
            plan = [(5, 2)]
        elif output_stride == 8:
            plan = [(4, 2), (5, 4)]
        else:
            raise ValueError("Output stride should be 16 or 8, got {}.".format(output_stride))

        stages = self.get_stages()
        for stage_indx, dilation_rate in plan:
            replace_strides_with_dilation(module=stages[stage_indx], dilation_rate=dilation_rate)


class ResNet101(nn.Module):
    """
    ResNet-101 style backbone assembled from ``Bottleneck`` blocks.

    A stem (7x7 conv, batch norm, ReLU, 3x3 max-pool) is followed by four
    stages holding 3, 4, 23 and 3 bottleneck blocks. ``forward`` returns the
    intermediate activations of the stem and of each stage.

    Args:
        num_classes: stored as ``self.num_classes``; not otherwise used inside
            this class.
        output_stride: 32 keeps the strided layout built here; any other value
            is forwarded to ``make_dilated`` (which accepts 16 or 8).

    TODO (original author): check if we can simplify the
    replace_stride_with_dilation if statement
    """

    def __init__(self, num_classes=3, output_stride=32):
        super().__init__()
        self.num_classes = num_classes

        # stem
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # The first two stages rely on Bottleneck's default padding/dilation;
        # the last two pass padding=1 / dilation=1 explicitly.
        self.layer1 = self._build_stage(in_ch=64, mid_ch=64, out_ch=256, depth=3, first_stride=1)
        self.layer2 = self._build_stage(in_ch=256, mid_ch=128, out_ch=512, depth=4, first_stride=2)
        self.layer3 = self._build_stage(in_ch=512, mid_ch=256, out_ch=1024, depth=23, first_stride=2,
                                        padding=1, dilation=1)
        self.layer4 = self._build_stage(in_ch=1024, mid_ch=512, out_ch=2048, depth=3, first_stride=2,
                                        padding=1, dilation=1)

        if output_stride != 32:
            self.make_dilated(output_stride=output_stride)

    @staticmethod
    def _build_stage(in_ch, mid_ch, out_ch, depth, first_stride, **block_kwargs):
        """
        Build one stage as an ``nn.Sequential`` of ``depth`` Bottleneck blocks.

        Only the first block changes the channel count / applies
        ``first_stride`` and therefore receives a 1x1 conv + batch-norm
        ``downsample`` branch; the remaining blocks map ``out_ch -> out_ch``
        with stride 1 and no downsample. ``block_kwargs`` are forwarded
        unchanged to every Bottleneck.
        """
        # the projection is created before the blocks, as in the hand-written layout
        shortcut = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=1, stride=first_stride, bias=False),
            nn.BatchNorm2d(out_ch),
        )
        blocks = [
            Bottleneck(in_ch=in_ch, mid_ch=mid_ch, out_ch=out_ch, stride=first_stride,
                       downsample=shortcut, **block_kwargs)
        ]
        for _ in range(depth - 1):
            blocks.append(
                Bottleneck(in_ch=out_ch, mid_ch=mid_ch, out_ch=out_ch, stride=1,
                           downsample=None, **block_kwargs)
            )
        return nn.Sequential(*blocks)

    def forward(self, x):
        """
        Run the backbone and return an OrderedDict with the keys "layer0"
        (stem output before max-pooling) and "layer1" .. "layer4".
        """
        features = OrderedDict()

        stem_out = self.relu(self.bn1(self.conv1(x)))
        features["layer0"] = stem_out

        out = self.maxpool(stem_out)
        for key, stage in (
            ("layer1", self.layer1),
            ("layer2", self.layer2),
            ("layer3", self.layer3),
            ("layer4", self.layer4),
        ):
            out = stage(out)
            features[key] = out

        return features

    def get_stages(self):
        """
        Return the backbone as six stages: identity placeholder, stem,
        maxpool + layer1, layer2, layer3, layer4.
        """
        stem = nn.Sequential(self.conv1, self.bn1, self.relu)
        pooled_layer1 = nn.Sequential(self.maxpool, self.layer1)
        return [nn.Identity(), stem, pooled_layer1, self.layer2, self.layer3, self.layer4]

    def make_dilated(self, output_stride):
        """
        Replace strides with dilation in layer4 (output_stride 16) or in
        layer3 and layer4 (output_stride 8).

        Raises:
            ValueError: if ``output_stride`` is neither 16 nor 8.
        """
        print("making dilated model")

        # (stage index into get_stages(), dilation rate)
        if output_stride == 16:
            schedule = ((5, 2),)
        elif output_stride == 8:
            schedule = ((4, 2), (5, 4))
        else:
            raise ValueError("Output stride should be 16 or 8, got {}.".format(output_stride))

        stages = self.get_stages()
        for stage_idx, rate in schedule:
            replace_strides_with_dilation(module=stages[stage_idx], dilation_rate=rate)

In [10]:
def load_resnet(encoder_name, num_classes, pretrained, replace_stride_with_dilation, progress=True):
    """
    Build one of the hand-written ResNet backbones and optionally load weights.

    Args:
        encoder_name: "resnet18", "resnet34", "resnet50" or "resnet101".
        num_classes: forwarded to the backbone constructor.
        pretrained: if truthy, download the state dict from
            ``model_urls[encoder_name]``, drop the ``fc`` entries and load it.
        replace_stride_with_dilation: if truthy the backbone is built with
            output stride 8, otherwise 32.
        progress: forwarded to ``load_state_dict_from_url``.

    Returns:
        The backbone with ``requires_grad`` enabled on every parameter.

    Raises:
        NotImplementedError: for any other ``encoder_name`` (incl. "resnet152").
    """
    output_stride = 32
    if replace_stride_with_dilation:
        print("replacing stride with dilation")
        output_stride = 8

    # class names are resolved only inside the selected branch
    if encoder_name == "resnet18":
        backbone_cls = ResNet18
    elif encoder_name == "resnet34":
        backbone_cls = ResNet34
    elif encoder_name == "resnet50":
        backbone_cls = ResNet50
    elif encoder_name == "resnet101":
        backbone_cls = ResNet101
    else:
        raise NotImplementedError(f"{encoder_name} is not implemented.")

    model = backbone_cls(num_classes=num_classes, output_stride=output_stride)
    # every parameter stays trainable
    model.requires_grad_(True)

    if not pretrained:
        print("TRAINING WITH RANDOM INITIALIZED WEIGHTS")
        return model

    print("LOADING PRETRAINED MODEL WEIGHTS FROM IMAGENET")
    weights = load_state_dict_from_url(model_urls[encoder_name], progress=progress)
    # the fully connected head is not part of the feature extractor
    weights.pop('fc.weight', None)
    weights.pop('fc.bias', None)
    model.load_state_dict(weights)
    return model

In [11]:
# Root logger: INFO and above, rendered as "LEVEL:message".
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s')

# Device that train_epoch / validate_epoch move inputs and targets to.
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"

In [12]:
def get_patch_lists(data_path, subset):
    """
    Collect the PNG patches of one dataset subset.

    Looks in ``{data_path}/{subset}/patches/img`` and ``.../msk`` and returns
    ``(img_list, msk_list)``, each a sorted list of ``Path`` objects.
    Image/mask pairing relies purely on both lists sorting into the same
    order; no cross-check is performed here.
    """
    patch_root = Path(f"{data_path}/{subset}/patches")
    img_list = sorted(patch_root.glob('./img/*.png'))
    msk_list = sorted(patch_root.glob('./msk/*.png'))
    return img_list, msk_list

In [13]:
def set_study(db_name, study_name,root_path, seed, b_clean_study=False):
    '''
    Creates (or re-opens) an Optuna study stored in the sqlite database
    {root_path}/results/{db_name}.db

    db_name: file name of the sqlite database, without the .db suffix
    study_name: name of the study inside that database
    root_path: directory that contains the results/ folder (must already exist)
    seed: seed for the TPE sampler
    b_clean_study: if True, an existing study of that name is deleted first;
                   a study that does not exist yet is simply created

    returns the study (direction "minimize"); an existing study of the same
    name is loaded instead of raising
    '''
    storage_url = f"sqlite:///{root_path}/results/{db_name}.db"
    sampler = optuna.samplers.TPESampler(seed=seed)
    storage = optuna.storages.RDBStorage(storage_url, heartbeat_interval=1)
    if b_clean_study:
        print(f"CAUTION: Deleting existing trials in study {study_name}")
        try:
            optuna.delete_study(study_name=study_name, storage=storage)
        except KeyError:
            # first run against a fresh database: there is nothing to delete yet
            print(f"Study {study_name} does not exist yet, nothing to delete")

    study = optuna.create_study(storage=storage, study_name=study_name, sampler=sampler, direction="minimize", load_if_exists=True)
    return study

def seed_all(seed):
    '''
    Seeds python's random module, numpy and pytorch (CPU and all CUDA devices)
    and configures cuDNN for reproducible results.

    cudnn.benchmark is switched OFF: with benchmarking enabled cuDNN
    auto-tunes the convolution algorithm and may pick a different one from run
    to run, which defeats cudnn.deterministic.

    PYTHONHASHSEED is exported as well, but it only takes effect for python
    processes started afterwards, not for the already running kernel.
    One still needs to restart the kernel to get reproducible results, as discussed in:
    https://stackoverflow.com/questions/32172054/how-can-i-retrieve-the-current-seed-of-numpys-random-number-generator
    '''
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def get_loaders(train_img_dir, train_msk_dir, valid_img_dir ,valid_msk_dir, mean, std, batch_size, num_workers=4, pin_memory=True):
    """
    Build the training and validation DataLoaders.

    Despite their names, the four ``*_dir`` arguments are passed on as the
    ``img_list`` / ``msk_list`` of ``UAVDatasetPatches``. Training samples are
    augmented (flips, CLAHE, 90-degree rotations, transpose) before being
    normalised with ``mean`` / ``std`` and converted to tensors; validation
    samples are only normalised and converted. The training loader shuffles,
    the validation loader does not.

    Returns:
        (train_loader, valid_loader)
    """
    def _normalize_and_tensorize():
        # fresh transform objects for each pipeline
        return [
            A.Normalize(mean=mean, std=std, max_pixel_value=255.0),
            ToTensorV2(),
        ]

    augmentations = [
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.CLAHE(),
        A.RandomRotate90(),
        A.Transpose(),
    ]
    train_transform = A.Compose(augmentations + _normalize_and_tensorize())
    valid_transform = A.Compose(_normalize_and_tensorize())

    shared_loader_args = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory)

    train_ds = UAVDatasetPatches(img_list=train_img_dir, msk_list=train_msk_dir, transform=train_transform)
    train_loader = DataLoader(train_ds, shuffle=True, **shared_loader_args)

    valid_ds = UAVDatasetPatches(img_list=valid_img_dir, msk_list=valid_msk_dir, transform=valid_transform)
    valid_loader = DataLoader(valid_ds, shuffle=False, **shared_loader_args)

    return train_loader, valid_loader

In [14]:
def set_model(architecture, encoder_name, pretrained, b_bilinear, replace_stride_with_dilation, num_classes=3):
    """
    Create the FCN variant selected by ``architecture``.

    The architecture only decides the ``n_upsample`` value handed to
    ``load_fcn_resnet``: "fcn16s" -> 16, "fcn8s" -> 8, and "fcn32s" -> 32, or
    8 when ``replace_stride_with_dilation`` is truthy. All other arguments are
    forwarded unchanged.

    Raises:
        NotImplementedError: for any other ``architecture``.
    """
    model_name = f"{architecture}_{encoder_name}"
    print(f"MODEL NAME: {model_name}")

    if architecture == "fcn32s":
        # a dilated backbone has output stride 8, so less upsampling is needed
        n_upsample = 8 if replace_stride_with_dilation else 32
    elif architecture == "fcn16s":
        n_upsample = 16
    elif architecture == "fcn8s":
        n_upsample = 8
    else:
        raise NotImplementedError("Specified Model is not defined. Currently implemented architectures are: fcn. Currently implemented feature extractors: resnet50, resnet101")

    return load_fcn_resnet(
        encoder_name,
        num_classes=num_classes,
        pretrained=pretrained,
        replace_stride_with_dilation=replace_stride_with_dilation,
        n_upsample=n_upsample,
        b_bilinear=b_bilinear,
    )

def save_checkpoint(state, filename="my_ckpt.pth.tar"):
    """Serialise ``state`` to ``filename`` with ``torch.save``; returns None."""
    torch.save(obj=state, f=filename)

def train_epoch(loader, model, optimizer, loss_fn, scaler, trial_number=None, fold=None, cur_epoch=None):
    """
    Train ``model`` for one pass over ``loader`` with mixed precision.

    Args:
        loader: iterable yielding ``(data, targets)`` batches.
        model: network to optimise; it is NOT switched to train mode here.
        optimizer: optimiser stepped once per batch through ``scaler``.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        scaler: gradient scaler (``torch.cuda.amp.GradScaler``) used for the
            backward pass and the optimiser step.
        trial_number, fold, cur_epoch: only used for the progress-bar label;
            when ``fold`` or ``trial_number`` is None the bar reads "Retraining".

    Returns:
        float: mean of the per-batch losses of this epoch, so that callers
        (LR scheduler, early stopping) see the whole epoch and not only its
        last batch. NaN if ``loader`` yielded no batch.
    """
    losses = []
    with tqdm(loader, unit="batch", leave=True) as tepoch:
        if fold is not None and trial_number is not None:
            tepoch.set_description(f"Training T{trial_number} F{fold} E{cur_epoch}")
        else:
            tepoch.set_description(f"Retraining E{cur_epoch}")
        for data, targets in tepoch:
            data = data.float().to(device=DEVICE)
            targets = targets.long().to(device=DEVICE)

            optimizer.zero_grad()
            # forward: only the model call and the loss run under autocast
            with torch.set_grad_enabled(True), torch.cuda.amp.autocast():
                predictions = model(data)
                loss = loss_fn(predictions, targets)

            # backward + step happen outside the autocast region
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # update progress bar
            batch_loss = loss.item()
            losses.append(batch_loss)
            tepoch.set_postfix(train_loss=batch_loss)

        epoch_loss = float(np.mean(losses)) if losses else float("nan")
        tepoch.set_postfix(train_losses=epoch_loss)
    return epoch_loss

def validate_epoch(loader, model, cur_epoch, fold=None, trial_number=None):
    """
    Evaluate ``model`` on ``loader`` and return a single Dice loss.

    The model outputs and the targets of all batches are concatenated along
    dim 0 and ``kornia.losses.dice_loss`` is computed once over the complete
    set (not as a mean of per-batch losses). The model is put into eval mode
    for the pass and is always switched back to train mode afterwards, also
    when an exception is raised.

    Args:
        loader: iterable yielding ``(inputs, targets)`` batches.
        model: network to evaluate.
        cur_epoch, fold, trial_number: only used for the progress-bar label
            and the log message.

    Returns:
        float: Dice loss over all validation samples.

    Raises:
        ValueError: if ``loader`` yields no batch.
    """
    batch_predictions = []
    batch_targets = []
    model.eval()
    try:
        with torch.no_grad():
            with tqdm(loader, unit="batch", leave=False) as tepoch:
                if fold is not None and trial_number is not None:
                    tepoch.set_description(f"Validating T{trial_number} F{fold} E{cur_epoch}")
                else:
                    tepoch.set_description(f"Validating E{cur_epoch}")
                for inputs, targets in tepoch:
                    inputs = inputs.float().to(device=DEVICE)
                    targets = targets.long().to(device=DEVICE)
                    # collect per-batch results; they are concatenated once below
                    # instead of re-copying the growing tensor for every batch
                    batch_predictions.append(model(inputs))
                    batch_targets.append(targets)

            if not batch_predictions:
                raise ValueError("validate_epoch received a loader without any batches")

            predictions_whole = torch.cat(batch_predictions, dim=0)
            targets_whole = torch.cat(batch_targets, dim=0)
            dice_loss = kornia.losses.dice_loss(predictions_whole, targets_whole).item()
    finally:
        model.train()
    logging.info(f"Validating T{trial_number} F{fold} E{cur_epoch}: valid loss {dice_loss}")
    return dice_loss

In [15]:
def objective(trial):
    """
    Optuna objective: k-fold cross-validation of one (lr, lr_factor) sample.

    Reads its configuration from notebook globals (num_folds, data_path,
    architecture, encoder_name, pretrained, b_bilinear,
    replace_stride_with_dilation, lr_ranges, lr_factor_ranges,
    lr_scheduler_patience, batch_size, num_workers, max_epochs, es_patience,
    b_save_checkpoint, model_path, root_path, device).

    For every fold a fresh model is trained with Adam + ReduceLROnPlateau and
    validated with the Dice loss after each epoch. A fold stops early once
    ``es_patience`` consecutive epochs brought no new best validation loss.

    Returns:
        Mean over folds of the best validation loss. A fold that never
        produced a finite improvement keeps the sentinel 99999.
    """
    kfold = KFold(n_splits=num_folds, shuffle=False)
    loss_total = np.ones(num_folds)*99999
    epochs = np.zeros(num_folds)
    img_list, msk_list = get_patch_lists(data_path=data_path,subset="trainval")

    # Hyperparameters are sampled once per trial and shared by all folds.
    lr = trial.suggest_float("lr", lr_ranges[0], lr_ranges[1], log=True)
    print(f"suggested LR: {lr}")
    # lr_factor is sampled as an integer in tenths (e.g. 7 -> factor 0.7)
    reduce_factor = trial.suggest_int("lr_factor", int(lr_factor_ranges[0]*10), int(lr_factor_ranges[1]*10), step=int(lr_factor_ranges[2]*10))
    reduce_factor = reduce_factor*0.1

    # NOTE(review): the file name does not contain trial or fold, so every new
    # best epoch of any fold/trial overwrites the same checkpoint.
    checkpoint_file = f"{str(model_path)}/{architecture}_{encoder_name}_dil{int(replace_stride_with_dilation)}_bilin{int(b_bilinear)}_pre{int(pretrained)}.pth.tar"

    for fold, (train_ids, val_ids) in enumerate(kfold.split(img_list)):
        train_img_dir = [img_list[i] for i in train_ids]
        train_msk_dir = [msk_list[i] for i in train_ids]
        valid_img_dir = [img_list[i] for i in val_ids]
        valid_msk_dir = [msk_list[i] for i in val_ids]
        epochs_no_improve = 0

        model = set_model(architecture=architecture, encoder_name=encoder_name, pretrained=pretrained, b_bilinear=b_bilinear, replace_stride_with_dilation=replace_stride_with_dilation, num_classes=3).to(device=device)

        loss_fn = kornia.losses.DiceLoss()
        optimizer = optim.Adam(model.parameters(), lr = lr)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=reduce_factor, min_lr=lr_ranges[0], patience=lr_scheduler_patience)
        means, stds = get_calculated_means_stds_per_fold(fold)
        train_loader, valid_loader = get_loaders(
            train_img_dir = train_img_dir,
            train_msk_dir = train_msk_dir,
            valid_img_dir = valid_img_dir,
            valid_msk_dir = valid_msk_dir,
            mean = means,
            std = stds,
            batch_size = batch_size,
            num_workers = num_workers,
            pin_memory = False,
        )
        scaler = torch.cuda.amp.GradScaler()
        for epoch in range(max_epochs):
            train_epoch(
                train_loader,
                model,
                optimizer,
                loss_fn,
                scaler,
                cur_epoch=epoch,
                trial_number=trial.number,
                fold=fold,
                )

            valid_loss = validate_epoch(
                valid_loader,
                model,
                cur_epoch=epoch,
                trial_number=trial.number,
                fold=fold,
                )
            # valid_loss can be nan --> cannot be stored in the database and must
            # not reach the scheduler, so it is replaced BEFORE it is used
            if math.isnan(valid_loss):
                valid_loss = 99999
            scheduler.step(valid_loss)

            # remember the last epoch that ran, whether or not the fold stops early
            epochs[fold] = epoch

            if valid_loss < loss_total[fold]:
                loss_total[fold] = valid_loss
                epochs_no_improve = 0
                if b_save_checkpoint:
                    checkpoint = {
                        "state_dict": model.state_dict(),
                        "optimizer":optimizer.state_dict(),
                    }
                    save_checkpoint(checkpoint, filename=checkpoint_file)
            else:
                epochs_no_improve+=1

            if epochs_no_improve >= es_patience:
                print(f"Early Stopping on epoch {epoch}")
                break

    trial.set_user_attr('Valid loss per fold', list(loss_total))
    trial.set_user_attr('root path', root_path)
    trial.set_user_attr('architecture', architecture)
    trial.set_user_attr('encoder_name', encoder_name)
    trial.set_user_attr('batch_size', batch_size)
    trial.set_user_attr('b_bilinear', b_bilinear)
    trial.set_user_attr('pretrained', pretrained)
    trial.set_user_attr('replace_stride', replace_stride_with_dilation)
    trial.set_user_attr('final_epoch', list(epochs))
    trial.set_user_attr('lr_scheduler_patience', lr_scheduler_patience)
    print(f"Validation loss per fold: {loss_total}")
    return np.mean(loss_total)

In [None]:
# ---- Hyperparameter-search configuration (read as globals by objective) ----
# NOTE: run_prefix is interpolated into the database file name below, so the
# value True yields "True_<architecture>_...". The retraining cell loads a
# database with exactly that name, hence the value is kept as is.
run_prefix: bool = True
b_save_checkpoint:bool = True
pretrained:bool = True
b_bilinear:bool = True
replace_stride_with_dilation:bool = False
encoder_name:str = "resnet101"
architecture:str = "fcn32s"
# [low, high] of the log-uniform learning-rate search; low is also used as
# min_lr of the LR scheduler in objective. The previous value
# [0.1, 0.9, 0.1] duplicated lr_factor_ranges; learning rates that large make
# Adam diverge (NaN losses / 99999 folds).
lr_ranges = [1e-6, 1e-2]
root_path: str = "/kaggle/working/"
data_path = Path("/kaggle/input/crop-dataset")
num_folds:int = 4
batch_size:int = 100
n_trials:int = 50
db_name= ""
study_name = ""
# derive default names from the model configuration when none are given
if db_name == "":
    db_name:str = f"{run_prefix}_{architecture}_{encoder_name}_dil{int(replace_stride_with_dilation)}_bilin{int(b_bilinear)}_pre{int(pretrained)}"
if study_name == "":
    study_name:str = f"{architecture}_{encoder_name}_dil{int(replace_stride_with_dilation)}_bilin{int(b_bilinear)}_pre{int(pretrained)}"
# [low, high, step] of the ReduceLROnPlateau factor (sampled in tenths)
lr_factor_ranges = [0.1, 0.9, 0.1]
max_epochs:int = 100
es_patience:int = 10
lr_scheduler_patience:int = 5
seed:int = 42

device: str = "cuda" if torch.cuda.is_available() else "cpu"
num_workers: int = 2 if torch.cuda.is_available() else 0

seed_all(seed=seed)

# Create Paths
model_path = Path(f'{root_path}/models/')
model_path.mkdir(parents=True, exist_ok=True)
result_path = Path(f'{root_path}/results/')
result_path.mkdir(parents=True, exist_ok=True)

study = set_study(db_name=db_name, study_name=study_name, root_path=root_path, seed=seed)

study.optimize(objective, n_trials=n_trials)

In [22]:
def get_calculated_means_stds_trainval():
    """
    Return the hard-coded normalisation statistics for the trainval subset.

    Returns:
        (means, stds): two fresh three-element lists (one value per image
        channel -- presumably RGB order, verify against the dataset loader).
    """
    channel_stats = {
        "mean": [0.48810686542128406, 0.4733653049842984, 0.4242799605915251],
        "std": [0.1321881434144248, 0.12971921686190743, 0.12131885037092494],
    }
    return channel_stats["mean"], channel_stats["std"]

In [24]:
# ---- Retrain the best configuration of a finished study on trainval ----
seed_all(seed=42)
architecture:str = "fcn32s"
encoder_name:str = "resnet101"
db_name = ""
root_path = Path("/kaggle/working/")
if db_name =="":
    db_name = f"retrain_{architecture}_{encoder_name}"
print(f"loaded db {db_name}")
# Parameters
max_epochs = 100
es_patience = 5
# best loss so far; starts at +inf so the first finite epoch loss always counts
# as an improvement (a start value of 1 only works for losses bounded by 1)
loss_total = math.inf
epochs_no_improve = 0
# NEED TO CHANGE THIS LINE OF CODE TO RE-TRAIN DIFFERENT MODELS
study_storage = "sqlite:////kaggle/input/fcn-study-db-params/True_fcn32s_resnet101_dil0_bilin1_pre1.db"
studies = optuna.study.get_all_study_summaries(storage=study_storage)
if not studies:
    raise RuntimeError(f"No study found in {study_storage}")
# the database is expected to hold a single study; the first one is used
loaded_study = optuna.load_study(study_name=studies[0].study_name, storage=study_storage)
trial = loaded_study.best_trial
device: str = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading Study: {studies[0].study_name} from {db_name}")
print(f"Best Trial:{trial.number}")
print(trial)

model_path = Path(f'{root_path}/models/')
model_path.mkdir(parents=True, exist_ok=True)
result_path = Path(f'{root_path}/results/')
result_path.mkdir(parents=True, exist_ok=True)

# extract hyperparameters, feature extractor and architecture from best trial
lr = trial.params["lr"]
lr_factor = trial.params["lr_factor"]
batch_size=trial.user_attrs["batch_size"]
lr_scheduler_patience = trial.user_attrs["lr_scheduler_patience"]
architecture = trial.user_attrs["architecture"]
encoder_name = trial.user_attrs["encoder_name"]
pretrained = trial.user_attrs["pretrained"]
b_bilinear = trial.user_attrs["b_bilinear"]
replace_stride_with_dilation = trial.user_attrs["replace_stride"]
data_path = Path("/kaggle/input/crop-dataset")

train_img_dir, train_msk_dir = get_patch_lists(
    data_path=data_path,
    subset="trainval")

valid_img_dir, valid_msk_dir = get_patch_lists(
    data_path=data_path,
    subset="test")


model_save_str = f"model_{architecture}_{encoder_name}_dil{int(replace_stride_with_dilation)}_bilin{int(b_bilinear)}_retrained.pt"
model_save_path = Path(root_path) / "models" / model_save_str
model = set_model(architecture=architecture, encoder_name=encoder_name, pretrained=pretrained, b_bilinear=b_bilinear, replace_stride_with_dilation=replace_stride_with_dilation, num_classes=3).to(device=device)
# NOTE(review): the study was tuned with kornia's DiceLoss while retraining
# uses cross entropy -- confirm that this switch is intended.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = lr)
# lr_factor is stored in tenths (e.g. 7 -> 0.7)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=lr_factor*0.1, min_lr=1e-6, patience=lr_scheduler_patience)
means, stds = get_calculated_means_stds_trainval()

# only the training loader is used; the loader built from the test patches is discarded
train_loader, _ = get_loaders(
        train_img_dir = train_img_dir,
        train_msk_dir = train_msk_dir,
        valid_img_dir = valid_img_dir,
        valid_msk_dir = valid_msk_dir,
        mean = means,
        std = stds,
        batch_size = batch_size,
        num_workers = 4,
        pin_memory = True,
    )
scaler = torch.cuda.amp.GradScaler()
for epoch in range(max_epochs):
    train_loss = train_epoch(
        train_loader,
        model,
        optimizer,
        loss_fn,
        scaler,
        cur_epoch=epoch
        )
    # a nan loss must neither reach the scheduler nor count as an improvement,
    # so it is detected before train_loss is used
    loss_is_nan = math.isnan(train_loss)
    if loss_is_nan:
        train_loss = 99999
    scheduler.step(train_loss)
    if not loss_is_nan and train_loss < loss_total:
        loss_total = train_loss
        # patience counts consecutive epochs without improvement
        epochs_no_improve = 0
        print(f"Saving checkpoint in epoch {epoch}...")
        checkpoint = {
            "state_dict": model.state_dict(),
        }
        save_checkpoint(checkpoint, filename=f"{str(model_save_path)}")
    else:
        epochs_no_improve+=1
    if epochs_no_improve >= es_patience:
        print(f"Early Stopping on epoch {epoch}")
        break
    print(f"Loss on Train set: {train_loss}")
print(train_loss)

loaded db retrain_fcn32s_resnet101
Loading Study: fcn32s_resnet101_dil0_bilin1_pre1 from retrain_fcn32s_resnet101
Best Trial:38
FrozenTrial(number=38, state=TrialState.COMPLETE, values=[74999.26223023795], datetime_start=datetime.datetime(2024, 4, 17, 4, 13, 57, 260461), datetime_complete=datetime.datetime(2024, 4, 17, 4, 27, 43, 861928), params={'lr': 0.10016018426773525, 'lr_factor': 7}, user_attrs={'Valid loss per fold': [99999.0, 99999.0, 0.048920951783657074, 99999.0], 'architecture': 'fcn32s', 'b_bilinear': True, 'batch_size': 100, 'encoder_name': 'resnet101', 'final_epoch': [9.0, 9.0, 10.0, 9.0], 'lr_scheduler_patience': 5, 'pretrained': True, 'replace_stride': False, 'root path': '/kaggle/working/'}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.9, log=True, low=0.1, step=None), 'lr_factor': IntDistribution(high=9, log=False, low=1, step=1)}, trial_id=39, value=None)
MODEL NAME: fcn32s_resnet101
LOADING PRETRAINED MODEL WEIGHTS FROM IMAG

Retraining E0: 100%|██████████| 22/22 [00:20<00:00,  1.10batch/s, train_losses=nan] 


Loss on Train set: 99999


Retraining E1: 100%|██████████| 22/22 [00:19<00:00,  1.11batch/s, train_losses=nan]


Loss on Train set: 99999


Retraining E2: 100%|██████████| 22/22 [00:19<00:00,  1.11batch/s, train_losses=nan]


Loss on Train set: 99999


Retraining E3: 100%|██████████| 22/22 [00:19<00:00,  1.12batch/s, train_losses=nan]


Loss on Train set: 99999


Retraining E4: 100%|██████████| 22/22 [00:19<00:00,  1.12batch/s, train_losses=nan]

Early Stopping on epoch 4
99999



