In [9]:
# Mount Google Drive inside the Colab runtime at /content/myfiles.
from google.colab import drive 
drive.mount('/content/myfiles')

Drive already mounted at /content/myfiles; to attempt to forcibly remount, call drive.mount("/content/myfiles", force_remount=True).


In [22]:
# Make the project folder on the mounted drive the working directory, then print it to confirm.
%cd "/content/myfiles/MyDrive/Politechnika/GSN/gsn-2022z-hand-segmentation"
!pwd

/content/myfiles/MyDrive/Politechnika/GSN/gsn-2022z-hand-segmentation
/content/myfiles/MyDrive/Politechnika/GSN/gsn-2022z-hand-segmentation


In [20]:
# Download new objects and refs from the remote; the pull on the next line is commented out.
!git fetch
#!git pull

In [23]:
# Stage the notebook file for the next commit.
!git add skinny_no_i_d.ipynb

In [24]:
# Record the staged changes in the local repository.
!git commit -m "Model implementation for basic version of Skinny architecture"

[main 1feaa1c] Model implementation for basic version of Skinny architecture
 1 file changed, 1 insertion(+)
 create mode 100644 skinny_no_i_d.ipynb


In [25]:
# Upload the local commits to the configured remote.
!git push

Counting objects: 3, done.
Delta compression using up to 2 threads.
Compressing objects:  33% (1/3)   Compressing objects:  66% (2/3)   Compressing objects: 100% (3/3)   Compressing objects: 100% (3/3), done.
Writing objects:  33% (1/3)   Writing objects:  66% (2/3)   Writing objects: 100% (3/3)   Writing objects: 100% (3/3), 2.69 KiB | 917.00 KiB/s, done.
Total 3 (delta 1), reused 0 (delta 0)
To https://gitlab-stud.elka.pw.edu.pl/mmarcin5/gsn-2022z-hand-segmentation.git
   ff11f12..1feaa1c  main -> main


In [3]:
from torch.utils.data import Dataset
import pandas as pd
from torchvision.io import read_image
from PIL import Image
import torchvision.transforms as transforms
import torch

# dataset (not a DataLoader): yields (image, one-hot mask) pairs
class SkinDataset(Dataset):
    """Dataset of images and their segmentation masks listed in a CSV file.

    The CSV is loaded with ``pandas.read_csv``; column 0 holds the image path
    and column 1 the path of the matching mask. Every sample is padded to a
    square on its right or bottom edge and then resized.

    Args:
        description_file: Path of the CSV file describing the samples.
        out_size: Size handed to ``transforms.Resize`` for both image and mask.
        transform_image: Optional callable applied to the image after it has
            been cast to ``torch.float64``.
        target_transform: Optional callable applied to the PIL mask. It has to
            return a tensor, because the mask is padded with
            ``torch.nn.functional.pad`` afterwards.
    """

    def __init__(self, description_file, out_size, transform_image=None, 
                 target_transform=None):
        self.images_file = pd.read_csv(description_file)
        self.transform_image = transform_image
        self.target_transform = target_transform
        self.size = out_size
        self.resize = transforms.Resize(self.size)
        # Masks hold class indices: interpolating between them produces
        # fractional values that the later int64 cast would truncate to 0, so
        # masks are resized with nearest-neighbour sampling instead.
        self.resize_label = transforms.Resize(
            self.size, interpolation=transforms.InterpolationMode.NEAREST)

    def __len__(self):
        """Return the number of rows in the description file."""
        return len(self.images_file)

    @staticmethod
    def _square_padding(height, width):
        """Return the ``F.pad`` tuple (left, right, top, bottom) that squares a H x W image."""
        if height > width:
            # Taller than wide: extend the width on the right.
            return (0, height - width, 0, 0)
        # Wider than tall (or already square): extend the height at the bottom.
        return (0, 0, 0, width - height)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label_one_hot)``.

        The image is padded and resized after the optional ``transform_image``.
        The mask is padded, resized and expanded to two one-hot channels.
        NOTE(review): the final transpose/squeeze/permute chain assumes the
        transformed mask has shape (1, H, W) - confirm against the mask files.
        """
        img_path = self.images_file.iloc[idx, 0]
        image = read_image(img_path)
        # read_image returns a (C, H, W) tensor: dim 1 is height, dim 2 is width.
        height = image.size()[1]
        width = image.size()[2]
        pad = self._square_padding(height, width)

        if self.transform_image:
            image = self.transform_image(image.to(torch.float64))
        # Padding is applied after the transform, so padded pixels are exactly 1.
        image = torch.nn.functional.pad(image, pad, mode='constant', value=1)
        image = self.resize(image)

        label_path = self.images_file.iloc[idx, 1]
        label = Image.open(label_path)
        if self.target_transform:
            label = self.target_transform(label)
        # Padded mask pixels get value 1, i.e. class index 1 after the one-hot step.
        label = torch.nn.functional.pad(label, pad, mode='constant', value=1)
        label = self.resize_label(label)
        label_one_hot = torch.nn.functional.one_hot(label.to(torch.int64), 2).transpose(1, 3).squeeze().permute(0, 2, 1)

        return image, label_one_hot


In [4]:
# Per-channel statistics handed to Normalize, stored as float64 tensors.
mean, std = (
    torch.tensor(channel_values, dtype=torch.float64)
    for channel_values in ([48.1431, 47.4175, 42.4370],
                           [45.2122, 40.2552, 37.7503])
)

# Masks are only converted to tensors; images are normalised channel-wise.
transform_label = transforms.Compose([transforms.ToTensor()])
transform_image = transforms.Compose([transforms.Normalize(mean, std)])

# One dataset per description file, all with output size 512 and shared transforms.
data_1, data_2a, data_2b = (
    SkinDataset(csv_name, 512, transform_image, transform_label)
    for csv_name in ("data_type1.csv", "data_type2a.csv", "data_type2b.csv")
)
data_all = torch.utils.data.ConcatDataset([data_1, data_2a, data_2b])

In [28]:
import torch.nn as nn
import torch.nn.functional as F

# first part - double convolution
class InitConv(nn.Module):
  """Entry block: two bias-free 3x3 convolutions, each followed by batch norm.

  A ReLU sits between the two conv/batch-norm pairs; no activation is applied
  after the second batch norm. The convolutions are constructed without a
  padding argument.
  """

  def __init__(self, in_channels, out_channels):
    super(InitConv, self).__init__()

    layers = [
        nn.Conv2d(in_channels, out_channels, 3, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, bias=False),
        nn.BatchNorm2d(out_channels),
    ]
    self.sequential_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Pass ``x`` through the layer stack and return the result."""
    out = self.sequential_stack(x)
    return out

In [29]:
# downscaling - double convolution with max pooling
class ConvDown(nn.Module):
  """Encoder step: ReLU, 2x2 max pooling, then two conv/batch-norm pairs.

  The leading ReLU is in-place, so the tensor passed to ``forward`` is itself
  rectified as a side effect. The two 3x3 convolutions are bias-free and are
  constructed without a padding argument; a ReLU separates the two
  conv/batch-norm pairs and no activation follows the last batch norm.

  Args:
    in_channels: Number of channels of the incoming tensor.
    out_channels: Number of channels produced by both convolutions.
  """

  def __init__(self, in_channels, out_channels):
    super().__init__()

    self.sequential_stack = nn.Sequential(
        nn.ReLU(inplace=True),
        # MaxPool2d names its window argument `kernel_size`; `kernel` is not
        # accepted and raised TypeError when the module was constructed.
        nn.MaxPool2d(kernel_size=2),
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 
                  kernel_size=3, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=out_channels, out_channels=out_channels, 
                  kernel_size=3, bias=False),
        nn.BatchNorm2d(out_channels)
    )
  
  def forward(self, x):
    """Return the pooled and convolved features of ``x`` (``x`` is rectified in place)."""
    return self.sequential_stack(x)

In [30]:
# upscaling - double convolution with convolution transpose and concatenation
class DeconvUp(nn.Module):
  """Decoder step: double convolution, transposed-conv upsampling, skip concat.

  ``forward`` runs ``x1`` through two conv/batch-norm pairs, a stride-2
  transposed convolution that halves the channel count, and a final
  conv/batch-norm/ReLU. The result is padded to the spatial size of ``x2``
  and concatenated behind it along the channel axis, so the output has
  ``x2`` channels + ``out_channels // 2`` channels.

  Args:
    in_channels: Number of channels of ``x1``.
    out_channels: Channels produced by the double convolution; the
      transposed convolution reduces this to ``out_channels // 2``.
  """

  def __init__(self, in_channels, out_channels):
    super().__init__()

    mid_channels = out_channels // 2

    self.sequential_stack = nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 
                  kernel_size=3, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=out_channels, out_channels=out_channels, 
                  kernel_size=3, bias=False),
        nn.BatchNorm2d(out_channels),
        # kernel_size is a required argument of ConvTranspose2d; omitting it
        # raised TypeError at construction. A 2x2 kernel with stride 2 doubles
        # height and width exactly.
        nn.ConvTranspose2d(in_channels=out_channels, out_channels=mid_channels, 
                           kernel_size=2, stride=2),
        nn.Conv2d(in_channels=mid_channels, out_channels=mid_channels, 
                  kernel_size=3, bias=False),
        nn.BatchNorm2d(mid_channels),
        nn.ReLU(inplace=True)
    )

  def forward(self, x1, x2):
    """Upsample ``x1`` and concatenate it behind the skip tensor ``x2``.

    Args:
      x1: Features coming from the deeper level (N, C, H, W).
      x2: Skip-connection features whose spatial size is matched.

    Returns:
      ``torch.cat([x2, x1_upsampled], dim=1)``.
    """
    x1 = self.sequential_stack(x1)
    # Size gap between the skip tensor and the upsampled features.
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]

    # Spread the gap over both sides; the extra pixel of an odd gap goes to
    # the right/bottom.
    x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2])
    return torch.cat([x2, x1], dim=1)

In [31]:
# final module with several convolution layers finished with sigmoid
class FinalConv(nn.Module):
  """Output head: four 3x3 convolutions, a 1x1 convolution and a sigmoid.

  Channel widths go ``in_channels`` -> ``in_channels // 2`` (three times) ->
  ``in_channels // 4`` -> ``out_channels``. Only the first two convolutions
  are followed by batch norm, and every convolution is bias-free.
  """

  def __init__(self, in_channels, out_channels):
    super().__init__()

    half = in_channels // 2
    quarter = half // 2

    head = [
        nn.Conv2d(in_channels, half, 3, bias=False),
        nn.BatchNorm2d(half),
        nn.ReLU(inplace=True),
        nn.Conv2d(half, half, 3, bias=False),
        nn.BatchNorm2d(half),
        nn.Conv2d(half, half, 3, bias=False),
        nn.ReLU(inplace=True),
        nn.Conv2d(half, quarter, 3, bias=False),
        nn.ReLU(inplace=True),
        # 1x1 projection to the requested number of output channels
        nn.Conv2d(quarter, out_channels, 1, bias=False),
        nn.Sigmoid(),
    ]
    self.sequential_stack = nn.Sequential(*head)

  def forward(self, x):
    """Return the sigmoid-activated output of the head for ``x``."""
    scores = self.sequential_stack(x)
    return scores

In [32]:
# Skinny architecture variant without inception modules and dense blocks
class Skinny(nn.Module):
  """Skinny architecture variant without inception modules and dense blocks.

  The encoder is an ``InitConv`` followed by four ``ConvDown`` steps
  (15 -> 30 -> 60 -> 120 -> 240 channels) and one extra ReLU + max pooling.
  Five ``DeconvUp`` steps then walk back up, each consuming the encoder
  output of the matching level as its skip tensor, and ``FinalConv`` maps the
  resulting 30 channels to ``n_classes``.
  """

  def __init__(self, n_channels, n_classes):
    super().__init__()

    self.n_channels = n_channels
    self.n_classes = n_classes

    # Encoder
    self.init = InitConv(self.n_channels, 15)
    self.conv_down1 = ConvDown(15, 30)
    self.conv_down2 = ConvDown(30, 60)
    self.conv_down3 = ConvDown(60, 120)
    self.conv_down4 = ConvDown(120, 240)

    # Decoder
    self.deconv_up1 = DeconvUp(240, 480)
    self.deconv_up2 = DeconvUp(480, 240)
    self.deconv_up3 = DeconvUp(240, 120)
    self.deconv_up4 = DeconvUp(120, 60)
    self.deconv_up5 = DeconvUp(60, 30)

    # Output head
    self.final = FinalConv(30, n_classes)

  def forward(self, x):
    """Run the encoder, the decoder with skip connections, and the output head."""
    # Encoder: keep every level's output for the skip connections.
    skips = [self.init(x)]
    for down in (self.conv_down1, self.conv_down2,
                 self.conv_down3, self.conv_down4):
      skips.append(down(skips[-1]))

    # Bottom of the network: in-place ReLU on the deepest features, then pooling.
    features = F.max_pool2d(F.relu(skips[-1], inplace=True), 2)

    # Decoder: deepest skip first, shallowest last.
    decoder = (self.deconv_up1, self.deconv_up2, self.deconv_up3,
               self.deconv_up4, self.deconv_up5)
    for up, skip in zip(decoder, reversed(skips)):
      features = up(features, skip)

    return self.final(features)