# U-Net baseline kernel written in PyTorch
Many parts of this kernel are deliberately simplified.  
You should improve these weak points before relying on it.  

[U-Net web site](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/)  
[U-Net paper](https://arxiv.org/abs/1505.04597)  

I referred to [this blog post](https://lp-tech.net/articles/hzfn7?page=2) for the U-Net implementation.  
Thanks to its author for the great post.  

This is [my EDA](https://www.kaggle.com/go1dfish/fgvc6-simple-eda).  
If you are not familiar with this competition's rules and data, this EDA might help you.  

# Import modules

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
print(os.listdir("../input"))
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
from torch import optim
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Function, Variable
from pathlib import Path
from itertools import groupby

['label_descriptions.json', 'train', 'test', 'train.csv', 'sample_submission.csv']


In [2]:
# Dataset locations (Kaggle kernel layout); the image folders live under input_dir.
input_dir = "../input/"
train_img_dir = input_dir + "train/"
test_img_dir = input_dir + "test/"

# Spatial size every image and mask is resized to before entering the network.
WIDTH = 512
HEIGHT = 512
# 46 labelled categories plus one extra index (category_num - 1) that marks
# pixels without any label.
category_num = 46 + 1

# Not used by any active code visible in this notebook (only by a disabled snippet).
ratio = 8

epoch_num = 8
batch_size = 4

# Use the first GPU when one is available, otherwise fall back to the CPU so
# the notebook still runs (slowly) on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [3]:
# Count the entries in the training image directory.
len(os.listdir("../input/train/"))

45625

In [4]:
# Count the entries in the test image directory.
len(os.listdir("../input/test/"))

3200

In [5]:
# Load the training annotations (one row per encoded segment) and preview them.
train_df = pd.read_csv(f"{input_dir}train.csv")
train_df.head()

Unnamed: 0,ImageId,EncodedPixels,Height,Width,ClassId
0,00000663ed1ff0c4e0132b9b9ac53f6e.jpg,6068157 7 6073371 20 6078584 34 6083797 48 608...,5214,3676,6
1,00000663ed1ff0c4e0132b9b9ac53f6e.jpg,6323163 11 6328356 32 6333549 53 6338742 75 63...,5214,3676,0
2,00000663ed1ff0c4e0132b9b9ac53f6e.jpg,8521389 10 8526585 30 8531789 42 8537002 46 85...,5214,3676,28
3,00000663ed1ff0c4e0132b9b9ac53f6e.jpg,12903854 2 12909064 7 12914275 10 12919485 15 ...,5214,3676,31
4,00000663ed1ff0c4e0132b9b9ac53f6e.jpg,10837337 5 10842542 14 10847746 24 10852951 33...,5214,3676,32


In [6]:
# (number of annotation rows, number of columns)
train_df.shape

(333415, 5)

# Define utils
For simplicity, this kernel focuses only on the category: only the first component of `ClassId` is used.

In [7]:
def make_onehot_vec(x):
    """Return a one-hot vector of length ``category_num``.

    Args:
        x: index (or indices) of the position(s) to set to 1.

    Returns:
        A float NumPy array that is 1 at ``x`` and 0 everywhere else.
    """
    onehot = np.zeros((category_num,))
    onehot[x] = 1
    return onehot

In [8]:
def make_mask_img(segment_df):
    """Decode the run-length annotations of ONE image into a class-index mask.

    Args:
        segment_df: annotation rows of a single image, re-indexed from 0.
            The columns ``Width``, ``Height``, ``EncodedPixels`` and
            ``ClassId`` are read; width/height are taken from row 0.

    Returns:
        A 2-D array resized to ``(HEIGHT, WIDTH)`` holding one class index per
        pixel. Pixels not covered by any segment hold ``category_num - 1``.
        (dtype is presumably int32, assuming ``cv2.resize`` keeps the input
        dtype - TODO confirm.)
    """
    seg_width = segment_df.at[0, "Width"]
    seg_height = segment_df.at[0, "Height"]
    background = category_num - 1
    seg_img = np.full(seg_width * seg_height, background, dtype=np.int32)
    for encoded_pixels, class_id in zip(segment_df["EncodedPixels"].values, segment_df["ClassId"].values):
        # ClassId may look like "10_9_14_..."; only the leading number is used.
        # str() keeps this working if the column was parsed as integers.
        category = int(str(class_id).split("_")[0])
        pixel_list = list(map(int, encoded_pixels.split()))
        # EncodedPixels is a flat list of "start length" pairs with 1-based
        # starts. The full length is used, mirroring what run_length() emits
        # when it encodes predictions.
        for start, length in zip(pixel_list[0::2], pixel_list[1::2]):
            start_index = start - 1
            seg_img[start_index:start_index + length] = category
    # The flat pixel positions run down the columns first (column-major order).
    seg_img = seg_img.reshape((seg_height, seg_width), order='F')
    # Nearest-neighbour keeps the values valid class indices (no blending).
    seg_img = cv2.resize(seg_img, (WIDTH, HEIGHT), interpolation=cv2.INTER_NEAREST)
    return seg_img

In [9]:
def train_generator(df, batch_size):
    """Yield mini-batches of (images, masks) built from annotation rows.

    Args:
        df: annotation rows; grouped by ``ImageId`` so that every image is
            paired with exactly its own rows.
        batch_size: number of images per yielded batch; the final batch may
            be smaller.

    Yields:
        A tuple ``(images, masks)`` where ``images`` is a float32 array in
        channel-first layout scaled by 1/255 and ``masks`` is an int32 array
        of per-pixel class indices as produced by ``make_mask_img``.

    Raises:
        FileNotFoundError: if an image listed in ``df`` cannot be read.
    """
    trn_images = []
    seg_images = []
    # Iterating the groups directly (instead of slicing by a running row
    # offset) keeps image and mask aligned whatever the row order or index
    # labels of ``df`` are.
    for img_name, rows in df.groupby("ImageId"):
        img = cv2.imread(train_img_dir + img_name)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Cannot read training image: " + train_img_dir + img_name)
        img = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
        # make_mask_img reads row 0, so the group is re-indexed from 0.
        seg_img = make_mask_img(rows.reset_index(drop=True))

        # HWC -> CHW
        trn_images.append(img.transpose((2, 0, 1)))
        seg_images.append(seg_img)
        if len(trn_images) == batch_size:
            yield np.array(trn_images, dtype=np.float32) / 255, np.array(seg_images, dtype=np.int32)
            trn_images = []
            seg_images = []
    # Flush the last, possibly incomplete, batch.
    if trn_images:
        yield np.array(trn_images, dtype=np.float32) / 255, np.array(seg_images, dtype=np.int32)

In [10]:
def test_generator(df):
    """Yield test images one at a time, each as a batch of size 1.

    Args:
        df: table whose ``ImageId`` column lists the test image file names.

    Yields:
        A tuple ``(img_name, batch)`` where ``batch`` is a float32 array with a
        leading batch axis of length 1, channel-first layout, scaled by 1/255.

    Raises:
        FileNotFoundError: if an image listed in ``df`` cannot be read.
    """
    for img_name in df["ImageId"].values:
        img = cv2.imread(test_img_dir + img_name)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Cannot read test image: " + test_img_dir + img_name)
        img = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
        # HWC -> CHW
        img = img.transpose((2, 0, 1))
        yield img_name, np.asarray([img], dtype=np.float32) / 255

In [11]:
def encode(input_string):
    """Collapse consecutive equal items into ``(run_length, value)`` pairs.

    Args:
        input_string: any iterable (a string, list, array, ...).

    Returns:
        A list of ``(count, value)`` tuples in order of appearance.
    """
    runs = []
    for value, members in groupby(input_string):
        count = 0
        for _ in members:
            count += 1
        runs.append((count, value))
    return runs

def run_length(label_vec):
    """Run-length encode a flat label vector, one pair list per class.

    Args:
        label_vec: flat sequence of per-pixel class indices.

    Returns:
        A dict mapping each class index found (except ``category_num - 1``,
        which is skipped) to a flat list ``[start, length, start, length, ...]``
        with 1-based start positions.
    """
    background = category_num - 1
    class_dict = {}
    index = 1  # positions in the encoding are 1-based
    for length, label in encode(label_vec):
        if label != background:
            # extend() appends in place; rebuilding the list with ``+`` for
            # every run made the encoding quadratic in the number of runs.
            class_dict.setdefault(label, []).extend((index, length))
        index += length
    return class_dict

# Define Network

In [12]:
class double_conv(nn.Module):
    """Two consecutive (3x3 conv => BatchNorm => ReLU) stages.

    The first stage maps ``in_ch`` to ``out_ch`` channels, the second keeps
    ``out_ch``. ``padding=1`` is used on both 3x3 convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = []
        # Built in the same order as a hand-written Sequential so that the
        # sub-module indices (conv.0 ... conv.5) stay the same.
        for stage_in in (in_ch, out_ch):
            stages.append(nn.Conv2d(stage_in, out_ch, 3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both stages to ``x`` and return the result."""
        return self.conv(x)


class inconv(nn.Module):
    """Input stage of the network: a single ``double_conv`` block."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x):
        """Run ``x`` through the wrapped ``double_conv``."""
        return self.conv(x)


class down(nn.Module):
    """Encoder stage: 2x2 max-pooling followed by a ``double_conv`` block."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        pool = nn.MaxPool2d(2)
        conv = double_conv(in_ch, out_ch)
        self.mpconv = nn.Sequential(pool, conv)

    def forward(self, x):
        """Pool ``x`` and convolve the result."""
        return self.mpconv(x)


class up(nn.Module):
    """Decoder stage: upsample, align with the skip tensor, concatenate, convolve.

    Args:
        in_ch: channel count of the concatenated tensor fed to ``double_conv``.
        out_ch: channel count produced by this stage.
        bilinear: when True use fixed bilinear upsampling; otherwise a
            transposed convolution over ``in_ch // 2`` channels.
    """

    def __init__(self, in_ch, out_ch, bilinear=True):
        super().__init__()

        #  would be a nice idea if the upsampling could be learned too,
        #  but my machine do not have enough memory to handle all those weights
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_ch // 2, in_ch // 2, 2, stride=2)

        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        """Upsample ``x1``, fit ``x2`` to its spatial size, concatenate and convolve.

        Args:
            x1: feature map from the deeper stage (upsampled here).
            x2: skip-connection feature map from the encoder.

        Returns:
            Output of ``double_conv`` on ``cat([x2, x1], dim=1)``.
        """
        x1 = self.up(x1)
        # Dim 2 is height and dim 3 is width.
        diff_h = x1.size(2) - x2.size(2)
        diff_w = x1.size(3) - x2.size(3)
        # F.pad expects (left, right, top, bottom): the WIDTH pair comes first.
        # ``diff - diff // 2`` makes both sides always add up to ``diff``.
        # A negative amount crops x2 instead of padding it.
        x2 = F.pad(x2, (diff_w // 2, diff_w - diff_w // 2,
                        diff_h // 2, diff_h - diff_h // 2))
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class outconv(nn.Module):
    """Output head: a 1x1 convolution mapping ``in_ch`` to ``out_ch`` channels."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size=1)

    def forward(self, x):
        """Apply the 1x1 convolution to ``x``."""
        return self.conv(x)

    
class UNet(nn.Module):
    """U-Net built from an input stage, four ``down`` stages, four ``up`` stages
    and a 1x1 output convolution.

    Args:
        n_channels: number of channels of the input image.
        n_classes: number of output channels (one per class).
    """

    def __init__(self, n_channels, n_classes):
        super().__init__()
        # Encoder
        self.inc = inconv(n_channels, 64)
        self.down1 = down(64, 128)
        self.down2 = down(128, 256)
        self.down3 = down(256, 512)
        self.down4 = down(512, 512)
        # Decoder: each stage receives the previous output plus one skip tensor.
        self.up1 = up(1024, 256)
        self.up2 = up(512, 128)
        self.up3 = up(256, 64)
        self.up4 = up(128, 64)
        # Per-pixel class scores
        self.outc = outconv(64, n_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        # Collect the encoder outputs; the last one is the bottleneck.
        features = [self.inc(x)]
        for encoder in (self.down1, self.down2, self.down3, self.down4):
            features.append(encoder(features[-1]))

        x = features.pop()
        # Walk back up, consuming the skip tensors from deepest to shallowest.
        for decoder in (self.up1, self.up2, self.up3, self.up4):
            x = decoder(x, features.pop())
        return self.outc(x)

# Training

In [13]:
# Total number of annotation rows, used below to pick a subset.
train_df.shape

(333415, 5)

In [14]:
# A quarter of the annotation rows (333415 is the row count shown above).
333415 // 4

83353

In [15]:
# Look at the rows around the quarter mark to see where one image's rows end
# and the next image's rows begin.
train_df.iloc[83348:83354, :]

Unnamed: 0,ImageId,EncodedPixels,Height,Width,ClassId
83348,3fe668b00f4f6efa967866bdda12a977.jpg,390331 1 391529 2 392727 4 393925 6 395122 8 3...,1200,800,33
83349,3fe668b00f4f6efa967866bdda12a977.jpg,424846 1 426046 2 427245 4 428445 5 429645 6 4...,1200,800,15
83350,3fe668b00f4f6efa967866bdda12a977.jpg,316318 10 317509 21 318708 23 319907 26 321106...,1200,800,24
83351,3fe68ecb824fa1242718dd4688fa1363.jpg,279519 38 280507 70 281484 114 282479 129 2834...,1000,756,10
83352,3fe68ecb824fa1242718dd4688fa1363.jpg,392242 17 393243 17 394244 17 395245 17 396246...,1000,756,9
83353,3fe68ecb824fa1242718dd4688fa1363.jpg,175406 5 176399 14 177392 23 178389 28 179388 ...,1000,756,9


In [16]:
# Same check about 10,000 rows earlier, again to find where a new image starts.
train_df.iloc[73350:73354, :]

Unnamed: 0,ImageId,EncodedPixels,Height,Width,ClassId
73350,38329c207ac07cfd3d32bf3b63078bc8.jpg,315968 18 317166 40 318363 48 319561 55 320759...,1200,801,24
73351,38329c207ac07cfd3d32bf3b63078bc8.jpg,379404 2 380603 6 381803 9 383002 12 384202 15...,1200,801,13
73352,3832c5a0483ad453057e578330327185.jpg,963 22 2162 24 3362 25 4561 27 5761 28 6960 29...,1200,800,10_9_14_20_22_52_61_90
73353,3832c5a0483ad453057e578330327185.jpg,310064 5 311257 9 312455 8 313652 8 314850 8 3...,1200,800,33


For simplicity, only about 25% of the data is used.  

In [17]:
# Model: 3 input channels, one output channel per class index.
net = UNet(n_channels=3, n_classes=category_num)
net = net.to(device)

# SGD with momentum and weight decay.
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)

# Cross entropy between the per-pixel class scores and the integer mask.
criterion = nn.CrossEntropyLoss()

In [18]:
# Row offsets into train_df: rows [0, val_sta) are used for training and rows
# [val_sta, val_end) for validation. Both offsets sit on rows where a new
# ImageId starts (see the two previews above), so no image is split.
val_sta = 73352
val_end = 83351
train_loss = []
valid_loss = []
for epoch in range(epoch_num):
    # ---- training pass ----
    epoch_trn_loss = 0
    train_len = 0
    net.train()
    for iteration, (X_trn, Y_trn) in enumerate(tqdm(train_generator(train_df.iloc[:val_sta, :], batch_size))):
        X = torch.tensor(X_trn, dtype=torch.float32).to(device)
        Y = torch.tensor(Y_trn, dtype=torch.long).to(device)
        train_len += len(X)

        mask_pred = net(X)
        loss = criterion(mask_pred, Y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_trn_loss += loss.item()

        # Report the running mean of the batch losses every 100 iterations.
        if iteration % 100 == 0:
            print("train loss in {:0>2}epoch  /{:>5}iter:    {:<10.8}".format(epoch+1, iteration, epoch_trn_loss/(iteration+1)))

    train_loss.append(epoch_trn_loss/(iteration+1))
    print("train {}epoch loss({}iteration):    {:10.8}".format(epoch+1, iteration, train_loss[-1]))

    # ---- validation pass ----
    epoch_val_loss = 0
    val_len = 0
    net.eval()
    # No gradients are needed for validation; disabling autograd avoids
    # building and keeping the graph for every forward pass.
    with torch.no_grad():
        for iteration, (X_val, Y_val) in enumerate(tqdm(train_generator(train_df.iloc[val_sta:val_end, :], batch_size))):
            X = torch.tensor(X_val, dtype=torch.float32).to(device)
            Y = torch.tensor(Y_val, dtype=torch.long).to(device)
            val_len += len(X)

            mask_pred = net(X)
            loss = criterion(mask_pred, Y)
            epoch_val_loss += loss.item()

            if iteration % 100 == 0:
                print("valid loss in {:0>2}epoch  /{:>5}iter:    {:<10.8}".format(epoch+1, iteration, epoch_val_loss/(iteration+1)))

    valid_loss.append(epoch_val_loss/(iteration+1))
    print("valid {}epoch loss({}iteration):    {:10.8}".format(epoch+1, iteration, valid_loss[-1]))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))



train loss in 01epoch  /    0iter:    3.9305406 
train loss in 01epoch  /  100iter:    1.3079783 
train loss in 01epoch  /  200iter:    1.165138  
train loss in 01epoch  /  300iter:    1.1301909 
train loss in 01epoch  /  400iter:    1.1018424 
train loss in 01epoch  /  500iter:    1.0783478 
train loss in 01epoch  /  600iter:    1.0667526 
train loss in 01epoch  /  700iter:    1.0574065 
train loss in 01epoch  /  800iter:    1.0477893 
train loss in 01epoch  /  900iter:    1.0430411 
train loss in 01epoch  / 1000iter:    1.032117  
train loss in 01epoch  / 1100iter:    1.0258689 
train loss in 01epoch  / 1200iter:    1.021282  
train loss in 01epoch  / 1300iter:    1.020283  
train loss in 01epoch  / 1400iter:    1.0187985 
train loss in 01epoch  / 1500iter:    1.0134131 
train loss in 01epoch  / 1600iter:    1.0086406 
train loss in 01epoch  / 1700iter:    1.006286  
train loss in 01epoch  / 1800iter:    1.0007556 
train loss in 01epoch  / 1900iter:    0.9994886 
train loss in 01epoc

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

valid loss in 01epoch  /    0iter:    0.50634336
valid loss in 01epoch  /  100iter:    0.93556459
valid loss in 01epoch  /  200iter:    0.94819139
valid loss in 01epoch  /  300iter:    0.9374021 

valid 1epoch loss(349iteration):    0.93662956


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

train loss in 02epoch  /    0iter:    1.3023912 
train loss in 02epoch  /  100iter:    0.93594587
train loss in 02epoch  /  200iter:    0.90697418
train loss in 02epoch  /  300iter:    0.91307877
train loss in 02epoch  /  400iter:    0.91570201
train loss in 02epoch  /  500iter:    0.90981709
train loss in 02epoch  /  600iter:    0.91542806
train loss in 02epoch  /  700iter:    0.9234773 
train loss in 02epoch  /  800iter:    0.92256422
train loss in 02epoch  /  900iter:    0.92544622
train loss in 02epoch  / 1000iter:    0.92095611
train loss in 02epoch  / 1100iter:    0.92048802
train loss in 02epoch  / 1200iter:    0.92107203
train loss in 02epoch  / 1300iter:    0.92443113
train loss in 02epoch  / 1400iter:    0.92642074
train loss in 02epoch  / 1500iter:    0.92517088
train loss in 02epoch  / 1600iter:    0.92371039
train loss in 02epoch  / 1700iter:    0.92458595
train loss in 02epoch  / 1800iter:    0.92164098
train loss in 02epoch  / 1900iter:    0.92337591
train loss in 02epoc

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

valid loss in 02epoch  /    0iter:    0.61368769
valid loss in 02epoch  /  100iter:    0.98895022
valid loss in 02epoch  /  200iter:    0.99917606
valid loss in 02epoch  /  300iter:    0.99055964

valid 2epoch loss(349iteration):    0.99102518


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

train loss in 03epoch  /    0iter:    1.2972517 
train loss in 03epoch  /  100iter:    0.93556952
train loss in 03epoch  /  200iter:    0.90136917
train loss in 03epoch  /  300iter:    0.90242655
train loss in 03epoch  /  400iter:    0.89702005
train loss in 03epoch  /  500iter:    0.88654249


In [19]:
#plt.plot(list(range(epoch_num)), train_loss, color='green')
#plt.plot(list(range(epoch_num)), valid_loss, color='blue')

# Test

In [20]:
# The sample submission lists the test image names and the expected columns.
sample_df = pd.read_csv(f"{input_dir}sample_submission.csv")

In [21]:
# Debug aid: print the type and size of every tensor (or object wrapping a
# tensor in ``.data``) that the garbage collector currently tracks.
import torch
import gc
for obj in gc.get_objects():
    try:
        if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
            print(type(obj), obj.size())
    except Exception:
        # Best effort: some tracked objects raise on attribute access. Skip
        # them, but let KeyboardInterrupt / SystemExit propagate.
        continue

  """
  """


<class 'torch.Tensor'> torch.Size([3, 3, 512, 512])
<class 'torch.Tensor'> torch.Size([3, 512, 512])
<class 'torch.Tensor'> torch.Size([3, 47, 512, 512])
<class 'torch.Tensor'> torch.Size([])
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 64, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([128, 64, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([128])
<class 'torch.nn.parameter.Parameter'> torch.Size([128])
<class 'torch.nn.parameter.Parameter'> torch.Size([128])
<class 'torch.nn.parameter.Parameter'> torch.Size([128, 128, 3, 3])
<class 'torch.nn.parameter.Paramete

  """
  """


In [22]:
# Predict a mask for every test image and collect one submission row per
# predicted class: [image name, run-length string, class index].
sub_list = []
net.eval()
# Inference only: disabling autograd avoids building a graph per image.
with torch.no_grad():
    for img_name, img in test_generator(sample_df):
        X = torch.tensor(img, dtype=torch.float32).to(device)
        mask_pred = net(X).cpu().numpy()
        # Most likely class per pixel (axis 1 is the class axis).
        mask_prob = np.argmax(mask_pred, axis=1)
        # Flatten column-major, matching the order used when decoding masks.
        mask_prob = mask_prob.ravel(order='F')
        class_dict = run_length(mask_prob)
        if len(class_dict) == 0:
            # No class predicted anywhere: emit a one-pixel placeholder row.
            sub_list.append([img_name, "1 1", 1])
        else:
            for key, val in class_dict.items():
                sub_list.append([img_name, " ".join(map(str, val)), key])



# Make Submission File

In [23]:
# Reuse the column names of the sample submission for the collected rows.
submission_columns = sample_df.columns.values
submission_df = pd.DataFrame(sub_list, columns=submission_columns)

In [24]:
# Display the assembled submission table.
submission_df

Unnamed: 0,ImageId,EncodedPixels,ClassId
0,003d41dd20f271d27219fe7ee6de727d.jpg,78696 2 170712 7 171224 7 171724 9 171735 8 17...,31
1,003d41dd20f271d27219fe7ee6de727d.jpg,92452 3 92964 3 93475 4 93987 4 94492 3 94498 ...,10
2,0046f98599f05fd7233973e430d6d04d.jpg,97195 2 97706 4 98218 4 98730 3 99242 3 99754 ...,10
3,004e9e21cd1aca568a8ffc77a54638ce.jpg,66917 10 67410 38 67913 51 68419 58 68925 65 6...,10
4,004e9e21cd1aca568a8ffc77a54638ce.jpg,123340 2 123850 6 125413 7 125924 8 126438 6 1...,23
5,004e9e21cd1aca568a8ffc77a54638ce.jpg,170718 2 171228 6 171736 12 172248 13 172761 1...,31
6,005b37fce3c0f641d327d95dd832f51b.jpg,82648 1 83140 1 83650 4 84161 6 84168 1 84673 ...,10
7,005b37fce3c0f641d327d95dd832f51b.jpg,94108 3 94619 4 95130 8 95628 5 95641 11 95690...,6
8,005b37fce3c0f641d327d95dd832f51b.jpg,154366 3 154875 7 155388 6 155400 2 155900 5 1...,31
9,0094940c58c343b742f48ae26eb5e9fa.jpg,46425 8 46933 16 47443 19 47954 20 48465 20 48...,10


In [25]:
# Write the submission file without the DataFrame index column.
submission_df.to_csv("submission.csv", index=False)

# Thank you for watching!
Please let me know if you find mistakes in the code or in my English.  
I hope this kernel helps.  
If you find this kernel useful, please upvote it. If you do, I will be happy and sleep well.  