The EDA part of this kernel is inspired by [GoldFish](https://www.kaggle.com/go1dfish/updated4-29-fgvc6-simple-eda)

In [2]:
import numpy as np # linear algebra
import pandas as pd
pd.set_option("display.max_rows", 50)
import os
print(os.listdir("../input"))
import cv2
import json
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams["font.size"] = 15
import seaborn as sns
from collections import Counter
from PIL import Image
import math
import seaborn as sns
import tensorflow as tf
from tensorflow import keras

['test', 'train', 'train.csv', 'label_descriptions.json', 'sample_submission.csv']


In [3]:
# Root folder of the competition data; the trailing slash lets file names be appended directly.
input_dir = "../input/"
# One row per annotated segment of a training image.
train_df = pd.read_csv(f"{input_dir}train.csv")

Utility functions

In [4]:
def json2df(data):
    """Convert a list of flat JSON objects into a DataFrame.

    Each element of ``data`` becomes one row (in input order) and each key
    becomes a column, in order of first appearance. Keys missing from an
    element are left as NaN in that row.

    The frame is built with a single constructor call instead of assigning
    one cell at a time through ``df.loc``; cell-wise enlargement reallocates
    the frame for every new row/column and loses the natural column dtypes.

    Parameters
    ----------
    data : iterable of dict
        Records, e.g. ``label_description["categories"]``.

    Returns
    -------
    pandas.DataFrame
        One row per record; an empty DataFrame when ``data`` is empty.
    """
    return pd.DataFrame(list(data))

In [5]:
# Parse the label metadata file that ships with the dataset into a plain dict.
with open(f"{input_dir}label_descriptions.json") as f:
    label_description = json.load(f)

In [6]:
print(json.dumps(label_description['info'],indent = 2))

{
  "year": 2019,
  "version": "1.0",
  "description": "The 2019 FGVC^6 iMaterialist Competition - Fashion track dataset.",
  "contributor": "iMaterialist Fashion Competition group",
  "url": "https://github.com/visipedia/imat_comp",
  "date_created": "2019-04-19 12:38:27.493919"
}


In [None]:
# Tabulate the category and attribute descriptions; `id` and `level` are cast to int
# in the same step so both tables end up with integer keys.
category_df = json2df(label_description["categories"]).astype({'id': int, 'level': int})
attributes_df = json2df(label_description["attributes"]).astype({'id': int, 'level': int})

In [None]:
category_df.shape[0]

In [None]:
attributes_df.head()

In [None]:
category_df.id.count()

In [None]:
category_df.describe()

In [None]:
# Histogram of annotation rows per image: index = rows per image, value = number of images.
labels_count = (
    train_df.groupby('ImageId')['ClassId']
    .count()          # annotation rows for each image
    .value_counts()   # how many images share each row count
    .sort_index()
)

In [None]:
# Plot the first ten entries of labels_count (labels per image vs. number of images).
# x/y are passed by keyword: recent seaborn releases no longer accept them positionally.
sns.barplot(x=labels_count.index[:10], y=labels_count.iloc[:10])

In [None]:
# Count the directory entries in the train and test image folders.
num_train = len(os.listdir('../input/train'))
print('The number of training image is {}'.format(num_train))
num_test = len(os.listdir('../input/test'))
print('The number of testing image is {}'.format(num_test))

In [None]:
train_df['ClassId'].unique()

In [None]:
# Target size, in pixels, that masks (and images in img_gen_trn) are resized to with cv2.resize.
WIDTH = 512
HEIGHT = 512
# Folder holding the training images; image file names are appended directly to this prefix.
train_img_dir = "../input/train/"

In [None]:
# Number of labels the model predicts: one per category row plus one extra label.
# The extra label (value category_num - 1) is what make_mask_img uses to pre-fill
# every pixel before painting the annotated runs, i.e. it acts as the background class.
# NOTE(review): this assumes category ids run from 0 to len(category_df) - 1 — confirm.
category_num = category_df.shape[0] + 1 

Exploring the image segmentation

In [None]:
# Names that look like matplotlib qualitative colormaps.
# NOTE(review): nothing in the visible part of this notebook reads `pallete`.
pallete =  [
    'Pastel1', 'Pastel2', 'Paired', 'Accent', 'Dark2',
    'Set1', 'Set2', 'Set3', 'tab10', 'tab20', 'tab20b', 'tab20c']


def make_mask_img(segment_df):
    """Rasterise the run-length encoded segments of one image into a label map.

    Parameters
    ----------
    segment_df : pandas.DataFrame
        All annotation rows of a single image, indexed from 0, with columns
        ``Width``, ``Height``, ``EncodedPixels`` and ``ClassId``.
        ``EncodedPixels`` is a whitespace-separated list of
        ``start length start length ...`` pairs with 1-based starts that index
        the image in column-major order.

    Returns
    -------
    numpy.ndarray
        ``int32`` array of shape ``(HEIGHT, WIDTH)``. Each pixel holds the
        category id of the last segment painted over it, or
        ``category_num - 1`` where no segment covers it.

    Raises
    ------
    ValueError
        If an ``EncodedPixels`` string does not contain an even number of values.
    """
    seg_width = segment_df.at[0, "Width"]
    seg_height = segment_df.at[0, "Height"]
    # Start from an all-background flat image; category_num - 1 is the background label.
    seg_img = np.full(seg_width*seg_height, category_num-1, dtype=np.int32)
    for encoded_pixels, class_id in zip(segment_df["EncodedPixels"].values, segment_df["ClassId"].values):
        # ClassId may be "<category>_<attribute>_..."; only the leading category id is used.
        label = int(str(class_id).split("_")[0])
        pixel_list = list(map(int, encoded_pixels.split()))
        if len(pixel_list) % 2 != 0:
            raise ValueError("EncodedPixels must hold (start, length) pairs")
        for start, run_length in zip(pixel_list[0::2], pixel_list[1::2]):
            start_index = start - 1  # starts are 1-based
            # A run covers exactly `run_length` pixels, so the slice end is
            # start_index + run_length (subtracting one here would drop the
            # last pixel of every run and erase single-pixel runs).
            seg_img[start_index:start_index+run_length] = label
    # The flat buffer is column-major, hence order='F'.
    seg_img = seg_img.reshape((seg_height, seg_width), order='F')
    # Nearest-neighbour keeps label values intact (no blending between classes).
    seg_img = cv2.resize(seg_img, (WIDTH, HEIGHT), interpolation=cv2.INTER_NEAREST)

    return seg_img

def train_generator(df, batch_size):
    """Load the first ``batch_size`` images of ``df`` together with their label masks.

    Despite its name this is a plain function: it returns one batch and stops.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation rows with ``ImageId`` and ``ClassId`` columns (plus whatever
        ``make_mask_img`` needs). Rows of the same image must be contiguous,
        ordered by ``ImageId`` and carry a consecutive integer index, because
        the rows of each image are located by walking the index.
    batch_size : int
        Maximum number of images to load; must be at least 1.

    Returns
    -------
    (list, list)
        Images as channel-first (CHW) BGR arrays at their original resolution,
        and the matching masks produced by ``make_mask_img``. When ``df`` holds
        fewer than ``batch_size`` images, all of them are returned.
        NOTE(review): the masks are resized to (HEIGHT, WIDTH) while the images
        are not, so the two lists generally differ in spatial size — confirm
        this is intended.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    FileNotFoundError
        If an image file cannot be read.
    Exception
        "Index Range Error" if the index walk picked up rows of several images.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    trn_images = []
    seg_images = []
    if len(df) == 0:
        return trn_images, seg_images

    img_ind_num = df.groupby("ImageId")["ClassId"].count()
    index = df.index.values[0]
    for img_name, ind_num in img_ind_num.items():
        img = cv2.imread("../input/train/" + img_name)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: ../input/train/" + img_name)
        # .loc slicing is inclusive on both ends, hence the -1.
        segment_df = (df.loc[index:index+ind_num-1, :]).reset_index(drop=True)
        index += ind_num
        if segment_df["ImageId"].nunique() != 1:
            raise Exception("Index Range Error")
        seg_img = make_mask_img(segment_df)

        # HWC -> CHW
        img = img.transpose((2, 0, 1))

        trn_images.append(img)
        seg_images.append(seg_img)
        if len(trn_images) == batch_size:
            break
    # Also reached when the frame ran out of images before the batch was full.
    return trn_images, seg_images
        
def cv2plt(img, isColor=True):
    """
    Turn a channel-first BGR image into the channel-last RGB layout matplotlib displays.

    `isColor` is accepted for the caller's convenience but is never read.
    """
    channels_last = img.transpose((1, 2, 0))  # CHW -> HWC
    return cv2.cvtColor(channels_last, cv2.COLOR_BGR2RGB)  # BGR -> RGB


In [None]:
# Load six samples and draw them on a 3x2 grid: the photo first, then its label mask on top.
original, segmented = train_generator(train_df, 6)
fig, ax = plt.subplots(3, 2, figsize=(16, 18))
for i, (img, seg) in enumerate(zip(original, segmented)):
    ax[i//2, i%2].imshow(cv2plt(img))
    # Pixels labelled 45 are pushed to 255 (in place, so `segmented` is modified too).
    # NOTE(review): make_mask_img fills background with category_num - 1; presumably 45 is
    # meant to be that background value — confirm it matches, otherwise class 45 is recoloured.
    seg[seg == 45] = 255
    ax[i//2, i%2].imshow(seg, cmap='tab20_r', alpha=0.6)
    ax[i//2, i%2].set_title("Sample {}".format(i))

Image segmentation using U-Net
* image generator
    * should loop forever; restrict the number of images by shortening the input df
    * a decode function to decode each run-length encoded pixel string
    * resize inside the generator instead of resizing all the images up front
    * train_df already sorted by image id
* build and compile the model



In [None]:
def img_gen_trn(df,batch_size):
    """Yield (images, masks) training batches for the images listed in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation rows with ``ImageId`` and ``ClassId`` columns (plus whatever
        ``make_mask_img`` needs). Rows of the same image must be contiguous,
        ordered by ``ImageId`` and carry a consecutive integer index, because
        the rows of each image are located by walking the index.
    batch_size : int
        Number of images per batch; must be at least 1.

    Yields
    ------
    (numpy.ndarray, numpy.ndarray)
        ``float32`` images of shape ``(n, HEIGHT, WIDTH, 3)`` scaled to [0, 1]
        (channel order as returned by ``cv2.imread``), and ``int32`` label maps
        of shape ``(n, HEIGHT, WIDTH)``. ``n`` equals ``batch_size`` except for
        the final batch, which holds the remaining images.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    FileNotFoundError
        If an image file cannot be read.
    Exception
        "Image Index Range Error" if the index walk picked up rows of several images.

    Notes
    -----
    The generator makes a single pass over ``df`` and then stops.
    NOTE(review): Keras generator-based training usually expects an endless
    generator — wrap this in a loop at the call site if that is required.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if len(df) == 0:
        return

    id_num_mask = df.groupby('ImageId').ClassId.count()
    batch_img,batch_mask = [],[]
    index = df.index.values[0] #initial index (not exactly zero)
    for img_id,num_mask in id_num_mask.items():
        img = cv2.imread(train_img_dir+img_id)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: " + train_img_dir + img_id)
        img = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
        # Slice the frame that was passed in (not the global train_df), so that
        # subsets and re-indexed frames are handled correctly. .loc is inclusive.
        segment_df = df.loc[index:index+num_mask-1].reset_index(drop = True)
        index += num_mask
        if segment_df['ImageId'].nunique()!= 1:
            raise Exception("Image Index Range Error")
        seg_img = make_mask_img(segment_df)

        batch_img.append(img)
        batch_mask.append(seg_img)
        # Emit every time the buffer is full, not only for the very first batch.
        if len(batch_img) == batch_size:
            yield np.array(batch_img,dtype = np.float32)/255, np.array(batch_mask,dtype = np.int32)
            batch_img,batch_mask = [],[]
    # Flush the last, possibly smaller, batch.
    if len(batch_img) != 0:
        yield np.array(batch_img,dtype = np.float32)/255, np.array(batch_mask,dtype = np.int32)

In [None]:
# def img_gen_test(df):
    

## Define the UNET Architecture

[UNET Website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/)

[UNET Paper](https://arxiv.org/abs/1505.04597)

![unet_arch](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/u-net-architecture.png)

The UNET consists of three paths:
1. Downsampling path
2. Upsampling path
3. Skip-connection path

A Down-sampling unit consists of:
(3x3 convolution layer, ReLU)
(3x3 convolution layer, ReLU)
(2x2 max pooling layer with stride of 2)

An up-sampling unit consists of:
(3x3 convolution layer, ReLU)
(3x3 convolution layer, ReLU)
(2x2 up convolution layer)

The up convolution layer has many other names, like deconvolution layer, or the more accurate one: transposed convolution layer. In the `keras` implementation, I haven't figured out whether the `UpSampling2D()` layer and the `Conv2DTranspose()` layer actually do the same thing; I will return to this matter later.

By the way, I have found a very interesting repo by [vdumoulin](https://github.com/vdumoulin/conv_arithmetic), containing some visualisations of convolution layers. If, like me, you find them hard to understand, it might be useful.

In [None]:
def down_block(x,filters,kernel_size = (3,3),padding = 'same',strides = 1):
    """Encoder stage: two ReLU convolutions, then 2x2 max pooling with stride 2.

    Returns a pair ``(c, p)``: ``c`` is the output of the second convolution
    (kept for the skip connection) and ``p`` is ``c`` after pooling.
    """
    features = x
    for _ in range(2):
        conv = keras.layers.Conv2D(
            filters, kernel_size, padding=padding, strides=strides, activation="relu"
        )
        features = conv(features)
    pooled = keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2))(features)
    return features, pooled

def up_block(x,skip,filters,kernel_size = (3,3),padding = 'same',strides = 1):
    """Decoder stage: upsample ``x`` by 2, concatenate ``skip``, apply two ReLU convolutions.

    Parameters
    ----------
    x : tensor
        Feature map coming from the previous (coarser) stage.
    skip : tensor
        Encoder feature map to concatenate; its height/width must be twice
        those of ``x``.
    filters : int
        Number of output channels of the transposed convolution and of both
        convolutions.
    kernel_size, padding, strides
        Passed to the two ``Conv2D`` layers only; the transposed convolution
        always uses a 2x2 kernel with stride 2.

    Returns
    -------
    tensor
        Output of the second convolution.
    """
    # The stride of 2 is what doubles height and width. With the default stride
    # of 1 the output keeps the size of `x` and cannot be concatenated with `skip`.
    up = keras.layers.Conv2DTranspose(filters,kernel_size = (2,2),strides = (2,2),padding = 'same')(x)
    concat = keras.layers.Concatenate()([up,skip])
    c = keras.layers.Conv2D(filters,kernel_size,padding=padding,strides=strides,activation = "relu")(concat)
    c = keras.layers.Conv2D(filters,kernel_size,padding=padding,strides=strides,activation = "relu")(c)
    return c

def bottle_neck_block(x,filters,kernel_size = (3,3),padding = 'same',strides = 1):
    """Bottleneck stage: two stacked ReLU convolutions with no pooling or upsampling.

    Returns the output of the second convolution.
    """
    out = x
    for _ in range(2):
        conv = keras.layers.Conv2D(
            filters, kernel_size, padding=padding, strides=strides, activation="relu"
        )
        out = conv(out)
    return out


In [None]:
def UNet():
    """Build the (uncompiled) U-Net segmentation model.

    The network takes ``(HEIGHT, WIDTH, 3)`` inputs, runs four ``down_block``
    stages, one ``bottle_neck_block`` and four ``up_block`` stages, and ends
    with a 1x1 convolution that produces ``category_num`` channels per pixel.
    The decoder mirrors the encoder: each ``up_block`` uses the same number of
    filters as the encoder stage whose skip tensor it receives.

    Returns
    -------
    keras.models.Model
        Model mapping the input image tensor to the per-pixel class scores.
    """
    f = [64,128,256,512,1024] #number of feature channels per stage

    #input layer
    inputs = keras.layers.Input(shape = (HEIGHT,WIDTH,3))

    # downsampling path
    c1,p1 = down_block(inputs,f[0])
    c2,p2 = down_block(p1,f[1])
    c3,p3 = down_block(p2,f[2])
    c4,p4 = down_block(p3,f[3])

    # bottle neck path
    b5 = bottle_neck_block(p4,f[4])

    # upsampling path: filter counts mirror the encoder stage providing the skip
    u6 = up_block(b5,c4,f[3])
    u7 = up_block(u6,c3,f[2])
    u8 = up_block(u7,c2,f[1])
    u9 = up_block(u8,c1,f[0])

    #output layer
    # NOTE(review): the masks built by make_mask_img hold one label per pixel, which
    # usually pairs with a softmax output; the loss is not visible here, so the
    # sigmoid is left unchanged — confirm against the compile step.
    outputs = keras.layers.Conv2D(category_num,(1,1),activation = 'sigmoid')(u9)
    model = keras.models.Model(inputs,outputs)
    return model
    
    

In [None]:
model = UNet()