# Segmentation End-to-End Processing

## 1. Data Processing

### 데이터 다운로드 후 압축풀기

In [None]:
# 데이터를 다운로드합니다
!wget https://pai-datasets.s3.ap-northeast-2.amazonaws.com/vision_multi_campus/images_labels.zip

# 압축을 풉니다
import zipfile
zip_obj = zipfile.ZipFile('./images_labels.zip')
zip_obj.extractall('./')
zip_obj.close()

### 데이터 구성

````
images_labels/images/
                |- *.jpg
                |- *.jpg
             /labels/car
                    |- hyundai.json
````


In [None]:
import os
import cv2
import json
import glob
import pprint
import numpy as np 
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow.python.keras.utils import to_categorical

### 1. Images / Labels 불러오기 

데이터가 들어있는 images_labels 폴더에서 모든 이미지 경로들을 불러옵니다.

In [None]:
# Collect the path of every image whose file name starts with "GH".
image_dir = './images_labels/images'
img_paths = glob.glob(image_dir + '/GH*.jpg')

# Open the first path returned by glob and display it as a sample.
# NOTE(review): glob does not guarantee any ordering, so "first" is arbitrary.
img = Image.open(img_paths[0])
plt.imshow(img)

### 2. Car Label

현재 자동차가 segmentation된 label 값은 ./images_labels/labels/car 폴더 내에 json 형태로 저장되어 있습니다.

In [None]:
# Path of the JSON file holding the car segmentation annotations
car_label = './images_labels/labels/car/hyundai.json'

# Read the annotation file; the context manager closes the handle afterwards.
# JSON text is expected to be UTF-8, so the encoding is stated explicitly
# instead of depending on the platform default.
with open(car_label, 'r', encoding='utf-8') as f:
    car_jsons = json.load(f)

# Top-level keys and the number of annotations
pprint.pprint(car_jsons.keys())
print('# annotations : {}'.format(len(car_jsons['annotations'])))

# Sample annotation / image records (index 3)
pprint.pprint(car_jsons['annotations'][3])
pprint.pprint(car_jsons['images'][3])

# Sample annotation / image records (index 4) and the category table
pprint.pprint(car_jsons['annotations'][4])
pprint.pprint(car_jsons['images'][4])
pprint.pprint(car_jsons['categories'])

### 3. Segmentation Label값 가져오기

In [None]:
def segmentation_info(jsons_info, ind, show=True):
    """Collect the quarter-scale segmentation polygons of one image.

    Args:
        jsons_info: Parsed annotation dict with an ``'images'`` list (entries
            carrying ``'file_name'`` and ``'id'``) and an ``'annotations'``
            list (entries carrying ``'image_id'`` and ``'segmentation'``).
        ind: Index into ``jsons_info['images']`` of the image to look up.
        show: When true, the image is loaded from the module-level
            ``image_dir``, each matching polygon is drawn on it, and the
            image is displayed once per matching annotation (polygons
            accumulate across figures). When false, the image file is not
            opened at all.

    Returns:
        A tuple ``(image_name, image_id, annos)`` where ``annos`` is a list of
        int64 arrays of shape ``(1, n_points, 2)`` with the coordinates
        integer-divided by 4 to match the quarter-size image.
    """
    # Look up the image name / id
    image_entry = jsons_info['images'][ind]
    image_name = image_entry['file_name']
    image_id = image_entry['id']

    # The image is only needed for visualisation, so decoding it is skipped
    # when show is false (the drawn image is never returned).
    sample_img = None
    if show:
        sample_img = Image.open(os.path.join(image_dir, image_name))
        resize_img = sample_img.resize([sample_img.size[0]//4, sample_img.size[1]//4])
        sample_img = np.array(resize_img)

    ret_annos = []
    for anno in jsons_info['annotations']:
        if anno['image_id'] != image_id:
            continue

        # Reshape the flat coordinate list into the (1, n_points, 2) layout
        # that cv2.polylines takes, then scale it down to the quarter image.
        # NOTE(review): assumes 'segmentation' flattens to an even number of
        # values (a single polygon) - confirm against the dataset.
        sample_anno = np.reshape(anno['segmentation'], [1, -1, 2])
        sample_anno = sample_anno.astype(np.int64)
        sample_anno = sample_anno//4
        ret_annos.append(sample_anno)

        if show:
            cv2.polylines(sample_img, [sample_anno], True, (255,0,0))
            print(image_name, image_id)
            plt.figure(figsize=(30,10))
            plt.imshow(sample_img)
            plt.show()

    return image_name, image_id, ret_annos

# Look up the image at index 1227; show defaults to True, so the image is
# displayed with its polygons drawn on it.
segmentation_info(car_jsons, 1227)

### 4. 전체 이미지에 대한 라벨 설정하기

1/4로 줄인 이미지에 대해 마스크를 생성합니다.

In [None]:
def generation_mask(image, polys, color):
    """Render filled polygons onto a blank canvas shaped like ``image``.

    Args:
        image: Array whose shape and dtype the mask copies.
        polys: Iterable of point arrays; each is passed to ``cv2.fillPoly``
            on its own.
        color: Fill colour handed to ``cv2.fillPoly``.

    Returns:
        The canvas after every polygon has been filled.
    """
    canvas = np.zeros_like(image)
    # One fillPoly call per polygon, exactly as many calls as polygons.
    for polygon in polys:
        canvas = cv2.fillPoly(canvas, [polygon], color)
    return canvas

In [None]:
def generation_seg_label(mask):
    """Convert a colour mask into a binary one-hot segmentation label.

    A pixel counts as foreground when the sum of its channel values is
    non-zero. This produces a binary (two-class) label only; it is not
    suitable when different colours are used to encode several classes.

    Args:
        mask: Array whose last axis holds the colour channels.

    Returns:
        float32 array of shape ``mask.shape[:-1] + (2,)`` where ``[..., 0]``
        is 1 for background pixels and ``[..., 1]`` is 1 for foreground
        pixels.
    """
    # The builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    foreground = np.sum(mask, axis=-1).astype(bool)
    # One-hot encode with NumPy: class 0 = background, class 1 = foreground,
    # float32 like the Keras to_categorical default.
    return np.stack([~foreground, foreground], axis=-1).astype(np.float32)


In [None]:
# Directory for the rendered mask images
save_mask_dir = './images_labels/quarter/mask'
os.makedirs(save_mask_dir, exist_ok=True)

# Directory for the one-hot label arrays
save_label_dir = './images_labels/quarter/label'
os.makedirs(save_label_dir, exist_ok=True)

n_images = len(car_jsons['images'])

for ind in tqdm(range(n_images)):

    # Extract the segmentation annotations of this image (no display)
    img_name, img_id, annos = segmentation_info(car_jsons, ind, False)

    # Images without any annotation get neither a mask nor a label
    if not annos:
        continue

    # Load the image at quarter resolution; the mask copies its shape/dtype
    img = Image.open(os.path.join(image_dir, img_name))
    resize_img = img.resize([img.size[0]//4, img.size[1]//4])
    img = np.array(resize_img)

    # Generate the mask
    mask = generation_mask(img, annos, [255,0,0])

    # Save the mask under the original image file name
    save_mask = os.path.join(save_mask_dir, img_name)
    plt.imsave(save_mask, mask)

    # Generate the label
    label = generation_seg_label(mask)

    # Save the label next to it with only the extension swapped.
    # str.replace('jpg', 'npy') would also rewrite a "jpg" that appears
    # elsewhere in the file name.
    label_name = os.path.splitext(img_name)[0] + '.npy'
    save_label = os.path.join(save_label_dir, label_name)
    np.save(save_label, label)

## 2. 이미지 - 라벨 매칭하기

In [None]:
# Collect every image path.
image_dir = './images_labels/images'
img_paths = glob.glob('{}/GH*.jpg'.format(image_dir))

# Collect every saved label path.
label_dir = './images_labels/quarter/label'
label_path = glob.glob('{}/*.npy'.format(label_dir))

# Index both sides by file stem (base name without extension). This avoids
# the quadratic nested loop, does not depend on how many '/' separators the
# path contains, and matches names exactly rather than by substring.
img_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in img_paths}
label_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in label_path}

# Keep only stems present on both sides, in one shared sorted order, so that
# label_path[k] and label_exist_img_path[k] always describe the same image.
common_stems = sorted(img_by_stem.keys() & label_by_stem.keys())
label_path = [label_by_stem[stem] for stem in common_stems]
label_exist_img_path = [img_by_stem[stem] for stem in common_stems]

print("정렬된 label path의 갯수 : {}".format(len(label_path)))
print("정렬된 img path의 갯수 : {}".format(len(label_exist_img_path)))
# Only print the sample entry when at least six pairs exist
if len(label_path) > 5:
    print("정렬된 label path의 5번쨰 path : {}".format(label_path[5]))
    print("정렬된 img path의 5번쨰 path : {}".format(label_exist_img_path[5]))

In [None]:
def match_image_and_label(image_path, label_path, num_data):
    """Load up to ``num_data`` image/label pairs into two stacked arrays.

    Args:
        image_path: Sequence of image file paths.
        label_path: Sequence of ``.npy`` label paths; entry ``k`` is loaded
            alongside image ``k``.
        num_data: Maximum number of leading entries to load from each list.

    Returns:
        A two-element list ``[images, labels]``: the quarter-size images and
        the loaded labels, each passed through ``np.array``.
    """
    chosen_images = image_path[:num_data]
    chosen_labels = label_path[:num_data]

    images = []
    labels = []
    for idx, img_file in enumerate(tqdm(chosen_images)):
        # Image: shrink both dimensions by integer division by 4
        picture = Image.open(img_file)
        quarter = picture.resize([picture.size[0]//4, picture.size[1]//4])
        images.append(np.array(quarter))
        # Label: stored array at the same position in the label list
        labels.append(np.load(chosen_labels[idx]))

    # Pair of stacked arrays: images first, labels second
    return [np.array(images), np.array(labels)]

## 3. Model Processing

### 데이터 세팅하기

In [None]:
# Use only the images that have a saved label.
image_path = label_exist_img_path
# label_path is used as-is; the former `label_path = label_path`
# self-assignment had no effect and was dropped.

In [None]:
# Load at most 2200 image/label pairs; the first 2000 form the training
# split and whatever follows forms the test split.
data_image, data_label = match_image_and_label(image_path, label_path, 2200)
train_image, test_image = data_image[:2000], data_image[2000:]
train_label, test_label = data_label[:2000], data_label[2000:]

In [None]:
# Report the shape of each split.
for caption, split in (
        ("train image의 shape : ", train_image),
        ("train label의 shape : ", train_label),
        ("test image의 shape : ", test_image),
        ("test label의 shape : ", test_label)):
    print(caption, split.shape)

### Unet 구성하기

In [None]:
from tensorflow.keras.layers import Layer, Conv2D, MaxPool2D, Conv2DTranspose, ZeroPadding2D, MaxPooling2D, Concatenate
from tensorflow.keras.layers import Input, UpSampling2D, BatchNormalization, Flatten, Dense, Dropout
from tensorflow.keras.layers import Softmax, Add
from tensorflow.keras.layers import Lambda, concatenate
from tensorflow.keras import optimizers
import tensorflow.keras.backend as K 
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt 
from keras.utils import np_utils
import numpy as np
import tensorflow as tf

In [None]:
# Reset the default TensorFlow graph before the model is defined
# (TensorFlow 1.x API).
tf.reset_default_graph()

# As this is the last session, build the U-net model yourself while
# consulting other materials as little as possible.
# fix me!

In [None]:
def mean_iou(y_true, y_pred):
    """Mean intersection-over-union over the classes present in one sample.

    Both inputs are reduced to class indices with ``argmax`` over the last
    axis. The IoU is computed for every class index that occurs in either
    the target or the prediction, and the per-class values are averaged.

    Args:
        y_true: Target tensor with class scores on the last axis.
        y_pred: Prediction tensor with class scores on the last axis.

    Returns:
        Scalar tensor holding the mean of the per-class IoU values.
    """
    # tf.argmax yields int64 indices, so the unique values below are int64
    # already and need no extra cast.
    y_cls = tf.argmax(y_true, axis=-1)
    pred_cls = tf.argmax(y_pred, axis=-1)

    y_uniques = tf.unique(tf.reshape(y_cls, [-1]))[0]
    pred_uniques = tf.unique(tf.reshape(pred_cls, [-1]))[0]

    # Every class index that appears in the target or in the prediction
    uniques = tf.unique(tf.concat([y_uniques, pred_uniques], axis=0))[0]

    def get_union(index):
        # Pixels labelled `index` in the target or in the prediction
        either = tf.equal(y_cls, index) | tf.equal(pred_cls, index)
        return tf.reduce_sum(tf.cast(either, tf.int64))

    def get_intersection(index):
        # Pixels labelled `index` in both the target and the prediction
        both = tf.equal(y_cls, index) & tf.equal(pred_cls, index)
        return tf.reduce_sum(tf.cast(both, tf.int64))

    unions = tf.map_fn(get_union, uniques)
    inters = tf.map_fn(get_intersection, uniques)
    # A class in `uniques` occurs at least once, so its union is never zero.
    return tf.reduce_mean(inters / unions)

def batch_mean_iou(y_true, y_pred):
    """Average ``mean_iou`` over the samples of a batch.

    Args:
        y_true: Batch of target tensors (first axis is the batch axis).
        y_pred: Batch of prediction tensors with the same shape as ``y_true``.

    Returns:
        Scalar tensor: the mean of the per-sample ``mean_iou`` values.
    """
    y_true = tf.cast(y_true, tf.float64)
    y_pred = tf.cast(y_pred, tf.float64)

    # Stack target and prediction on a new trailing axis so a single map_fn
    # call can walk both tensors sample by sample.
    stacked_cls = tf.stack([y_true, y_pred], axis=-1)

    per_sample_iou = tf.map_fn(
        lambda pair: mean_iou(pair[..., 0], pair[..., 1]),
        stacked_cls,
    )
    return tf.reduce_mean(per_sample_iou)

### 학습하기

In [None]:
# metric으로 batch_mean_iou를 사용해 학습을 진행해주세요.
# fix me!

In [None]:
# Draw one training curve per recorded quantity: first the batch mean IoU
# metric, then the loss. Each curve gets its own figure.
for history_key, plot_title, y_axis_label in (
        ('batch_mean_iou', 'Batch Mean IOU', 'Batch Mean IOU'),
        ('loss', 'Model loss', 'Loss')):
    plt.plot(results.history[history_key])
    plt.title(plot_title)
    plt.ylabel(y_axis_label)
    plt.xlabel('Epoch')
    plt.legend(['Train'], loc='upper left')
    plt.show()

In [None]:
# Scale the first 100 test images by 1/255 and run the model on them.
normalized_batch = test_image[:100] / 255.
pred = model.predict(normalized_batch)
# Predicted class index per pixel: argmax over the last axis
pred_cls = np.argmax(pred, axis=-1)

In [None]:
# 2 x 10 grid: originals on the top row, predictions on the bottom row.
fig, axes = plt.subplots(2, 10)
fig.set_size_inches(30, 10)
axes_grid = np.array(axes)
ognl_axes = axes_grid[0, :].flatten()
pred_axes = axes_grid[1, :].flatten()
for col in range(10):
    ognl_axes[col].imshow(test_image[col])
    # Displays the pixels whose predicted class index is 0.
    # NOTE(review): presumably index 0 is the background class - confirm
    # that highlighting it (rather than index 1) is intended.
    pred_axes[col].imshow(pred_cls[col] == 0)
plt.show()

---
⊙ Copyright(c) 2020 by PublicAI. All rights reserved <br>
All pictures, codes, writings cannot be copied without permission. <br>
Written by PAI(info@publicai.co.kr) <br>
last updated on 2020/01/4 <br>

---