# Training set visualization
Use this notebook to quickly browse the training images together with their labelled segmentation masks.

## Packages

In [1]:
# Turn off Jedi-based tab completion in IPython's completer.
%config IPCompleter.use_jedi = False

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
from skimage.io import imread
from collections import defaultdict
from matplotlib import colors
from ipywidgets import interact

## Setting

In [3]:
# Input locations, relative to this notebook.
basedir_train_img = "./../data/raw/train_images/"
path_train_label = "./../data/raw/train.csv"

# Image geometry as (rows, cols, channels); the derived names are unpacked
# from the single source of truth so they cannot drift apart.
IMG_SHAPE = (1400, 2100, 3)
IMG_ROWS, IMG_COLS, IMG_CHANNEL = IMG_SHAPE
IMG_SIZE = IMG_SHAPE[:2]

# Size passed to plt.figure(figsize=...).
FIGSIZE = (10, 10)

# Hex colours, picked by class index when the per-class overlay colours
# are built.
palette = [
    '#1f77b4',
    '#ff7f0e',
    '#2ca02c',
    '#d62728',
    '#9467bd',
    '#8c564b',
    '#e377c2',
    '#7f7f7f',
    '#bcbd22',
    '#17becf',
]

In [4]:
# Every file in the training image directory; these become the dropdown options.
img_names = os.listdir(basedir_train_img)

# labels[image_name][class_name] -> run-length string ("start length start length ...").
# Only non-empty run-length strings are stored, so a class that is absent from
# an image has no entry for it.
labels = defaultdict(dict)
with open(path_train_label) as f:
    next(f, None)  # skip the header row (tolerates an empty file)
    for line in f:  # stream the file instead of materialising readlines()
        fields = line.rstrip('\n').split(',')
        if len(fields) < 2:
            continue  # blank or comma-less line: nothing to parse
        # The first column is "<image name>_<class>"; split on the LAST
        # underscore so image names containing underscores still parse.
        name, sep, label = fields[0].rpartition('_')
        if not sep:
            continue  # no "<image>_<class>" key on this line
        rle = fields[1]
        if rle:
            labels[name][label] = rle

# Sorted so that the class -> bit / colour assignment below is deterministic.
classes = sorted({c for per_image in labels.values() for c in per_image})

# label_enc: one distinct bit per class, so several classes can share a pixel
#            in a single integer mask.
# label_enc_color: RGB triple (floats, as a list) used for that class's overlay.
label_enc = {}
label_enc_color = {}
for i, c in enumerate(classes):
    label_enc[c] = 2 ** i
    # Wrap around the palette rather than raising IndexError when there are
    # more classes than colours.
    label_enc_color[c] = list(colors.to_rgb(palette[i % len(palette)]))

In [5]:
def draw_sample(img_name):
    """Show one training image together with its labelled regions.

    Two figures are drawn:

    1. the image with one translucent colour layer per class present, titled
       with the image name and the percentage of the image each class covers;
    2. a two-column grid with one panel per class, showing only the pixels
       of that class (all other pixels set to 0). Panels of absent classes
       are left empty.

    Args:
        img_name: File name inside ``basedir_train_img``; also the key used
            to look up the run-length encoded segments in ``labels``.
    """
    img = imread(os.path.join(basedir_train_img, img_name))

    # Flat per-pixel bit field: each class contributes its own bit from
    # ``label_enc``. np.int64 replaces the ``np.int`` alias, which was removed
    # in NumPy 1.24 and made the original raise AttributeError.
    mask = np.zeros(IMG_ROWS * IMG_COLS, dtype=np.int64)
    # .get() keeps the lookup from inserting an empty entry into the
    # defaultdict for images that have no labels.
    for label, seg_str in labels.get(img_name, {}).items():
        if not seg_str:
            continue
        label_num = label_enc[label]
        # "start length start length ..." -> (n, 2) array of integer pairs.
        segment = np.array(seg_str.split()).astype(np.int64).reshape(-1, 2)
        # NOTE(review): starts are used as 0-based offsets here. If the label
        # file uses 1-based pixel numbering the mask is shifted by one pixel
        # -- confirm against the dataset description.
        for start, length in segment:
            # OR instead of += so overlapping runs of the same class cannot
            # carry into another class's bit.
            mask[start:start + length] |= label_num
    # Runs index the image column by column, hence Fortran order.
    mask = mask.reshape(IMG_SIZE, order='F')

    # Compute each class's boolean mask and covered fraction once; both
    # figures below reuse them.
    n_pixels = IMG_COLS * IMG_ROWS
    per_class = []
    for label in classes:
        code = label_enc[label]
        mask_c = (mask & code) == code
        per_class.append((label, mask_c, mask_c.sum() / n_pixels))

    # Figure 1: image with a translucent layer per class that is present.
    plt.figure(figsize=FIGSIZE)
    plt.imshow(img, alpha=1)
    a_c = {}
    for label, mask_c, area in per_class:
        if not area:
            continue
        # Plain float so the dict prints as numbers in the title on NumPy 2.
        a_c[label] = float(np.round(100 * area, 2))
        img_l = np.zeros(IMG_SHAPE)
        img_l[mask_c] = label_enc_color[label]
        plt.imshow(img_l, alpha=0.3)
    plt.title("{} - {}".format(img_name, a_c))
    plt.show()

    # Figure 2: one panel per class. Round the row count UP so an odd number
    # of classes still fits (int(len / 2) made plt.subplot raise ValueError).
    plt.figure(figsize=FIGSIZE)
    n_grid_rows = (len(classes) + 1) // 2
    for i, (label, mask_c, area) in enumerate(per_class):
        plt.subplot(n_grid_rows, 2, i + 1)
        if not area:
            continue
        img_c = img.copy()
        img_c[~mask_c] = 0
        plt.imshow(img_c)
        plt.title("{}: {}%".format(label, np.round(area * 100, 2)))
    plt.show()

## Plotting...

In [6]:
# Build a dropdown widget over img_names; each selection calls draw_sample
# with the chosen file name. The trailing semicolon is the usual notebook way
# to keep the call's return value from being echoed as cell output.
interact(draw_sample, img_name=img_names);

interactive(children=(Dropdown(description='img_name', options=('6550e47.jpg', 'ade61a0.jpg', 'cc841a2.jpg', '…