# Create labels from the RLE encoded masks.

source: [kaggle-kernel](https://www.kaggle.com/nikhilikhar/steel-create-labels?scriptVersionId=18640338)

# Imports

In [1]:
# (Re)load the autoreload extension and use mode 2, which re-imports all
# modules before each cell is executed so edits to local .py files are picked up.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


In [2]:
import io
import os
import time
import zipfile
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm_notebook as tq


# Mask to image

In [3]:
# Wall-clock reference point; the last cell reports the time elapsed since here.
start = time.time()

# Dataset locations: the competition root and the training-image folder inside it.
input_dir = "/home/ubuntu/datasets/severstal-steel-defect-detection/"
train_img_dir = input_dir + "train_images/"

# Four defect classes plus one extra label; make_mask_img uses the last label
# (category_num - 1) for pixels that belong to no defect.
category_num = 4 + 1

In [4]:
def make_mask_img(segment_df, seg_width=1600, seg_height=256, background=None):
    """Decode the run-length-encoded masks of one image into a label array.

    Each row of ``segment_df`` contributes one class. ``EncodedPixels`` is a
    whitespace-separated list of ``start length`` pairs, where ``start`` is a
    1-based index into the image flattened in column-major order. Rows whose
    ``EncodedPixels`` is NaN are skipped. Where runs from different rows
    overlap, the row processed last wins.

    Parameters
    ----------
    segment_df : pandas.DataFrame
        Rows for a single image; must have ``EncodedPixels`` and ``ClassId``
        columns.
    seg_width, seg_height : int, optional
        Size of the output mask in pixels (defaults: 1600 x 256).
    background : int, optional
        Label for pixels not covered by any run. Defaults to
        ``category_num - 1`` (read from the notebook's global configuration).

    Returns
    -------
    numpy.ndarray
        ``int32`` array of shape ``(seg_height, seg_width)``. Covered pixels
        hold ``ClassId - 1``; all other pixels hold ``background``.

    Raises
    ------
    ValueError
        If an ``EncodedPixels`` string has an odd number of values (or a value
        that is not an integer).
    """
    if background is None:
        # Fall back to the notebook-level setting only when the caller did not
        # choose a label explicitly.
        background = category_num - 1

    seg_img = np.full(seg_width * seg_height, background, dtype=np.int32)

    for encoded_pixels, class_id in zip(segment_df["EncodedPixels"].values,
                                        segment_df["ClassId"].values):
        if pd.isna(encoded_pixels):
            continue

        # split() without an argument tolerates repeated/leading/trailing whitespace.
        pixel_list = [int(token) for token in encoded_pixels.split()]
        if len(pixel_list) % 2:
            raise ValueError(
                "EncodedPixels must contain (start, length) pairs, got "
                "{} values".format(len(pixel_list))
            )

        label = int(class_id) - 1
        for run_start, run_len in zip(pixel_list[0::2], pixel_list[1::2]):
            first = run_start - 1  # RLE starts are 1-based
            seg_img[first:first + run_len] = label

    # Runs index the image column by column, hence the Fortran-order reshape.
    return seg_img.reshape((seg_height, seg_width), order='F')

In [5]:
# Load the annotation table that ships with the dataset.
train_df = pd.read_csv(os.path.join(input_dir, "train.csv"))
# Not needed for this CSV, which already has separate ImageId and ClassId columns:
#   train_df[['ImageId', 'ClassId']] = train_df['ImageId_ClassId'].str.split('_', expand=True)
train_df.head()

Unnamed: 0,ImageId,ClassId,EncodedPixels
0,0002cc93b.jpg,1,29102 12 29346 24 29602 24 29858 24 30114 24 3...
1,0007a71bf.jpg,3,18661 28 18863 82 19091 110 19347 110 19603 11...
2,000a4bcdd.jpg,1,37607 3 37858 8 38108 14 38359 20 38610 25 388...
3,000f6bf48.jpg,4,131973 1 132228 4 132483 6 132738 8 132993 11 ...
4,0014fce06.jpg,3,229501 11 229741 33 229981 55 230221 77 230468...


In [6]:
# (rows, columns) of the loaded annotation table.
train_df.shape

(7095, 3)

In [7]:
# Distinct image file names, in order of first appearance in train_df.
images = pd.unique(train_df["ImageId"])
images

array(['0002cc93b.jpg', '0007a71bf.jpg', '000a4bcdd.jpg', ...,
       'fffe98443.jpg', 'ffff4eaa8.jpg', 'ffffd67df.jpg'], dtype=object)

Creating the labels takes around 13 minutes.

In [9]:
# Create the labels-np / labels-img working directories (-p: no error if they exist).
# NOTE: because the path is quoted, the shell does not expand `~`; these commands
# create a literal "~" directory under the current working directory, not under $HOME.
!mkdir -p "~/kaggle/working/labels-np/"
!mkdir -p "~/kaggle/working/labels-img/"

In [10]:
# Build two archives in a single pass over `images`:
#   labels-np.zip  -> <image stem>.npy : the label mask as a NumPy array
#   labels-img.zip -> <image stem>.png : the same mask replicated over 3 channels
#
# Each array / PNG is serialised in memory and written straight into its archive,
# so no scratch directory or temporary files are needed. The `with` block makes
# sure both archives are closed (and therefore valid) even if an image fails.
with zipfile.ZipFile('labels-np.zip', 'w', zipfile.ZIP_DEFLATED) as zip_np, \
        zipfile.ZipFile('labels-img.zip', 'w', zipfile.ZIP_DEFLATED) as zip_img:
    for image in images:
        stem = Path(image).stem  # file name without the extension
        mask = make_mask_img(train_df[train_df['ImageId'] == image])

        # np.save accepts a file-like object; the bytes are identical to a .npy file.
        npy_buffer = io.BytesIO()
        np.save(npy_buffer, mask)
        zip_np.writestr(stem + ".npy", npy_buffer.getvalue())

        # PNG encoding expects 8-bit data; the labels are small non-negative
        # integers, so the cast to uint8 is lossless.
        img_mask_3_chn = np.dstack((mask, mask, mask)).astype(np.uint8)
        encoded_ok, png_bytes = cv2.imencode(".png", img_mask_3_chn)
        if not encoded_ok:
            raise RuntimeError("PNG encoding failed for " + str(image))
        zip_img.writestr(stem + ".png", png_bytes.tobytes())

In [9]:
# !unzip -l labels-np.zip | less

In [11]:
# Report the wall-clock time since `start` was recorded, formatted as HH:MM:SS.ss.
end = time.time()
elapsed = end - start
hours, rem = divmod(elapsed, 3600)    # whole hours, leftover seconds
minutes, seconds = divmod(rem, 60)    # whole minutes, leftover seconds
print(
    "Execution Time  {:0>2}:{:0>2}:{:05.2f}".format(
        int(hours), int(minutes), seconds
    )
)

Execution Time  00:11:11.08
