In [2]:
import cv2
import numpy as np
import os
import pandas as pd
from matplotlib import pyplot as plt

In [3]:
# Load the training annotations. As the preview shows, every row carries an ImageId,
# a ClassId and an EncodedPixels string (plus the unnamed index column stored in the CSV).
train_csv_path = '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/train.csv'
df = pd.read_csv(train_csv_path)
# Last expression of the cell: rich preview of the first five rows.
df.head()


Unnamed: 0,ImageId,ClassId,EncodedPixels
0,0002cc93b.jpg,1,29102 12 29346 24 29602 24 29858 24 30114 24 3...
1,0007a71bf.jpg,3,18661 28 18863 82 19091 110 19347 110 19603 11...
2,000a4bcdd.jpg,1,37607 3 37858 8 38108 14 38359 20 38610 25 388...
3,000f6bf48.jpg,4,131973 1 132228 4 132483 6 132738 8 132993 11 ...
4,0014fce06.jpg,3,229501 11 229741 33 229981 55 230221 77 230468...


In [12]:
def rle_decode(mask_rle, shape):
    """
    Decode a run-length encoded segmentation mask string into a 2D array.

    The string is a whitespace-separated sequence of ``start length`` pairs,
    where ``start`` is a 1-based index into the flattened mask.

    Parameters
    ----------
    mask_rle : str, None or NaN
        Run-length encoded mask. ``None``, a float NaN (how pandas represents a
        missing cell) or an empty string decode to an all-zero mask.
    shape : tuple of int
        Two dimensions ``(d0, d1)``. The flat mask of ``d0 * d1`` pixels is
        reshaped to ``(d0, d1)`` and then transposed.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(shape[1], shape[0])`` holding 1 inside the
        encoded runs and 0 elsewhere. Note the transpose: the result is NOT of
        shape ``shape``.

    Raises
    ------
    ValueError
        If the string holds an odd number of tokens, i.e. a start without a
        matching length.
    """
    # Flat all-zero canvas of d0 * d1 pixels.
    mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # A missing annotation is not a string and has no .split(); treat it as "no runs".
    is_missing = mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle))
    tokens = [] if is_missing else mask_rle.split()

    # Without this check a lone trailing start could silently broadcast against
    # a single length and paint a wrong mask.
    if len(tokens) % 2:
        raise ValueError(
            f'RLE string must contain start/length pairs, got {len(tokens)} tokens'
        )

    runs = np.asarray(tokens, dtype=int)
    # Even positions are the 1-based starts, odd positions the run lengths.
    starts = runs[0::2] - 1      # convert to 0-based indices
    ends = starts + runs[1::2]   # exclusive end of every run

    # Paint every run onto the flat canvas.
    for start, end in zip(starts, ends):
        mask[start:end] = 1

    # Reshape to the requested dimensions and transpose (see Returns).
    return mask.reshape(shape).T



In [13]:
def rle_encode(binary_matrix: np.ndarray) -> str:
    """
    Run-length encode a binary mask.

    The matrix is transposed and flattened, and every maximal run of pixels
    equal to 1 is reported as a ``start length`` pair, where ``start`` is the
    1-based index of the first pixel of the run in that flattened order.

    Parameters
    ----------
    binary_matrix : numpy.ndarray
        Mask whose foreground pixels are exactly 1. Any other value is treated
        as background, so it terminates the current run.

    Returns
    -------
    str
        Space-separated ``start length`` pairs; the empty string when the mask
        holds no foreground pixel.
    """
    # Same traversal order as the flat index used by the RLE starts.
    foreground = np.asarray(binary_matrix).T.flatten() == 1

    # Pad with background on both sides so that runs touching the first or the
    # last pixel still produce an opening / closing transition.
    padded = np.concatenate(([False], foreground, [False]))

    # Positions where the value flips: even entries open a run (0-based start),
    # odd entries close it (exclusive end).
    changes = np.flatnonzero(padded[1:] != padded[:-1])
    starts = changes[0::2] + 1               # RLE starts are 1-based
    lengths = changes[1::2] - changes[0::2]

    return ' '.join(f'{start} {length}' for start, length in zip(starts, lengths))

In [14]:
# Dimensions handed to rle_decode as (height, width). rle_decode reshapes the flat mask to
# this shape and returns its transpose, so decoded masks come back with shape (256, 1600).
height, width = 1600, 256

# Folder with the source images and folder receiving the generated sub-images. The trailing
# '/' matters: file names are appended to these by plain string concatenation.
read_path = '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/train_images/'
write_path = '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/train_subimages/'

# Empty annotation table: the image identifier followed by one RLE column per class id 1-4.
new_df = pd.DataFrame(
    columns=['ImageId'] + [f'EncodedPixels{class_id}' for class_id in range(1, 5)]
)

In [34]:
## Generate subimages and annotations
# Every image is cut along its columns into seven 256-column blocks. 1600 is not a multiple
# of 256, so the seventh block is taken from the right edge and overlaps the sixth one.
# For each block one PNG is written and one annotation row (RLE per class) is recorded.

# Make sure the destination exists; plt.imsave does not create missing folders.
os.makedirs(write_path, exist_ok=True)

# Rows are collected as plain dicts and turned into a DataFrame once at the end. This avoids
# the quadratic cost of concatenating inside the loop and makes the cell idempotent:
# re-running it rebuilds new_df from scratch instead of appending duplicate rows.
records = []

# Sorted so that the row order of the resulting table is reproducible across runs.
for filename in sorted(os.listdir(read_path)):
    image = cv2.imread(read_path + filename)
    if image is None:
        # cv2.imread signals an unreadable / non-image file with None instead of raising.
        print(f'Skipping unreadable file: {filename}')
        continue

    # Decode each annotated mask once per image; the decoded mask does not depend on the block.
    anns = df[df['ImageId'] == filename]
    masks = [
        (ann['ClassId'], rle_decode(ann['EncodedPixels'], (height, width)))
        for _, ann in anns.iterrows()
    ]

    # File name without its extension (replaces the hard-coded 9-character prefix).
    stem = os.path.splitext(filename)[0]

    # Subimages
    for n_block, x in enumerate(range(0, 1600, 256), start=1):
        # Same column window for the image and for every mask.
        cols = slice(-256, None) if n_block == 7 else slice(x, x + 256)

        new_filename = write_path + stem + f'_{n_block}' + '.png'
        new_entry = {'ImageId': new_filename,
                     'EncodedPixels1': '',
                     'EncodedPixels2': '',
                     'EncodedPixels3': '',
                     'EncodedPixels4': ''}
        # Classes without an annotation keep the empty string.
        for class_id, mask in masks:
            new_entry[f'EncodedPixels{class_id}'] = rle_encode(mask[:, cols])
        records.append(new_entry)

        # cv2.imread yields BGR channel order while plt.imsave interprets the array as RGB,
        # so convert before saving to keep the colours of the source image.
        plt.imsave(new_filename, cv2.cvtColor(image[:, cols], cv2.COLOR_BGR2RGB))

new_df = pd.DataFrame(
    records,
    columns=['ImageId', 'EncodedPixels1', 'EncodedPixels2', 'EncodedPixels3', 'EncodedPixels4'],
)
    
    
    

In [35]:
# Persist the sub-image annotation table. The DataFrame index is written too (pandas default),
# so it will show up as an unnamed first column when the file is read back.
subimages_csv_path = '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/subimages.csv'
new_df.to_csv(subimages_csv_path)