In [2]:
import numpy as np
import pycocotools.mask as mask_util
import random
import pandas as pd

In [3]:
def rle_decode(mask_rle, shape):
    """
    Decodes run-length encoded segmentation mask string into 2d array

    The string is a whitespace-separated sequence of ``start length`` pairs.
    Starts are 1-based positions into the flattened mask, and the flattened
    mask is read column by column (column-major order).

    Parameters
    ----------
    :param mask_rle (str): Run-length encoded segmentation mask string.
    :param shape (tuple): (height, width) of array to return
    :return mask [numpy.ndarray of shape (height, width)]: Decoded 2d segmentation mask
    :raises ValueError: If the string does not hold an even number of values.
    """
    # Splits the RLE string into a list of strings by whitespace.
    tokens = mask_rle.split()
    if len(tokens) % 2:
        # An unpaired value would otherwise be silently broadcast by numpy.
        raise ValueError(
            "RLE string must contain an even number of values (start/length pairs)"
        )

    # Even positions hold the run starts, odd positions their respective lengths.
    # Starts are 1-based in the encoding, so shift them to 0-based indices.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    # Create a 1D array of size H*W of zeros
    mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # Fill this array with ones in the positions where there is a mask
    for start, end in zip(starts, ends):
        mask[start:end] = 1

    # The flat mask is column-major, so reshape in Fortran order. This yields a
    # (height, width) array for any shape; the previous `reshape(shape).T` was
    # only correct when height == width.
    return mask.reshape(shape, order="F")

In [4]:
def binary_mask_to_rle(binary_mask):
    """
    Encodes a 2d binary mask as a JSON-serialisable COCO RLE dictionary.

    Checkout: https://cocodataset.org/#format-results
    :param binary_mask [numpy.ndarray of shape (height, width)]: Decoded 2d segmentation mask.
        Any dtype that can be cast to uint8 (e.g. bool or int) is accepted.

    This function returns the following dictionary:
    {
        "counts": encoded mask suggested by the official COCO dataset webpage.
        "size": the size of the input mask/image
    }
    """
    # pycocotools documents its input as a uint8 array in column-major order,
    # so coerce both the dtype and the memory layout before encoding.
    binary_mask_fortran = np.asfortranarray(binary_mask, dtype=np.uint8)
    # Encode the mask as specified by the official COCO format
    encoded_mask = mask_util.encode(binary_mask_fortran)
    return {
        # The encoder returns bytes; decode them so the result can be saved as JSON.
        'counts': encoded_mask["counts"].decode(),
        'size': list(binary_mask_fortran.shape),
    }

In [5]:
from pycocotools.coco import COCO
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image

In [6]:
# Load the COCO annotation file and keep only its `anns` attribute, so in the
# following cells `coco` is the annotation mapping, not the COCO object itself.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
coco = COCO("/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/annotations_COCO.json").anns
# Presumably the directory holding the sub-images the annotations refer to — TODO confirm.
dataDir=Path('/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/train_subimages')

loading annotations into memory...
Done (t=0.73s)
creating index...
index created!


In [7]:
# Fix the seed so the shuffled order below is the same on every run.
random.seed(33)
# Keys of the annotation mapping (`coco` is the `.anns` mapping at this point).
keys = list(coco.keys())
# Shuffle in place; the resulting order depends on the seed set above.
random.shuffle(keys)

In [8]:

# Collect the image id referenced by every annotation, visiting annotations in
# ascending annotation-id order. Iterating over the keys that actually exist
# (rather than range(len(coco))) avoids a KeyError when the ids are not exactly
# 0..N-1, and gives the same list when they are.
id_list = [coco[ann_id]['image_id'] for ann_id in sorted(coco)]


In [9]:
from collections import Counter

def find_duplicates(lst):
    """
    Return the items of ``lst`` that occur more than once.

    Each repeated item is reported a single time, in the order in which it
    first appears in ``lst``.
    """
    repeated = []
    for value, occurrences in Counter(lst).items():
        if occurrences >= 2:
            repeated.append(value)
    return repeated

# Image ids that are referenced by more than one annotation.
duplicates = find_duplicates(id_list)
n_duplicates = len(duplicates)  # number of distinct image ids that appear more than once


In [10]:
# Read the sub-image table; the rows displayed further down show ImageId, ClassId
# and EncodedPixels columns.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
df = pd.read_csv('/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/subimages.csv')

In [11]:
# Show the rows of `df` whose ImageId equals one of the duplicated image ids.
# NOTE(review): index 8 looks like an arbitrary example pick — confirm.
df[df['ImageId'] == duplicates[8]]

Unnamed: 0.1,Unnamed: 0,ImageId,ClassId,EncodedPixels
2225,2225,1fc012f23_6.jpg,3,57089 8 57345 23 57601 38 57857 53 58113 68 58...
2226,2226,1fc012f23_6.jpg,4,151 64 406 64 662 63 917 64 1172 64 1428 63 16...


In [12]:
# Reload the annotation file, this time keeping the full COCO object; this
# rebinds `coco`, which an earlier cell had bound to the `.anns` mapping only.
# The path has no placeholders, so a plain string literal is used instead of an f-string.
coco = COCO("/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/annotations_COCO.json")

loading annotations into memory...
Done (t=0.84s)
creating index...
index created!


In [13]:
# Hand-picked annotation ids to inspect; this replaces the shuffled key list built earlier.
keys = (1, 4, 5, 6)

In [14]:
# Display the annotation records stored under each of the selected ids.
[coco.anns[i] for i in keys]

[{'segmentation': {'counts': 'PPP2', 'size': [256, 256]},
  'bbox': [None, None, None, None],
  'area': None,
  'image_id': '4895e51ff_4.jpg',
  'category_id': 5,
  'iscrowd': 0,
  'id': 1},
 {'segmentation': {'counts': 'PPP2', 'size': [256, 256]},
  'bbox': [None, None, None, None],
  'area': None,
  'image_id': '24b29d740_6.jpg',
  'category_id': 5,
  'iscrowd': 0,
  'id': 4},
 {'segmentation': {'counts': 'PPP2', 'size': [256, 256]},
  'bbox': [None, None, None, None],
  'area': None,
  'image_id': '37f331c96_3.jpg',
  'category_id': 5,
  'iscrowd': 0,
  'id': 5},
 {'segmentation': {'counts': 'Q]g09V7a0_Oa0_O`0H90000000O010000000O10000K5F9H9F:F:G9FWmR1',
   'size': [256, 256]},
  'bbox': [93, 99, 23, 71],
  'area': 1185,
  'image_id': '3da2c3982_6.jpg',
  'category_id': 1,
  'iscrowd': 0,
  'id': 6}]

In [15]:
# NOTE(review): neither `indexed_dict` nor `indices` is defined in any cell shown
# here, and the recorded output of this cell is a NameError. Presumably left over
# from an earlier session — define both names in a prior cell or remove this cell.
[indexed_dict[i] for i in indices]

NameError: name 'indices' is not defined