In [1]:
#COCO Image Viewer
#This notebook will allow you to view details about a COCO dataset and preview segmentations on annotated images. Learn more about it at: http://cocodataset.org/
import IPython
import os
import json
import random
import numpy as np
import requests
from io import BytesIO
from math import trunc
from PIL import Image as PILImage
from PIL import ImageDraw as PILImageDraw

# Load the dataset json
class CocoDataset():
    """In-memory index of a COCO-style annotation file with text/HTML previews.

    The constructor loads the JSON file at ``annotation_path`` and builds
    lookup tables:

    * ``info`` / ``licenses`` -- the raw top-level sections (``None`` if absent)
    * ``categories``          -- category id -> category dict
    * ``super_categories``    -- supercategory name -> set of category ids
    * ``images``              -- image id -> image dict
    * ``segmentations``       -- image id -> list of annotation dicts; images
      without annotations have no entry

    ``image_dir`` is joined with an image's ``file_name`` when an image is
    opened from disk in :meth:`display_image`.
    """

    def __init__(self, annotation_path, image_dir):
        self.annotation_path = annotation_path
        self.image_dir = image_dir
        # Colour assigned to the i-th annotation of an image; 'white' afterwards.
        self.colors = ['blue', 'purple', 'red', 'green', 'orange', 'salmon', 'pink', 'gold',
                       'orchid', 'slateblue', 'limegreen', 'seagreen', 'darkgreen', 'olive',
                       'teal', 'aquamarine', 'steelblue', 'powderblue', 'dodgerblue', 'navy',
                       'magenta', 'sienna', 'maroon']

        # The context manager closes the handle even when the JSON is malformed.
        with open(self.annotation_path) as json_file:
            self.coco = json.load(json_file)

        self.process_info()
        self.process_licenses()
        self.process_categories()
        self.process_images()
        self.process_segmentations()

    def display_info(self):
        """Print the ``info`` section and report missing or mistyped fields."""
        print('Dataset Info:')
        print('=============')
        if self.info is None:
            return
        for key, item in self.info.items():
            print('  {}: {}'.format(key, item))

        requirements = [['description', str],
                        ['url', str],
                        ['version', str],
                        ['year', int],
                        ['contributor', str],
                        ['date_created', str]]
        for req, req_type in requirements:
            if req not in self.info:
                print('ERROR: {} is missing'.format(req))
            elif type(self.info[req]) is not req_type:
                print('ERROR: {} should be type {}'.format(req, str(req_type)))
        print('')

    def display_licenses(self):
        """Print every license entry and report missing or mistyped fields."""
        print('Licenses:')
        print('=========')

        if self.licenses is None:
            return
        requirements = [['id', int],
                        ['url', str],
                        ['name', str]]
        for license in self.licenses:
            for key, item in license.items():
                print('  {}: {}'.format(key, item))
            for req, req_type in requirements:
                if req not in license:
                    print('ERROR: {} is missing'.format(req))
                elif type(license[req]) is not req_type:
                    print('ERROR: {} should be type {}'.format(
                        req, str(req_type)))
            print('')
        print('')

    def display_categories(self):
        """Print the category ids and names grouped by supercategory."""
        print('Categories:')
        print('=========')
        for sc_key, sc_val in self.super_categories.items():
            print('  super_category: {}'.format(sc_key))
            for cat_id in sc_val:
                print('    id {}: {}'.format(
                    cat_id, self.categories[cat_id]['name']))
            print('')

    @staticmethod
    def _scaled_points(values, ratio):
        """Scale ``values`` by ``ratio``, cast to int, and join with spaces.

        The string is built element by element rather than with
        ``str(ndarray)``, because numpy abbreviates long arrays with ``...``,
        which would corrupt an SVG ``points`` attribute.
        """
        scaled = np.multiply(values, ratio).astype(int)
        return ' '.join(str(value) for value in scaled.tolist())

    @staticmethod
    def _rle_to_rects(counts, image_height, adjusted_ratio):
        """Convert uncompressed RLE run lengths into display-space rectangles.

        ``counts`` alternates between runs of empty pixels (even positions) and
        runs of masked pixels (odd positions). A pixel offset ``px`` is mapped
        to column ``px // image_height`` and row ``px % image_height``. Every
        masked run becomes one rectangle when it stays inside a single column,
        otherwise a top segment, an optional band of full columns, and a
        bottom segment. All coordinates are multiplied by ``adjusted_ratio``.

        Returns a list of dicts with keys ``x``, ``y``, ``width``, ``height``.
        """
        scaled_height = image_height * adjusted_ratio
        rects = []
        px = 0
        for j, run in enumerate(counts):
            if j % 2 == 0:
                # Empty pixels: only advance the offset.
                px += run
                continue

            col_start, row_start = divmod(px, image_height)
            px += run
            col_end, row_end = divmod(px, image_height)

            x_start = trunc(col_start * adjusted_ratio)
            y_start = trunc(row_start * adjusted_ratio)
            x_end = trunc(col_end * adjusted_ratio)
            y_end = trunc(row_end * adjusted_ratio)

            # Classify on the unscaled columns: two different source columns
            # can collapse onto the same scaled x, and treating that as a
            # single-column run could yield a negative height.
            if col_end == col_start:
                rects.append(
                    {'x': x_start, 'y': y_start, 'width': 1, 'height': (y_end - y_start)})
                continue

            # First column: from the start row down to the bottom edge.
            rects.append(
                {'x': x_start, 'y': y_start, 'width': 1, 'height': (scaled_height - y_start)})

            # Fully covered columns in between. x_start/x_end are already in
            # display units, so the width must not be scaled a second time.
            middle_width = x_end - x_start - 1
            if middle_width > 0:
                rects.append(
                    {'x': (x_start + 1), 'y': 0, 'width': middle_width, 'height': scaled_height})

            # Last column: from the top edge down to the end row.
            rects.append(
                {'x': x_end, 'y': 0, 'width': 1, 'height': y_end})
        return rects

    def display_image(self, image_id, show_polys=True, show_bbox=True, show_crowds=True, use_url=False):
        """Print an image's metadata and return an HTML/SVG annotation overlay.

        Args:
            image_id: Key into ``self.images``, or the string ``'random'`` to
                pick one at random.
            show_polys: Draw polygon segmentations.
            show_bbox: Draw bounding boxes labelled with the category name.
            show_crowds: Draw RLE (``iscrowd != 0``) segmentations.
            use_url: Fetch the image from its ``coco_url`` instead of reading
                ``file_name`` from ``self.image_dir``.

        Returns:
            An HTML string containing the ``<img>`` tag and an absolutely
            positioned ``<svg>`` overlay, scaled to at most 600px wide.

        Raises:
            KeyError: If ``image_id`` is not a known image id.
        """
        print('Image:')
        print('======')
        if image_id == 'random':
            image_id = random.choice(list(self.images.keys()))

        # Print the image info
        image_info = self.images[image_id]
        for key, val in image_info.items():
            print('  {}: {}'.format(key, val))

        # Open the image
        if use_url:
            image_path = image_info['coco_url']
            response = requests.get(image_path)
            image = PILImage.open(BytesIO(response.content))
        else:
            image_path = os.path.join(self.image_dir, image_info['file_name'])
            image = PILImage.open(image_path)

        # Calculate the size and adjusted display size
        max_width = 600
        image_width, image_height = image.size
        adjusted_width = min(image_width, max_width)
        adjusted_ratio = adjusted_width / image_width
        adjusted_height = adjusted_ratio * image_height

        # Images without annotations have no entry in self.segmentations.
        segmentations = self.segmentations.get(image_id, [])

        # Create list of polygons to be drawn
        polygons = {}
        bbox_polygons = {}
        rle_regions = {}
        poly_colors = {}
        bbox_categories = {}
        print('  segmentations ({}):'.format(len(segmentations)))
        for i, segm in enumerate(segmentations):
            seg_id = segm['id']
            polygons_list = []
            if segm['iscrowd'] != 0:
                # Crowd annotations carry run-length counts instead of polygons.
                # NOTE(review): assumes 'counts' is a list of ints (uncompressed
                # RLE); a compressed string form is not decoded here.
                rle_list = self._rle_to_rects(
                    segm['segmentation']['counts'], image_height, adjusted_ratio)
                if len(rle_list) > 0:
                    rle_regions[seg_id] = rle_list
            else:
                # Add the polygon segmentation
                for segmentation_points in segm['segmentation']:
                    polygons_list.append(
                        self._scaled_points(segmentation_points, adjusted_ratio))
            polygons[seg_id] = polygons_list
            if i < len(self.colors):
                poly_colors[seg_id] = self.colors[i]
            else:
                poly_colors[seg_id] = 'white'

            # Closed rectangle outline (first corner repeated) from [x, y, w, h].
            bbox = segm['bbox']
            bbox_points = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1],
                           bbox[0] + bbox[2], bbox[1] +
                           bbox[3], bbox[0], bbox[1] + bbox[3],
                           bbox[0], bbox[1]]
            bbox_polygons[seg_id] = self._scaled_points(
                bbox_points, adjusted_ratio)
            bbox_categories[seg_id] = self.categories[segm['category_id']]
            # Print details
            print('    {}:{}:{}'.format(
                seg_id, poly_colors[seg_id], self.categories[segm['category_id']]))

        # Draw segmentation polygons on image
        html = '<div class="container" style="position:relative;">'
        html += '<img src="{}" style="position:relative;top:0px;left:0px;width:{}px;">'.format(
            image_path, adjusted_width)
        html += '<div class="svgclass"><svg width="{}" height="{}">'.format(
            adjusted_width, adjusted_height)

        if show_polys:
            for seg_id, points_list in polygons.items():
                fill_color = poly_colors[seg_id]
                stroke_color = poly_colors[seg_id]
                for points in points_list:
                    html += '<polygon points="{}" style="fill:{}; stroke:{}; stroke-width:1; fill-opacity:0.5" />'.format(
                        points, fill_color, stroke_color)

        if show_crowds:
            for seg_id, rect_list in rle_regions.items():
                fill_color = poly_colors[seg_id]
                stroke_color = poly_colors[seg_id]
                for rect_def in rect_list:
                    x, y = rect_def['x'], rect_def['y']
                    w, h = rect_def['width'], rect_def['height']
                    html += '<rect x="{}" y="{}" width="{}" height="{}" style="fill:{}; stroke:{}; stroke-width:1; fill-opacity:0.5; stroke-opacity:0.5" />'.format(
                        x, y, w, h, fill_color, stroke_color)

        if show_bbox:
            for seg_id, points in bbox_polygons.items():
                # The label is anchored at the first corner of the box.
                x, y = [int(i) for i in points.split()[:2]]
                html += '<text x="{}" y="{}" fill="yellow">{}</text>'.format(
                    x, y, bbox_categories[seg_id]["name"])
                fill_color = poly_colors[seg_id]
                stroke_color = poly_colors[seg_id]
                html += '<polygon points="{}" style="fill:{}; stroke:{}; stroke-width:1; fill-opacity:0" />'.format(
                    points, fill_color, stroke_color)

        html += '</svg></div>'
        html += '</div>'
        html += '<style>'
        html += '.svgclass { position:absolute; top:0px; left:0px;}'
        html += '</style>'
        return html

    def process_info(self):
        """Store the optional top-level ``info`` section (``None`` if absent)."""
        self.info = self.coco.get('info')

    def process_licenses(self):
        """Store the optional top-level ``licenses`` section (``None`` if absent)."""
        self.licenses = self.coco.get('licenses')

    def process_categories(self):
        """Index categories by id and group their ids by supercategory.

        A category whose id was already seen is reported and skipped entirely,
        so it is registered neither in ``categories`` nor under its
        supercategory.
        """
        self.categories = {}
        self.super_categories = {}
        for category in self.coco['categories']:
            cat_id = category['id']
            super_category = category['supercategory']

            if cat_id in self.categories:
                print("ERROR: Skipping duplicate category id: {}".format(category))
                continue

            self.categories[cat_id] = category
            self.super_categories.setdefault(super_category, set()).add(cat_id)

    def process_images(self):
        """Index images by id; the first occurrence of a duplicated id wins."""
        self.images = {}
        for image in self.coco['images']:
            image_id = image['id']

            if image_id in self.images:
                print("ERROR: Skipping duplicate image id: {}".format(image))
            else:
                self.images[image_id] = image

    def process_segmentations(self):
        """Group annotations by ``image_id``.

        Images without any annotation get no key, so callers can test
        ``image_id in self.segmentations`` to detect them.
        """
        self.segmentations = {}
        for segmentation in self.coco['annotations']:
            image_id = segmentation['image_id']
            self.segmentations.setdefault(image_id, []).append(segmentation)

In [2]:
# Where the exported COCO annotations live, and the folder used to resolve
# image file names when an image is opened from disk.
annotation_path = 'output_coco/coco_all_vf.json'
image_dir = ''

# Parse the annotation file, then print each summary section in order.
coco_dataset_all = CocoDataset(annotation_path, image_dir)
for show_section in (coco_dataset_all.display_info,
                     coco_dataset_all.display_licenses,
                     coco_dataset_all.display_categories):
    show_section()

Dataset Info:
  year: 2020
  version: None
  description: CleanOut
  contributor: sara.elateif@gmail.com
  url: labelbox.com
  date_created: 2020-09-20T08:39:41.252233+00:00
ERROR: version should be type <class 'str'>

Licenses:

Categories:
  super_category: mask
    id 1: mask

  super_category: tissue
    id 2: tissue

  super_category: glove
    id 3: glove

  super_category: sanitizer_gel
    id 4: sanitizer_gel



In [4]:
from collections import OrderedDict 
 
# Categories ordered by id, so the id list and the name list stay aligned.
catgs = coco_dataset_all.coco['categories']
srtd = sorted(catgs, key=lambda entry: entry['id'])

catg_ids = [entry['id'] for entry in srtd]
ctg_list = [entry['name'] for entry in srtd]

# Directory-safe names: spaces and apostrophes are dropped,
# '/' and ',' become '_'.
_dir_safe = str.maketrans({' ': None, '/': '_', ',': '_', "'": None})
ctg_dir = [name.translate(_dir_safe) for name in ctg_list]

ctg_dirs = dict(zip(catg_ids, ctg_dir))
print('category dirs:', ctg_dirs)
print(len(ctg_dir))
print(ctg_dir)

# File prefixes are currently the bare directory names (no id prefix).
ctg_pfx = list(ctg_dir)
catg_pfx = dict(zip(catg_ids, ctg_pfx))
catg_pfx

category dirs: {1: 'mask', 2: 'tissue', 3: 'glove', 4: 'sanitizer_gel'}
4
['mask', 'tissue', 'glove', 'sanitizer_gel']


{1: 'mask', 2: 'tissue', 3: 'glove', 4: 'sanitizer_gel'}

In [5]:
from pathlib import Path
from tqdm import tqdm
import numpy as np
import json
import urllib
import PIL.Image as Image
import cv2
import torch
import torchvision
from IPython.display import display
from sklearn.model_selection import train_test_split

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

In [6]:
%matplotlib inline


def create_cons_dataset(dataset, output_root='tr'):
    """Download every annotated image into ``<output_root>/<category>/images``.

    Each image listed in ``dataset.coco['images']`` that has at least one
    annotation is fetched from its ``coco_url``, converted to RGB and saved as
    ``<image id>.jpeg``. The target category is taken from the image's FIRST
    annotation only. Images without annotations are reported and skipped.

    Relies on the notebook-level mappings ``ctg_dirs`` and ``catg_pfx``
    (category id -> directory name).

    Args:
        dataset: Object exposing ``coco`` (the parsed annotation dict) and
            ``segmentations`` (image id -> list of annotations).
        output_root: Directory under which the per-category folders are
            created. Defaults to ``'tr'``.
    """
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly.
    import urllib.request

    root = Path(output_root)
    for catg in ctg_dirs.values():
        (root / catg / 'images').mkdir(parents=True, exist_ok=True)

    print(': creating Dataset of images and labels for COCO...')
    saved = 0
    noant_imgs = 0
    category = ''
    for img in dataset.coco['images']:
        imgid = img['id']

        a_list = dataset.segmentations.get(imgid)
        if not a_list:
            noant_imgs += 1
            print(': no annotations for image id', imgid)
            continue

        # The first annotation decides which category folder the image goes to.
        cpfx = catg_pfx[a_list[0]['category_id']]
        if category != cpfx:
            category = cpfx
            print('Converting ', category, '....')

        images_path = root / cpfx / 'images'
        images_path.mkdir(parents=True, exist_ok=True)

        # convert() forces the pixel data to be read while the response is
        # still open; the context manager then releases the connection.
        with urllib.request.urlopen(img['coco_url']) as response:
            picture = Image.open(response).convert('RGB')
        picture.save(str(images_path / f"{imgid}.jpeg"), "JPEG")

        saved += 1

    print(': Finished Dataset of images and labels for COCO : count ', saved)
    print(': JSON does not have annotations for images: count ', noant_imgs)

In [7]:
# Fetch every annotated image from its coco_url and save it under
# tr/<category>/images (needs network access; progress is printed below).
create_cons_dataset(coco_dataset_all)

: creating Dataset of images and labels for COCO...
Converting  mask ....
Converting  tissue ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
: no annotations for image id ckestehjm00043j5rrt4lfczv
Converting  glove ....
: no annotations for image id ckestivu600073j5rn56io4i0
: no annotations for image id ckestj6h100083j5rphidoe40
: no annotations for image id ckestjkfp00093j5ru3cjgfs4
: no annotations for image id ckestjoq9000a3j5rplqw01ls
Converting  mask ....
Converting  glove ....
: no annotations for image id ckestohsi000f3j5rsg0dia7f
Converting  mask ....
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
: no annotations for image id ckestwvaq000m3j5r6zhj20i0
Converting  glove ....
Converting  mas

Converting  glove ....
Converting  mask ....
: no annotations for image id ckf5o00cd000p3k5rru1e2lfl
Converting  glove ....
Converting  mask ....
Converting  tissue ....
Converting  mask ....
: no annotations for image id ckf5o9x39000x3k5rvsvu7h3n
: no annotations for image id ckf5obpwo000z3k5r5u3c1n71
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
: no annotations for image id ckf6j6ip000053b5zomuq0gnu
Converting  glove ....
Converting  mask ....
Converting  glove ....
: no annotations for image id ckf6jk73i00053b5zd7l7c7lk
Converting  mask ....
: no annotations for image id ckf6k5x08000a3b5zwpji8vhz
Converting  glove ....
: no annotations for image id ckf6kahmk000f3b5zoy3dy3oe
Converting  mask ....
Converting  glove ....
Converting  tissue ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  mask ....
: no annotations for image id ckf6klyku000c3b5zw8s9oflq
: no annotations for image id ck

Converting  glove ....
Converting  mask ....
Converting  glove ....
Converting  tissue ....
Converting  mask ....
Converting  glove ....
: no annotations for image id ckf9i5edc00073b5z8fimvic7
Converting  mask ....
: no annotations for image id ckf9i7x54000a3b5zyo61s8nw
: no annotations for image id ckf9ifxhm00013b5zih3xyfi2
: no annotations for image id ckf9ih90a00043b5zk7xvnkft
Converting  glove ....
Converting  mask ....
: no annotations for image id ckf9imb4v00023b5z0zp9ss17
Converting  sanitizer_gel ....
: no annotations for image id ckf9inc5b00043b5zwe04n84a
Converting  mask ....
Converting  glove ....
Converting  mask ....
: no annotations for image id ckf9ivb6y00013b5z58n1i6g0
: no annotations for image id ckf9ivik000023b5zmmzksvu6
: no annotations for image id ckf9ivnmi00033b5zh8huntff
Converting  tissue ....
Converting  mask ....
Converting  glove ....
: no annotations for image id ckf9iyteg00073b5zjebnmbzs
: no annotations for image id ckf9iyx1c00083b5zfbzrmogj
Converting  m

In [None]:
#!cp -r images/* ../../dataset/
#!ls ../../dataset/ | wc -l #4417

In [8]:
!mkdir -p data

In [9]:
# List the working directory to confirm that data/ and tr/ exist.
ls

Dockerfile                             [34mdata[m[m/
Loading_Updating_dataset.ipynb         exceptions.py
Makefile                               [34minput_json[m[m/
Pipfile                                main.py
Pipfile.lock                           [34moutput_coco[m[m/
README.md                              requirements.txt
Update_Categories_ids.ipynb            test_labeling_coco_image_viewer.ipynb
[34m__pycache__[m[m/                           [34mtr[m[m/
coco_exporter.py


In [11]:
from pathlib import Path
import os 

# Destination for the flattened image set, and the per-category source root.
dst = Path("./data")
imgs_npfx = Path("./tr/images")

# Keep only real sub-directories of ./tr: stray files (e.g. .DS_Store) would
# otherwise be treated as category names by the copy step. Sorted so the
# order is the same on every run.
directories = sorted(
    entry.name for entry in Path('./tr').iterdir() if entry.is_dir())

In [12]:
import shutil

dst = "./data/"
Path(dst).mkdir(parents=True, exist_ok=True)

# Flatten tr/<category>/images/* into ./data using the standard library
# instead of a shell `cp`, so empty folders do not break a glob and copy
# errors surface as exceptions.
for path in directories:
    images_dir = Path('./tr') / path / 'images'
    if not images_dir.is_dir():
        # Entry without an images/ folder (or not a directory at all).
        continue
    for item in sorted(images_dir.iterdir()):
        # Like the shell glob `*`, hidden files are skipped. Only regular
        # files are copied; the images folders are not expected to contain
        # sub-directories.
        if item.is_file() and not item.name.startswith('.'):
            shutil.copy(str(item), dst)