In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before each
# cell runs, so edits to project .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
# Mount Google Drive at /content/drive so files stored on Drive can be accessed through
# ordinary filesystem paths. Requires the Colab runtime (google.colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
import os

# Project root on the mounted Google Drive, kept in one place instead of being
# repeated for every call that needs it.
PROJECT_DIR = '/content/drive/MyDrive/gcolab/bacteria_segmentation'

# Make modules stored in the project folder importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# The relative data paths used later in the notebook are resolved against this directory.
os.chdir(PROJECT_DIR)

In [3]:
import cv2
import json
import numpy as np
import pandas as pd
import os
import shutil

## Constants:
- abbreviated labels — these are the labels used from here on
- labels with full names (as used in the .json files describing the masks)
- paths to data and related settings (file extensions, ...)

In [11]:
# constants

# Full label name (as it appears in the .json annotations) -> abbreviated label.
# The two label lists are derived from this mapping, so their order always matches.
LABELS_DICT = {
    'staphylococcus_epidermidis': 'epidermidis',
    'klebsiella_pneumoniae': 'pneumoniae',
    'staphylococcus_aureus': 'aureus',
    'moraxella_catarrhalis': 'moraxella',
    'c_kefir': 'kefir',
    'ent_cloacae': 'cloacae',
}
LABELS_FULL = list(LABELS_DICT.keys())
LABELS = list(LABELS_DICT.values())


# Folder with the raw images and their .json annotations.
PATH_TRAIN_RAW = 'data/train_raw/'

# Output folder; each image gets a '<name>_<MASKS_TRAIN_FOLDER_NAME>/' subfolder for its masks.
PATH_TRAIN = 'data/train/'
MASKS_TRAIN_FOLDER_NAME = 'masks'

# File extensions: annotation files, and images/masks respectively.
ANSWER_FILTER = '.json'
MASK_FORMAT = '.png'

## Create .png masks for each label from the .json file
Also copies the .png images of the bacteria colonies to the train folder.

In [5]:
def create_masks(filename):
    """Create per-label .png masks for one annotated image and copy the image itself.

    Reads the annotation ``{filename}{ANSWER_FILTER}`` from ``PATH_TRAIN_RAW``. The JSON
    must provide ``imageHeight``, ``imageWidth`` and ``shapes``; every shape has a
    ``label`` (one of ``LABELS_FULL``) and a list of ``points`` describing a polygon.

    Written to ``PATH_TRAIN``:
      * a copy of the image ``{filename}{MASK_FORMAT}``;
      * a folder ``{filename}_{MASKS_TRAIN_FOLDER_NAME}/`` containing one mask per label
        (named with the abbreviated label), a background mask, and
        ``{filename}_label.txt`` holding the abbreviated label of the last shape.

    Masks are single-channel uint8 images: 255 inside the polygons, 0 elsewhere. The
    background mask is 255 wherever no label mask is set.

    Args:
        filename: Base name shared by the image and its annotation, without extension.

    Raises:
        ValueError: If the annotation has no shapes, so no label can be recorded.
            Raised before anything is written.
        KeyError: If a shape carries a label that is not in ``LABELS_FULL``.
    """
    annotation_path = os.path.join(PATH_TRAIN_RAW, f'{filename}{ANSWER_FILTER}')
    with open(annotation_path, 'r') as f:
        layout = json.load(f)

    shapes = layout['shapes']
    if not shapes:
        # Previously this surfaced as an UnboundLocalError after the masks had
        # already been written; fail early with an explicit message instead.
        raise ValueError(f'no shapes in {annotation_path}: cannot determine the label')

    h, w = layout['imageHeight'], layout['imageWidth']

    masks = {label: np.zeros((h, w), np.uint8) for label in LABELS_FULL}
    mask_background = np.ones((h, w), np.uint8) * 255

    for shape in shapes:
        # Points are handed to OpenCV in the order stored in the file
        # (presumably (x, y), which is what fillPoly expects -- TODO confirm).
        # fillPoly needs int32 vertices, so float coordinates are rounded to the
        # nearest pixel; integer coordinates pass through unchanged.
        polygon = np.rint(np.array(shape['points'], dtype=np.float64)).astype(np.int32)
        cv2.fillPoly(masks[shape['label']], [polygon], 255)

    # The label recorded for the image is the one of the last shape in the file.
    image_label = shapes[-1]['label']

    # make background mask
    for mask_label in masks.values():
        mask_background[mask_label == 255] = 0

    # makedirs also creates PATH_TRAIN when it does not exist yet, and does not
    # fail when the function is re-run for the same file.
    masks_path = f'{PATH_TRAIN}{filename}_{MASKS_TRAIN_FOLDER_NAME}/'
    os.makedirs(masks_path, exist_ok=True)

    shutil.copyfile(os.path.join(PATH_TRAIN_RAW, f'{filename}{MASK_FORMAT}'),
                    f'{PATH_TRAIN}{filename}{MASK_FORMAT}')

    for label_full in LABELS_FULL:
        cv2.imwrite(os.path.join(masks_path, f'{filename}_{LABELS_DICT[label_full]}{MASK_FORMAT}'),
                    masks[label_full])
    cv2.imwrite(os.path.join(masks_path, f'{filename}_background{MASK_FORMAT}'), mask_background)

    # Context manager guarantees the label file is closed even if the write fails.
    with open(os.path.join(masks_path, f'{filename}_label.txt'), 'w') as label_file:
        label_file.write(f'{LABELS_DICT[image_label]}')


## Process all data

In [6]:
def create_masks_for_all_files():
    """Run ``create_masks`` for every annotation file found in ``PATH_TRAIN_RAW``.

    A directory entry counts as an annotation when its trailing characters, lowercased,
    equal ``ANSWER_FILTER``. The extension is stripped before the name is passed on.
    A progress line is printed before every 10th file, starting with the first.
    """
    suffix_len = len(ANSWER_FILTER)
    annotation_files = [
        entry for entry in os.listdir(PATH_TRAIN_RAW)
        if entry[-suffix_len:].lower() == ANSWER_FILTER
    ]
    total = len(annotation_files)

    for position in range(total):
        if position % 10 == 0:
            print(f'done {position} of {total}')

        base_name = annotation_files[position][:-suffix_len]
        create_masks(base_name)

In [12]:
# Convert every annotation found in PATH_TRAIN_RAW; a progress line is printed every 10 files.
create_masks_for_all_files()

done 0 of 261
done 10 of 261
done 20 of 261
done 30 of 261
done 40 of 261
done 50 of 261
done 60 of 261
done 70 of 261
done 80 of 261
done 90 of 261
done 100 of 261
done 110 of 261
done 120 of 261
done 130 of 261
done 140 of 261
done 150 of 261
done 160 of 261
done 170 of 261
done 180 of 261
done 190 of 261
done 200 of 261
done 210 of 261
done 220 of 261
done 230 of 261
done 240 of 261
done 250 of 261
done 260 of 261
