# Rename and convert Apeer mask annotations for COCO conversion and Detectron2 network training

<div class="alert alert-block alert-warning">
<b>Attention:</b> Update Data Directories !
</div>

## Get data from Apeer

#### File structure requested by PyCocoCreator

A good read explaining why the COCO format is needed and how to convert masks to COCO is available [here](https://patrickwasp.com/create-your-own-coco-style-dataset/). More information can be found in this [GitHub](https://github.com/waspinator/pycococreator/) repository.


### Structure needed: 

One directory with .jpg images (tiles) 

One directory with .tiff images, masks from Apeer. 

Rename the images and place them in a folder like the following: 

```

└── particles
    └── train
        ├── annotations
        │   │ <image_id>_<object_class_name>_<annotation_id>.png
        └── particles_train2022
        │   │   <image_id>.jpeg
```

**IMPORTANT:** the image_id and annotation_id must not contain characters such as "\_" or "-". Otherwise the split functions of pycococreator run into problems and would need to be adapted.

##### Server Structure

Structure present on the server. For each new trial, the "particles" directory is renamed, and the current one remains named "particles". 

```
└── 1_IMPTOX 
    └── TransferLearning
        └── 0_data
            └── particles
                └── train
                    └── annotations : contains the .PNG masks
                    └── annotations_tiff : contains the tiff annotations from Apeer
                    └── particles_train2022 : contains the .jpg images matching the Annotation name as above
        
        └── 1_keras_detectron_convertion
            └── h5_pth_convertion.ipynb (this notebook)
            └── model.h5 : keras model from Apeer (useless, as seen above)

```

#### Rename, move and convert files from Apeer into the correct file structure

This job is done manually, depending on the use case. Test before applying!





### Split masks into one image per particle

Separate the binary images into one image per connected component

Source: https://stackoverflow.com/questions/71627178/how-to-split-image-to-objects-islands-using-opencv



In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import re
import cv2
from PIL import Image
import detectron2
import scipy.ndimage # fill holes

In [None]:
# Mask folders used by the splitting step:
#   full_mask_dir  -> one binary image per tile
#   split_mask_dir -> one binary image per particle (~270K)
full_mask_dir, split_mask_dir = (
    "../0_data/CocoConvertion/particles/train/annotations_full",
    "../0_data/CocoConvertion/particles/train/annotations",
)

def get_islands(img):
    """Split a mask into one boolean array per connected component.

    The image is cast to ``uint8`` and labelled with
    ``cv2.connectedComponents``. Label 0 is left out (OpenCV reserves it for
    the background), so the returned list holds ``labels == k`` for
    k = 1 .. n-1.
    """
    component_count, label_map = cv2.connectedComponents(img.astype('uint8'))

    islands = []
    for label in range(1, component_count):
        islands.append(label_map == label)
    return islands



# os.listdir() already returns a list; it contains every entry of the folder,
# not only images, so each entry is validated below before it is used.
fullmaskimg_list = os.listdir(full_mask_dir)

# One file is written per particle: make sure the destination folder exists.
os.makedirs(split_mask_dir, exist_ok=True)

for i, fullmaskimg in enumerate(fullmaskimg_list):
    print("---------------------------------")
    print(f"     Processing mask {i}")
    print("---------------------------------")

    # The island file names reuse the first two "_"-separated fields of the
    # full mask name. This is valid if the pycococreator naming is respected.
    name_fields = fullmaskimg.split("_")
    if len(name_fields) < 2:
        print(f"SKIP {fullmaskimg}: file name has no '_'-separated fields")
        continue
    name_prefix = name_fields[0] + "_" + name_fields[1]

    img = cv2.imread(os.path.join(full_mask_dir, fullmaskimg), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports an unreadable / non-image file by returning None
        # instead of raising, which would otherwise crash on img.shape.
        print(f"SKIP {fullmaskimg}: could not be read as an image")
        continue
    print(f"Image: {fullmaskimg}")
    print(f"Image dimention: {img.shape}")

    # Get list of images, containing one island (particle) each.
    islands = get_islands(img)

    # For each island, save a separate image file
    for j, island in enumerate(islands):
        print(f"==== Processing island {j} / {len(islands)} ====")
        new_name = name_prefix + "_p" + str(j) + ".PNG"
        island_img_path = os.path.join(split_mask_dir, new_name)
        print(f"Island {j} path: {island_img_path}")

        # Save image. PNG is lossless and Pillow's PNG writer has no
        # `quality` option, so none is passed.
        img_island = Image.fromarray(island)
        img_island.save(island_img_path)
        
# If necessary, delete the huge output from the terminal:
# https://janakiev.com/blog/jupyter-git-remove-output/ 
    


### Rename images


#### Functions



In [None]:
def newname_images_listv2(imglist):
    """Build the rename table for the tile images.

    Each file name is split on the "_auto_generated_tile" marker. The text
    before the marker and the text after it are joined again (a lone "_"
    after the marker is replaced by "-original"), and every "_" in the result
    becomes "-". Adapt the function according to the original file names.

    Args:
        imglist: iterable of file names (without directory).

    Returns:
        dict mapping the image id (new name without a trailing ".jpg") to
        ``{"old_name": ..., "new_name": ..., "id": ...}``. Names that do not
        contain the marker are left out, so running this on a folder that
        was already renamed yields an empty table instead of an IndexError.
    """
    marker = "_auto_generated_tile"
    image_lib = {}
    for filename in imglist:
        parts = filename.split(marker)
        if len(parts) < 2:
            # Marker absent: nothing to rename for this entry.
            continue

        suffix = parts[1] if parts[1] != "_" else "-original"
        new_name = (parts[0] + suffix).replace("_", "-")

        # Escaped and anchored: only a real ".jpg" extension is stripped.
        # The previous pattern ".jpg" also matched e.g. "-jpg" inside the name.
        image_id = re.sub(r"\.jpg$", "", new_name)

        image_lib[image_id] = {"old_name": filename, "new_name": new_name, "id": image_id}

    return image_lib

# Placeholder to customise depending on the use case. See above.
def newname_images_list(imglist):
    """Placeholder: ignores ``imglist`` and returns an empty rename table."""
    return {}


def rename_file(old_filename, new_filename, directory, test=True):
    """Rename ``old_filename`` to ``new_filename`` inside ``directory``.

    Nothing happens when both names are equal.

    Args:
        old_filename: current file name, relative to ``directory``.
        new_filename: wanted file name, relative to ``directory``.
        directory: folder that holds the file.
        test: dry-run switch (default True). When true, the planned rename is
            only printed and nothing on disk changes.

    Raises:
        FileExistsError: in real mode, when ``new_filename`` already exists
            and is a different file than ``old_filename``.
    """
    old_path = os.path.join(directory, old_filename)
    new_path = os.path.join(directory, new_filename)

    if old_path == new_path:
        return

    if test:
        print("TEST")
        print(f"===>>> File \n\t{old_path} to \n\t{new_path}")
        return

    # os.rename silently replaces an existing destination on POSIX. Two old
    # names can map to the same new name, so refuse instead of losing a file.
    # samefile() keeps case-only renames possible on case-insensitive
    # file systems, where the "existing" destination is the source itself.
    if os.path.exists(new_path) and not os.path.samefile(old_path, new_path):
        raise FileExistsError(f"Refusing to overwrite existing file: {new_path}")

    os.rename(old_path, new_path)


#### Apply functions 

Test first !

In [None]:

imgdir = "../0_data/CocoConvertion/particles/train/particles_train2022"
imglist = os.listdir(imgdir)

# Rename table: one entry per image with its old and new file name.
img_dict = newname_images_listv2(imglist)
print(img_dict)

# Rename all the image files. Iterating over the entries (instead of a loop
# variable called `id`) keeps the builtin id() usable in later cells.
for entry in img_dict.values():
    # test=True only shows the old and new name; set it to False to rename.
    rename_file(entry['old_name'], entry['new_name'], imgdir, test=True)
    


In [None]:
# Second run to correct the mess
def newname_images_listv2(imglist):
    """Build the rename table for the second pass over the images.

    Every "-" in a file name is replaced by "x". Adapt the function according
    to the original file names.

    Args:
        imglist: iterable of file names (without directory).

    Returns:
        dict mapping the image id (new name without a trailing ".jpg") to
        ``{"old_name": ..., "new_name": ..., "id": ...}``.
    """
    image_lib = {}
    for filename in imglist:
        new_name = filename.replace("-", "x")

        # Escaped and anchored: only a real ".jpg" extension is stripped.
        # The previous pattern ".jpg" also matched e.g. "xjpg" inside the name.
        image_id = re.sub(r"\.jpg$", "", new_name)

        image_lib[image_id] = {"old_name": filename, "new_name": new_name, "id": image_id}

    return image_lib


imgdir = "../0_data/CocoConvertion/particles/train/particles_train2022"
imglist = os.listdir(imgdir)

# Rename table: one entry per image with its old and new file name.
img_dict = newname_images_listv2(imglist)

# Rename all the image files. Iterating over the entries (instead of a loop
# variable called `id`) keeps the builtin id() usable in later cells.
test = True
for entry in img_dict.values():
    # With test=True only the old and new name are shown.
    rename_file(entry['old_name'], entry['new_name'], imgdir, test=test)
    if test:
        # One example is enough for a dry run.
        break

### Rename annotation files

In [None]:
def newname_prediction_list(imglist, clas = "particle"):
    """Build the rename table for the Apeer annotation TIFF files.

    Names such as "10_19_auto_generated_tile_flip_finalprediction.ome.tiff"
    are split on the "_auto_generated_tile_" marker. "finalprediction.ome" and
    all "_" are removed from the part after the marker, the two parts are
    joined with "-", remaining "_" become "-" and "-original" is dropped
    (it is not present in the image names).

    Args:
        imglist: iterable of file names (without directory).
        clas: object class name; currently unused, kept for compatibility.

    Returns:
        dict keyed by the NEW file name, each value being
        ``{"old_name": ..., "new_name": ..., "id": ...}`` where "id" is the
        new name without a trailing ".tiff". Names that do not contain the
        marker are left out, so a folder that was already renamed yields an
        empty table instead of an IndexError.
    """
    marker = "_auto_generated_tile_"
    image_lib = {}
    for filename in imglist:
        parts = filename.split(marker)
        if len(parts) < 2:
            # Marker absent: nothing to rename for this entry.
            continue

        # Dots are escaped so that they only match a literal ".".
        tail = re.sub(r"finalprediction\.ome", "", parts[1]).replace("_", "")
        if tail == ".tiff":
            tail = "original.tiff"

        new_name = (parts[0] + "-" + tail).replace("_", "-")
        # Actually not present in images
        new_name = new_name.replace("-original", "")

        # Anchored: only the extension is stripped. The previous pattern
        # ".tiff" also removed e.g. "atiff" from inside the name.
        image_id = re.sub(r"\.tiff$", "", new_name)

        image_lib[new_name] = {"old_name": filename, "new_name": new_name, "id": image_id}

    return image_lib

In [None]:
tifdir = "../0_data/CocoConvertion/particles/train/annotations_tiff"

# Image files list
annotlist = os.listdir(tifdir)

# Rename table with the old and new name of every annotation file
annot_dict = newname_prediction_list(annotlist)

# Rename all the annotation files. Iterating over the entries (instead of a
# loop variable called `id`) keeps the builtin id() usable in later cells.
for entry in annot_dict.values():
    # test=True only shows the old and new name; set it to False to rename.
    rename_file(entry['old_name'], entry['new_name'], tifdir, test=True)


In [None]:
# Second run because some steps were forgotten: all the PNG files have to be
# renamed, so list everything currently in the PNG annotation folder.
pngdir = "../0_data/CocoConvertion/particles/train/annotations"
annotlist = os.listdir(pngdir)

def newname_prediction_list(imglist, clas = "particle"):
    """Build the rename table for the second pass over the PNG masks.

    The extension is dropped from each file name, every "-" of the remaining
    base name becomes "x", and ".PNG" is appended again.

    Args:
        imglist: iterable of file names.
        clas: object class name; not used by this variant.

    Returns:
        dict mapping the new base name to
        ``{"old_name": ..., "new_name": ..., "id": ...}``; here the id is the
        new base name itself.
    """
    image_lib = {}
    for filename in imglist:
        stem, _extension = os.path.splitext(os.path.basename(filename))
        mask_id = stem.replace("-", "x")
        image_lib[mask_id] = {
            "old_name": filename,
            "new_name": mask_id + ".PNG",
            "id": mask_id,
        }
    return image_lib

# Rename table with the old and new name of every PNG annotation file
annot_dict = newname_prediction_list(annotlist)

# Rename all the annotation files. Iterating over the entries (instead of a
# loop variable called `id`) keeps the builtin id() usable in later cells.
test = True
for entry in annot_dict.values():
    # With test=True only the old and new name are shown.
    rename_file(entry['old_name'], entry['new_name'], pngdir, test=test)
    if test:
        # One example is enough for a dry run.
        break

# Convert images and create structure for pycococreator



### Explore the tiff files

The multiple images in the stack represent opposite masks (background vs. particles); we need to take the right one.



In [None]:
tiffdir = "../0_data/CocoConvertion/particles/train/annotations_tiff"

# Keep only entries whose name really ends with ".tiff" (a substring test
# would also accept names such as "x.tiff.bak").
src = [os.path.join(tiffdir, name) for name in os.listdir(tiffdir) if name.endswith(".tiff")]
print(src)

# Analyse the first 20 tiff files: shape and set of pixel values.
for tiff_path in src[0:20]:
    print(tiff_path)
    pixels = cv2.imread(tiff_path)
    if pixels is None:
        # cv2.imread returns None (no exception) when it cannot decode a file.
        print("  -> could not be decoded by cv2.imread")
        continue
    print(pixels.shape)
    print(np.unique(pixels))
    # To look at the channels: plt.imshow(pixels[:, :, k]) for k in 0, 1, 2

### Get the frame matching with the particles (not the background)

Convert the frame to PNG. Frame [0] matches the background and frame [1] the particles.

In [None]:
import tifffile
# https://forum.image.sc/t/tiff-file-that-can-be-opened-in-fiji-but-not-python-matlab-due-to-offset-related-error/59483


def get_tiff_stack(impath):
    """Read the pages of a TIFF file into a single array.

    Reading is best effort: if a page fails to load, the pages decoded so far
    are kept and a warning is emitted instead of dropping the error silently.

    Args:
        impath: path of the TIFF file.

    Returns:
        ``np.stack`` of the page arrays that could be read (pages along
        axis 0).

    Raises:
        ValueError: when no page at all could be read (or, from ``np.stack``,
            when the pages do not share one shape).
    """
    import warnings

    frames = []
    with tifffile.TiffFile(impath) as tif:
        try:
            for page in tif.pages:
                frames.append(page.asarray())
        except Exception as err:
            # Broad on purpose: which exception a damaged page raises depends
            # on tifffile; the partial result is still useful to the caller.
            warnings.warn(
                f"{impath}: stopped reading after {len(frames)} page(s): {err!r}"
            )

    if not frames:
        # np.stack([]) would fail with a message that does not name the file.
        raise ValueError(f"No readable page in TIFF file: {impath}")

    return np.stack(frames)

### Fill particles holes and save file to .PNG

Error with holes

Currently (and most likely forever), pycocotools does not support masks that contain holes. It is not yet clear whether holes in the particles cause problems with the COCO annotations.

In [None]:
#print(stack[1])
#plt.imshow(stack[1])
#plt.show()


tiff_dir = "../0_data/CocoConvertion/particles/train/annotations_tiff"
conv_dir = "../0_data/CocoConvertion/particles/train/annotations"

# The converted masks are written here; create the folder if it is missing.
os.makedirs(conv_dir, exist_ok=True)

# Keep only entries whose name really ends with ".tiff" (a substring test
# would also accept names such as "x.tiff.bak").
tiff_files = [name for name in os.listdir(tiff_dir) if name.endswith(".tiff")]

for tiff_file in tiff_files:
    # Everything before the first "." is used as the output base name.
    file_basename = os.path.basename(tiff_file).split('.')[0]

    # Get particles layer: page 0 is the background, page 1 the particles.
    stack = get_tiff_stack(os.path.join(tiff_dir, tiff_file))
    if len(stack) < 2:
        print(f"SKIP {tiff_file}: expected at least 2 pages, found {len(stack)}")
        continue
    particle_stack = stack[1]

    # Fill holes to avoid (or not) problems with coco format
    particle_stack = scipy.ndimage.binary_fill_holes(particle_stack)

    im = Image.fromarray(particle_stack)
    save_path = os.path.join(conv_dir, file_basename + ".PNG")
    print("Save to: ", save_path)
    # PNG is lossless and Pillow's PNG writer has no `quality` option.
    im.save(save_path)
    
    #plt.imshow(im)
    #plt.show()