In [None]:
# general packages
import random 
import numpy as np
from collections import Counter
from datetime import date
import os

# packages for 3d visualization
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [10, 10]
from PIL import Image
from PIL.ImageOps import invert

# core packages
from skimage.draw import random_shapes

In [None]:
# core function to generate a random nuclei instance mask 

def generate_mask(size: tuple, numb_inst: tuple, size_inst: tuple, shape: str, out: str, counter: int, overlap: bool, seed: int = None):
    """Generate one random binary instance mask, save it as a PNG and log it.

    Parameters
    ----------
    size : tuple
        Size of the mask in pixels; passed to ``random_shapes`` as the image shape.
    numb_inst : tuple
        ``(min, max)`` number of instances to draw.
    size_inst : tuple
        ``(min, max)`` size of each instance.
    shape : str
        Shape name passed to ``random_shapes`` (e.g. 'circle', 'ellipse').
    out : str
        Output folder; the mask is saved as ``img_<counter>.png`` inside it.
    counter : int
        Index of the mask, used in the file name and in the log line.
    overlap : bool
        Whether instances are allowed to overlap.
    seed : int, optional
        Seed passed to ``random_shapes``. When omitted, a fresh seed is drawn
        from the full 32-bit range.

    Returns
    -------
    tuple
        ``(mask, log, instance_number)``: the saved PIL image, the log line
        that was printed, and the number of generated instances named ``shape``.
    """
    if seed is None:
        # Draw from 2**32 values instead of only 1000: with the same parameters
        # an identical seed reproduces the identical mask, so a small seed pool
        # yields duplicates as soon as more than 1000 masks are generated.
        seed = random.randrange(2**32)

    image, labels = random_shapes(
        size,
        min_shapes=numb_inst[0],
        max_shapes=numb_inst[1],
        shape=shape,
        random_seed=seed,
        min_size=size_inst[0],
        max_size=size_inst[1],
        allow_overlap=overlap,
    )

    # Binarize on a private copy: every value below 255 becomes 0, so only
    # pixels that were exactly 255 stay non-zero.
    bw = np.array(image)
    bw[bw < 255] = 0

    mask = Image.fromarray(bw)
    mask = invert(mask)  # sets background as black and instances as white

    # Join the path portably; a hard-coded backslash is only a separator on
    # Windows and '\i' is an invalid escape sequence in a string literal.
    mask.save(os.path.join(out, f'img_{counter}.png'))

    # Each label is a (shape_name, bounding_box) pair; count those matching `shape`.
    instance_number = Counter(label[0] for label in labels)[shape]
    log = f'Type: {shape}; Counter: {counter}; Density: {instance_number}; Mask successfully exported!'
    print(log)

    # TODO
    # allow a maximum overlap
    # add noise at the borders, make the shapes less perfect
    # do the same as a second channel for cell instances

    return mask, log, instance_number

In [None]:
# generate a user-specified number of nuclei mask images

##################################################################################################################################

# how many masks are generated in this run
images = 10000

# (x, y) dimensions of every mask
size = (1000, 1000)

# (min, max) bounds on the number of instances drawn on one mask
numb_inst = (20, 120)

# instance geometry: one of 'circle', 'ellipse', 'triangle'
shape = 'ellipse'

# (min, max) bounds on the size of a single instance
size_inst = (30, 50)

# whether instances may overlap each other
overlap = True

# whether to plot the distribution of instance counts
plot = False

# output location: <out>/<folder_name>_<today's date>/, with log.txt inside it
out = "//research.files.med.harvard.edu/sysbio/MEGASON LAB/People/AntoineRuzette/Data"
folder_name = 'NeuralTube_2D_mask_distr'
folder_path = '/'.join([out, f'{folder_name}_{date.today().isoformat()}'])
log_path = '/'.join([folder_path, 'log.txt'])
###################################################################################################################################    


# Create the output folder if it does not exist yet.
# NOTE: `~os.path.isdir(...)` is a bitwise NOT on a bool (~True == -2,
# ~False == -1), which is always truthy, so the previous guard called
# os.mkdir unconditionally and failed when the folder already existed.
os.makedirs(folder_path, exist_ok=True)

inst_numb_list = np.zeros(images)

# Open the log once for the whole run instead of once per mask; the `with`
# block still flushes and closes the file if generation raises.
with open(log_path, 'a') as log_file:
    for idx in range(images):
        slice_z, log, instance_number = generate_mask(size, numb_inst, size_inst, shape, folder_path, idx, overlap)
        inst_numb_list[idx] = int(instance_number)

        # write in log file
        log_file.write(log + '\n')

# plot instances density distribution

if plot:
    print(np.mean(inst_numb_list))

    n_bins = 50
    plt.hist(inst_numb_list, bins=n_bins, density=True, facecolor='g', alpha=0.75)
    plt.xlabel('Number of instances')
    plt.ylabel('Density')
    # Build the title from the actual settings so it cannot go stale.
    plt.title(f'Distribution of the number of nuclei instances in mask, N = {images}, bins = {n_bins}')
    plt.grid(True)
    plt.show()