
Model Training
====================

First we import all required modules. This notebook gathers the mask files needed to train a new model and then starts the training.

In [30]:
import tensorflow as tf
import numpy as np
from numpy import random
import os
from tqdm import tqdm
import cv2
from shutil import copy

import mask_prediction.unet_semantics as model_setup
import mask_prediction.data_retrievals as data
import mask_prediction.apply_weights as weights

from glob import glob

# Reaching this line means every import above resolved without raising.
print('Imports succesful!')

Imports succesful!


________________
Initialising
---------
- First the train and test model folders are set up.
- Then the environment is set up and some file names are initialised.


In [31]:
##### GPU setup #####
# Index of the GPU this process is allowed to see (0-3).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

##### CPU setup #####
# Ask each CPU threading backend for 20 threads and set the 'openmp' flag.
# NOTE(review): the original author was unsure whether these settings take effect.
# numpy and tensorflow are already imported by the time this cell runs, so the
# values may be read too late -- TODO confirm (they may only matter without a GPU).
os.environ.update({
    'MKL_NUM_THREADS': '20',
    'GOTO_NUM_THREADS': '20',
    'OMP_NUM_THREADS': '20',
    'openmp': 'True',
})

Training parameters
--
The model can be trained on either manually annotated masks or partially annotated masks; the latter approach is based on a paper by Qu et al.
Another option is whether or not to use secondary data in the training set. By default this will look up the Hoechst data in the datasets.
The secondary dataset can also be preprocessed, either by normalizing it or by thresholding it.

When the training set is initialised, a fraction of the available masks (set by `TRAIN_TEST_SPLIT`) is copied to the training folder and the remainder is used to populate the testing folder.

In [32]:
##### General variables #####
IMG_SIZE = 1024                 #Edge length in pixels of one EM tile (used for both width and height).
BATCH_SIZE = 4                  #How many images to take in at once on the GPU.

##### Parameters of this particular run #####
model_name = 'qu_base_em_predho'
ini_data_path = 'X:\\BEP_data\\'#Root folder containing the data structure. Keep the trailing separator: the folders below are appended to it.
Ho_adjust = False               #Threshold the Hoechst for the purpose of amplification.
normalize = True                #Whether to normalize the secondary data coming in.
using_weights = True            #Whether weights are used within the mask data.
TRAIN_WITH_FM = True            #Use the FM data when training. Creates model with either 3 (True) or 1 (False) input channels.
PATIENCE = 50                   #How many epochs with no discernible difference in loss to wait before prematurely terminating training.
TRAIN_TEST_SPLIT = .8           #Fraction (0-1) of the available mask images used as training data. The remainder is used as validation data.

##### Folders that will be used in this run #####
#Every data folder is derived from ini_data_path, so relocating the data set only needs one edit.
mask_folder = ini_data_path + 'RL012\\Manual Masks'                     #Folder containing manually annotated masks
train_folder = ini_data_path + 'Train_set\\Train_masks\\1'              #Folder from which the model will collect training images
test_folder = ini_data_path + 'Test_set\\Test_masks\\1'                 #Folder from which the model will collect validation images
partial_annotation_folder = ini_data_path + 'Partial Annotation\\temp'  #Folder handed to the partial-annotation conversion
mask_list = glob(mask_folder + '\\*.png')                               #All .png masks currently present in mask_folder

In [33]:
def choice(str_list, percent, seed=None):
    """Randomly split a collection of items (here: file names) into two lists.

    Parameters
    ----------
    str_list : sequence
        Items to split. A copy is shuffled, so the caller's sequence keeps
        its original order.
    percent : float
        Fraction of the items placed in the first list; the cut index is
        ``int(len(str_list) * percent)``.
    seed : int, optional
        When given, a dedicated ``RandomState(seed)`` does the shuffling so
        the split is reproducible. When omitted, numpy's global random state
        is used.

    Returns
    -------
    tuple of (list, list)
        The first ``cut`` shuffled items and the remaining shuffled items.
    """
    shuffled = list(str_list)   # work on a copy: never reorder the caller's list
    rng = random if seed is None else random.RandomState(seed)
    rng.shuffle(shuffled)
    cut = int(len(shuffled) * percent)
    return shuffled[:cut], shuffled[cut:]

# Pick the list of mask files to split. The partially annotated masks only exist
# when using_weights is set, so the choice is made with an explicit branch:
# indexing a (mask_list, point_mask_list) tuple would evaluate point_mask_list
# even when it was never created and fail with a NameError.
if using_weights:
    # NOTE(review): ITERATION_COUNT and size_filter are not referenced by any other visible cell.
    ITERATION_COUNT = 4     #How many partial annotation iterations to go over before calling the model done.
    size_filter = 5e5       #area in pixels which has to cover the weights image for it to be used in training. (pixel values range from 0 to 255)
    pnt_ratio = .8          #ratio of blobs which will be used in the final image.

    radius_array = weights.get_radius_sample('X:\\BEP_data\\Train_set\\blobs\\1\\')
    # NOTE(review): the sample is called a radius but its mean is passed on as a diameter -- confirm the units.
    mean_diam = np.mean(radius_array, dtype=int)

    point_mask_list = weights.convert_partial_annotation(mask_folder, partial_annotation_folder, mean_diam, IMG_SIZE=IMG_SIZE, pnt_ratio=pnt_ratio)
    split_source = point_mask_list
else:
    split_source = mask_list

train_list, test_list = choice(split_source, TRAIN_TEST_SPLIT)

# Make sure both destination folders exist. Copying to a missing folder would
# otherwise create a single file carrying the folder's name.
for target_folder in (train_folder, test_folder):
    os.makedirs(target_folder, exist_ok=True)

# Remove the .png files left over from a previous run before copying the new split.
for target_folder in (train_folder, test_folder):
    for stale_image in glob(target_folder + '\\*.png'):
        os.remove(stale_image)

for train in train_list:
    copy(train, train_folder)
for test in test_list:
    copy(test, test_folder)

print('Training on {} images'.format(len(train_list)))
print('Testing on {} images'.format(len(test_list)))

Training on 28 images
Testing on 7 images


Next the model is trained.

In [None]:
# Export location handed to Train_Model; the run name fills in the last path component.
model_export = 'X:\\BEP_Project\\Models\\{}'.format(model_name)
# Three input channels when the FM data is used for training, otherwise one.
IMG_CHANNELS = 3 if TRAIN_WITH_FM else 1

# Keyword arguments collected in one place so the call itself stays short.
training_options = dict(
    IMG_WIDTH=IMG_SIZE,
    IMG_HEIGHT=IMG_SIZE,
    IMG_CHANNELS=IMG_CHANNELS,
    BATCH_SIZE=BATCH_SIZE,
    patience=PATIENCE,
    normalize=normalize,
    using_weights=using_weights,
)
model_setup.Train_Model(ini_data_path, model_export, **training_options)