In [1]:
import os
import datetime
import errno

import numpy as np

import deepcell
from deepcell.utils.tracking_utils import load_trks

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [210]:
# The path to the data file is currently required for `train_model_()` functions

# Change DATA_DIR if you are not using `deepcell.datasets`

# DATA_FILE should be a trks file (contains 2 np arrays and a lineage dictionary)
DATA_FILE = "../22956814.trks"
DATA_DIR = '~'
# confirm the data file is available
assert os.path.isfile(DATA_FILE)

In [213]:
# Set up other required filepaths

# If the data file is in a subdirectory, mirror it in MODEL_DIR and LOG_DIR
PREFIX = 'test_0806'

ROOT_DIR = '../22956814_train_test'  # TODO: Change this! Usually a mounted volume
MODEL_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'models', PREFIX))
LOG_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'logs', PREFIX))

# create directories if they do not exist
for d in (MODEL_DIR, LOG_DIR):
    try:
        os.makedirs(d)
    except OSError as exc:  # Guard against race condition
        if exc.errno != errno.EEXIST:
            raise

In [214]:
from tensorflow.keras.optimizers import SGD
from deepcell.utils.train_utils import rate_scheduler

n_epoch = 10     # Number of training epochs
test_size = .20  # % of data saved as validation
train_seed = 1   # Random seed for training/validation data split

optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
lr_sched = rate_scheduler(lr=0.01, decay=0.99)

# Tracking training settings
features = {'appearance', 'distance', 'neighborhood', 'regionprop'}
min_track_length = 9
neighborhood_scale_size = 30
batch_size = 2  # changed  
crop_dim = 100  # changed
in_shape = (crop_dim, crop_dim, 1)

model_name = 'tracking_model_seed{}_tl{}'.format(train_seed, min_track_length)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [216]:
import deepcell.image_generators as generators
from deepcell.utils.data_utils import get_data

# Get the data
train_dict, test_dict = get_data(DATA_FILE, mode='siamese_daughters',
                                 seed=train_seed, test_size=test_size)

# Build the generators and iterators
datagen_train = generators.SiameseDataGenerator(
    rotation_range=180, # randomly rotate images by 0 to rotation_range degrees
    shear_range=0,      # randomly shear images in the range (radians , -shear_range to shear_range)
    horizontal_flip=1,  # randomly flip images
    vertical_flip=1)    # randomly flip images

train_data = datagen_train.flow(
    test_dict,
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features)

datagen_test = generators.SiameseDataGenerator(
    rotation_range=0,  # randomly rotate images by 0 to rotation_range degrees
    shear_range=0,     # randomly shear images in the range (radians , -shear_range to shear_range)
    horizontal_flip=0, # randomly flip images
    vertical_flip=0)   # randomly flip images

test_data = datagen_test.flow(
    test_dict,
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features)

In [217]:
from deepcell import model_zoo

tracking_model = model_zoo.siamese_model(
    input_shape=in_shape,
    neighborhood_scale_size=neighborhood_scale_size,
    features=features)

In [218]:
from deepcell import losses

n_classes = tracking_model.layers[-1].output_shape[-1]

def loss_function(y_true, y_pred):
    return losses.weighted_categorical_crossentropy(y_true, y_pred,
                                                    n_classes=n_classes,
                                                    from_logits=False)

In [219]:
tracking_model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

### Train the model

Call `fit_generator` on the compiled model, along with a default set of callbacks.

In [220]:
from deepcell.utils.train_utils import get_callbacks
from deepcell.utils.train_utils import count_gpus
from deepcell.utils import tracking_utils


model_path = os.path.join(MODEL_DIR, '{}.h5'.format(model_name))
loss_path = os.path.join(MODEL_DIR, '{}.npz'.format(model_name))

num_gpus = count_gpus()

print('Training on', num_gpus, 'GPUs.')

train_callbacks = get_callbacks(
    model_path,
    lr_sched=lr_sched,
    tensorboard_log_dir=LOG_DIR,
    save_weights_only=num_gpus >= 2,
    monitor='val_loss',
    verbose=1)

# rough estimate for steps_per_epoch
total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=5.0)
total_test_pairs = tracking_utils.count_pairs(test_dict['y'], same_probability=5.0)

# fit the model on the batches generated by datagen.flow()
loss_history = tracking_model.fit_generator(
    train_data,
    steps_per_epoch=total_train_pairs // batch_size,
    epochs=n_epoch,
    validation_data=test_data,
    validation_steps=total_test_pairs // batch_size,
    callbacks=train_callbacks)

Training on 0 GPUs.




Epoch 1/10

Epoch 00001: val_loss improved from inf to 3.20191, saving model to /home/jupyter/22956814_train_test/models/test_0806/tracking_model_seed1_tl9.h5


In [None]:
Y

## Evaluate Model Performance

**Requires a Seed Value**

In [239]:
from sklearn.metrics import confusion_matrix

Y = []
Y_pred = []

for i in range(1,1000):
    if i % 100 == 0:
        print(".", end="")
    lst, y_true = next(test_data)
    y_true = np.argmax(y_true, axis=-1)
    y_pred = np.argmax(tracking_model.predict(lst), axis=-1)
    Y.append(y_true)
    Y_pred.append(y_pred)
    
#Y = np.concatenate(Y, axis=0)
#Y_pred = np.concatenate(Y_pred, axis=0)

print("")
cm = confusion_matrix(Y, Y_pred)
print(cm)

.........


ValueError: Classification metrics can't handle a mix of binary and multilabel-indicator targets