In [1]:
import os
import datetime
import errno

import numpy as np

import deepcell
from deepcell.utils.tracking_utils import load_trks

In [2]:
# The path to the data file is currently required for `train_model_()` functions

# Change DATA_DIR if you are not using `deepcell.datasets`

# DATA_FILE should be a trks file (contains 2 np arrays and a lineage dictionary)
##DATA_FILE = "../trks_141923_repeat.trks"
DATA_FILE = "../train.npz"
DATA_DIR = '~'
# confirm the data file is available
assert os.path.isfile(DATA_FILE)

In [3]:
# Set up other required filepaths

# If the data file is in a subdirectory, mirror it in MODEL_DIR and LOG_DIR
PREFIX = 'test'

ROOT_DIR = '../track_training/attempt_2'  # TODO: Change this! Usually a mounted volume
MODEL_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'models', PREFIX))
LOG_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'logs', PREFIX))

# create directories if they do not exist
for d in (MODEL_DIR, LOG_DIR):
    try:
        os.makedirs(d)
    except OSError as exc:  # Guard against race condition
        if exc.errno != errno.EEXIST:
            raise

In [5]:
from tensorflow.keras.optimizers import SGD
from deepcell.utils.train_utils import rate_scheduler

n_epoch = 3    # Number of training epochs
test_size = .20  # % of data saved as validation
train_seed = 1   # Random seed for training/validation data split

optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
lr_sched = rate_scheduler(lr=0.01, decay=0.99)

# Tracking training settings
features = {'appearance', 'distance', 'neighborhood', 'regionprop'}
min_track_length = 9
neighborhood_scale_size = 30
batch_size = 16  # changed  
crop_dim = 40  # changed
in_shape = (crop_dim, crop_dim, 1)

model_name = 'tracking_model_seed{}_tl{}'.format(train_seed, min_track_length)

In [7]:
import deepcell.image_generators as generators
from deepcell.utils.data_utils import get_data

# Get the data
#train_dict, test_dict = get_data(DATA_FILE, mode='siamese_daughters',
#                                 seed=train_seed, test_size=test_size)
train_dict, test_dict = get_data(DATA_FILE, mode='sample', seed=train_seed, test_size=test_size)

# Build the generators and iterators
datagen_train = generators.SiameseDataGenerator(
    rotation_range=20, # randomly rotate images by 0 to rotation_range degrees
    shear_range=0,      # randomly shear images in the range (radians , -shear_range to shear_range)
    horizontal_flip=0.2,  # randomly flip images
    vertical_flip=0.2)    # randomly flip images

train_data = datagen_train.flow(
    #test_dict,  # original
    train_dict,
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features)

datagen_test = generators.SiameseDataGenerator(
    rotation_range=0,  # randomly rotate images by 0 to rotation_range degrees
    shear_range=0,     # randomly shear images in the range (radians , -shear_range to shear_range)
    horizontal_flip=0, # randomly flip images
    vertical_flip=0)   # randomly flip images

test_data = datagen_test.flow(
    test_dict,
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features)

KeyboardInterrupt: 

In [6]:
from deepcell import model_zoo

tracking_model = model_zoo.siamese_model(
    input_shape=in_shape,
    neighborhood_scale_size=neighborhood_scale_size,
    features=features)

In [7]:
from deepcell import losses

n_classes = tracking_model.layers[-1].output_shape[-1]

def loss_function(y_true, y_pred):
    return losses.weighted_categorical_crossentropy(y_true, y_pred,
                                                    n_classes=n_classes,
                                                    from_logits=False)

In [8]:
tracking_model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

### Train the model

Call `fit_generator` on the compiled model, along with a default set of callbacks.

In [9]:
from deepcell.utils.train_utils import get_callbacks
from deepcell.utils.train_utils import count_gpus
from deepcell.utils import tracking_utils


model_path = os.path.join(MODEL_DIR, '{}.h5'.format(model_name))
loss_path = os.path.join(MODEL_DIR, '{}.npz'.format(model_name))

num_gpus = count_gpus()

print('Training on', num_gpus, 'GPUs.')

train_callbacks = get_callbacks(
    model_path,
    lr_sched=lr_sched,
    tensorboard_log_dir=LOG_DIR,
    save_weights_only=num_gpus >= 2,
    monitor='val_loss',
    verbose=1)

# rough estimate for steps_per_epoch
#total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=5.0)
#total_test_pairs = tracking_utils.count_pairs(test_dict['y'], same_probability=5.0)
steps_per_epoch=len(train_dict['y'])/batch_size
validation_steps = len(test_dict['y'])/batch_size

# fit the model on the batches generated by datagen.flow()
loss_history = tracking_model.fit_generator(
    train_data,
    #steps_per_epoch=total_train_pairs // batch_size,
    steps_per_epoch=steps_per_epoch,
    validation_data=test_data,
    #validation_steps=total_test_pairs // batch_size,
    epochs=10,
    validation_steps = validation_steps,
    callbacks=train_callbacks)

Training on 0 GPUs.




Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.08693, saving model to /home/jupyter/train_0812/models/test_0812_seq/tracking_model_seed1_tl9.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.08693 to 0.06254, saving model to /home/jupyter/train_0812/models/test_0812_seq/tracking_model_seed1_tl9.h5
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.06254
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.06254
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.06254
Epoch 6/10

Epoch 00006: val_loss improved from 0.06254 to 0.04692, saving model to /home/jupyter/train_0812/models/test_0812_seq/tracking_model_seed1_tl9.h5
Epoch 7/10

Epoch 00007: val_loss improved from 0.04692 to 0.04384, saving model to /home/jupyter/train_0812/models/test_0812_seq/tracking_model_seed1_tl9.h5
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.04384
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.04384
Epoch 10/10

Epoch 00010: val_loss improved from 0.04384 

In [10]:
batch_size

32

In [11]:
train_dict.keys()

dict_keys(['X', 'y'])

In [12]:
test_dict.keys()

dict_keys(['X', 'y'])

In [39]:
lst, y_true = next(test_data)

In [28]:
lst.keys()

dict_keys(['appearance_input1', 'appearance_input2', 'distance_input1', 'distance_input2', 'neighborhood_input1', 'neighborhood_input2', 'regionprop_input1', 'regionprop_input2'])

In [27]:
lst['appearance_input1'].shape

(32, 9, 100, 100, 1)

In [29]:
y_true.keys()

dict_keys(['classification'])

In [40]:
y_true["classification"].shape

(32, 3)

In [41]:
y_true_max = np.argmax(y_true['classification'], axis=-1)
y_true_max
#len(y_true['classification'])

array([0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 1, 0])

In [17]:
# rm
a = np.arange(6).reshape(2,3) + 10
a

array([[10, 11, 12],
       [13, 14, 15]])

In [18]:
np.argmax(a, axis=-1)

array([2, 2])

## Evaluate Model Performance

**Requires a Seed Value**

In [43]:
from sklearn.metrics import confusion_matrix

Y = []
Y_pred = []

for i in range(1,1000):
    if i % 100 == 0:
        print(".", end="")
    lst, y_true = next(test_data)
    y_true = np.argmax(y_true['classification'], axis=-1)
    y_pred = np.argmax(tracking_model.predict(lst), axis=-1)
    Y.append(y_true)
    Y_pred.append(y_pred)
    
Y = np.concatenate(Y, axis=0)
Y_pred = np.concatenate(Y_pred, axis=0)

print("")
cm = confusion_matrix(Y, Y_pred)
print(cm)

.........
[[11101     0]
 [    0 20117]]


In [33]:
len(Y_pred)

99

In [35]:
len(Y)

99

In [36]:
Y_pred[0:10]

[array([0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
        1, 0, 0, 0, 0, 1, 1, 1, 1, 1]),
 array([0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,
        1, 1, 1, 1, 1, 1, 1]),
 array([1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 1, 0, 0, 1, 0]),
 array([1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
        1, 1, 0, 0, 1, 1, 0, 1, 1, 1]),
 array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
 array([1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
        1, 1, 1, 1, 0, 1, 0]),
 array([0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
        0, 0, 0, 1, 0, 1, 0, 1, 0, 1]),
 array([0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 0, 1, 0, 0, 1, 1, 1, 1]),
 array([1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0,
        1, 1, 1, 0, 0, 0, 1, 1, 

In [None]:
Y_pred_new = np.concatenate(Y_pred, axis=0)

In [None]:
np.unique(Y_pred_new)

In [None]:
test_acc = sum(np.array(Y) == np.array(Y_pred)) / len(Y)
print('Accuracy across all three classes: ', test_acc)

# Normalize the diagonal entries of the confusion matrix
cm = cm.astype('float')/cm.sum(axis=1)[:, np.newaxis]
# Diagonal entries are the accuracies of each class
print('Accuracy for each individual class [Different, Same, Daughter]: ', cm.diagonal())