In [1]:
import os
import datetime
import errno

import numpy as np

import deepcell
from deepcell.utils.tracking_utils import load_trks, trks_stats

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [2]:
# Probe for a CUDA-capable GPU through PyTorch.
# NOTE(review): `use_cuda` is not referenced by any other cell visible in this
# notebook export; the model itself is built with TensorFlow/Keras.
import torch
use_cuda = torch.cuda.is_available()

In [3]:
# List the compute devices TensorFlow can see and display them as the cell
# output (the recorded run below shows a single CPU device).
from tensorflow.python.client import device_lib
devices = device_lib.list_local_devices()
devices

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 15714735188428714538]

In [4]:
# The path to the data file is currently required for `train_model_()` functions

# DATA_FILE is loaded with `get_data` and split into training/validation data;
# TEST_FILE is loaded later by the evaluation cells.
# NOTE(review): an earlier revision pointed DATA_FILE at a .trks file (kept
# below for reference); the current paths are .npz archives.
##DATA_FILE = "../trks_141923_repeat.trks"
DATA_FILE = "../train.npz"
TEST_FILE = '../test.npz'  ## Added

# Change DATA_DIR if you are not using `deepcell.datasets`
DATA_DIR = '~'

# Fail fast, with a readable message, if either input file is missing.
# TEST_FILE is only consumed after training has finished, so checking it here
# avoids discovering a bad path at the very end of a long run.
for _required_file in (DATA_FILE, TEST_FILE):
    assert os.path.isfile(_required_file), (
        'Required data file not found: {}'.format(_required_file))

In [8]:
# Print summary statistics for the training and test track files.
# NOTE(review): these .trks paths are hard-coded and differ from the .npz
# DATA_FILE / TEST_FILE paths defined above — confirm they describe the same data.
trks_stats('../train.trks')
trks_stats('../test.trks')

Dataset Statistics: 
Image data shape:  (440, 100, 530, 530, 1)
Number of lineages (should equal batch size):  440
Total number of unique tracks (cells)      -  988
Total number of divisions                  -  0
Average cell density (cells/100 sq pixels) -  0.0004974271012006861
Average number of frames per track         -  75
Dataset Statistics: 
Image data shape:  (66, 100, 530, 530, 1)
Number of lineages (should equal batch size):  66
Total number of unique tracks (cells)      -  174
Total number of divisions                  -  0
Average cell density (cells/100 sq pixels) -  0.0004903071296805722
Average number of frames per track         -  64


In [9]:
# Set up other required filepaths

# If the data file is in a subdirectory, mirror it in MODEL_DIR and LOG_DIR
PREFIX = 'test_0814'

ROOT_DIR = '../train_0814'  # TODO: Change this! Usually a mounted volume
MODEL_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'models', PREFIX))
LOG_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'logs', PREFIX))

# Create the output directories if they do not exist.
# `exist_ok=True` makes the cell safe to re-run and tolerant of a concurrent
# creator, while still raising if the path exists but is not a directory.
for d in (MODEL_DIR, LOG_DIR):
    os.makedirs(d, exist_ok=True)

In [10]:
from tensorflow.keras.optimizers import SGD
from deepcell.utils.train_utils import rate_scheduler

n_epoch = 10    # Number of training epochs
test_size = .1  # Fraction of the data held out for validation
train_seed = 1   # Random seed for training/validation data split

# Use `learning_rate`: the Keras optimizer warns that its `lr` keyword is
# deprecated (see the warning recorded in this notebook's output).
optimizer = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
# `rate_scheduler` is called with its `lr` keyword unchanged; the deprecation
# above concerns the optimizer argument only.
lr_sched = rate_scheduler(lr=0.01, decay=0.99)

# Tracking training settings
features = {'appearance', 'distance', 'neighborhood', 'regionprop'}
min_track_length = 9
neighborhood_scale_size = 30
batch_size = 16  # changed
crop_dim = 100  # changed
in_shape = (crop_dim, crop_dim, 1)

# Encodes the seed and minimum track length so saved files are distinguishable.
model_name = 'tracking_model_seed{}_tl{}'.format(train_seed, min_track_length)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [11]:
import deepcell.image_generators as generators
from deepcell.utils.data_utils import get_data

# Get the data: load DATA_FILE and split it into training and validation
# dictionaries using the seed and validation fraction configured above.
# An earlier version of this call used mode='siamese_daughters' (kept below
# for reference); the active call uses mode='sample'.
#train_dict, test_dict = get_data(DATA_FILE, mode='siamese_daughters',
#                                 seed=train_seed, test_size=test_size)
train_dict, test_dict = get_data(DATA_FILE, mode='sample', seed=train_seed, test_size=test_size)

In [12]:
# Report the shape of the 'X' array of each split: training first, then validation.
for data_split in (train_dict, test_dict):
    print(data_split['X'].shape)

(396, 100, 530, 530, 1)
(44, 100, 530, 530, 1)


In [13]:
# Build the generators and iterators
# Training generator: every augmentation option is set to 0, i.e. no random
# rotation, shear or flipping is requested.
train_augmentation = {
    'rotation_range': 0,
    'shear_range': 0,
    'horizontal_flip': 0,
    'vertical_flip': 0,
}
datagen_train = generators.SiameseDataGenerator(**train_augmentation)

# Iterator over the training split, configured from the tracking settings cell.
train_flow_options = {
    'batch_size': batch_size,
    'seed': train_seed,
    'crop_dim': crop_dim,
    'neighborhood_scale_size': neighborhood_scale_size,
    'min_track_length': min_track_length,
    'features': features,
}
train_data = datagen_train.flow(train_dict, **train_flow_options)

In [14]:
# Validation generator: configured like the training one, with every
# augmentation option set to 0 (no rotation, shear or flipping requested).
test_augmentation = dict(
    rotation_range=0,
    shear_range=0,
    horizontal_flip=0,
    vertical_flip=0,
)
datagen_test = generators.SiameseDataGenerator(**test_augmentation)

# Iterator over the validation split, using the same settings as training.
test_flow_options = dict(
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features,
)
test_data = datagen_test.flow(test_dict, **test_flow_options)

In [15]:
from deepcell import model_zoo

# Build the siamese tracking model. The input shape, neighborhood size and
# feature set are the same variables that configure the data iterators, so the
# model and the generated batches are set up from one source of settings.
tracking_model = model_zoo.siamese_model(
    input_shape=in_shape,
    neighborhood_scale_size=neighborhood_scale_size,
    features=features)

In [16]:
from deepcell import losses

# Size of the last axis of the model's final layer output, used as the class count.
n_classes = tracking_model.layers[-1].output_shape[-1]


def loss_function(y_true, y_pred):
    """Weighted categorical cross-entropy for the tracking model.

    Thin wrapper that fixes ``n_classes`` (read from the module-level variable
    at call time) and ``from_logits=False`` so the function has the two-argument
    signature that is passed to ``compile``.

    Args:
        y_true: target values supplied by the training loop.
        y_pred: model outputs; passed with ``from_logits=False``.

    Returns:
        The value returned by ``losses.weighted_categorical_crossentropy``.
    """
    loss_options = {'n_classes': n_classes, 'from_logits': False}
    return losses.weighted_categorical_crossentropy(y_true, y_pred, **loss_options)

In [17]:
# Compile the model with the custom weighted loss, the SGD optimizer configured
# in the settings cell, and accuracy as the reported metric.
tracking_model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

### Train the model

Call `fit_generator` on the compiled model, along with a default set of callbacks.

In [21]:
# Scratch check: compute the number of full training batches per epoch and
# confirm the dtype after casting to float32 (the training cell applies the
# same cast before passing the step counts to `fit_generator`).
steps_per_epoch= train_dict['y'].shape[0] // batch_size
b = np.array(steps_per_epoch).astype(np.float32)
print(b.dtype)

float32


In [23]:
from deepcell.utils.train_utils import get_callbacks
from deepcell.utils.train_utils import count_gpus
from deepcell.utils import tracking_utils


# Output locations for the saved model and (by name) the loss history.
model_path = os.path.join(MODEL_DIR, '{}.h5'.format(model_name))
loss_path = os.path.join(MODEL_DIR, '{}.npz'.format(model_name))

num_gpus = count_gpus()

print('Training on', num_gpus, 'GPUs.')

# Default deepcell callbacks; only weights are saved when two or more GPUs are
# reported, and the monitored quantity is the validation loss.
train_callbacks = get_callbacks(
    model_path,
    lr_sched=lr_sched,
    tensorboard_log_dir=LOG_DIR,
    save_weights_only=num_gpus >= 2,
    monitor='val_loss',
    verbose=1)

# rough estimate for steps_per_epoch
# A pair-count based estimate was used previously (kept for reference):
#total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=5.0)
#total_test_pairs = tracking_utils.count_pairs(test_dict['y'], same_probability=5.0)
steps_per_epoch = train_dict['y'].shape[0] // batch_size
validation_steps = test_dict['y'].shape[0] // batch_size

# fit the model on the batches generated by datagen.flow()
# `epochs` comes from `n_epoch` in the settings cell instead of a second
# hard-coded literal, so changing the setting actually changes the run.
# NOTE(review): the step counts are cast to float32 exactly as before; the
# reason is not visible in this notebook — confirm whether plain ints work
# with the installed TensorFlow version before removing the cast.
loss_history = tracking_model.fit_generator(
    train_data,
    steps_per_epoch=np.array(steps_per_epoch).astype(np.float32),
    validation_data=test_data,
    validation_steps=np.array(validation_steps).astype(np.float32),
    epochs=n_epoch,
    callbacks=train_callbacks)

Training on 0 GPUs.
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.29009, saving model to /home/jupyter/train_0814/models/test_0814/tracking_model_seed1_tl9.h5
Epoch 2/10

Epoch 00002: val_loss did not improve from 0.29009
Epoch 3/10

Epoch 00003: val_loss improved from 0.29009 to 0.08646, saving model to /home/jupyter/train_0814/models/test_0814/tracking_model_seed1_tl9.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.08646 to 0.05790, saving model to /home/jupyter/train_0814/models/test_0814/tracking_model_seed1_tl9.h5
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.05790
Epoch 6/10

Epoch 00006: val_loss improved from 0.05790 to 0.03474, saving model to /home/jupyter/train_0814/models/test_0814/tracking_model_seed1_tl9.h5
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.03474
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.03474
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.03474
Epoch 10/10

Epoch 00010: val_loss did not improve fr

In [24]:
# Display the number of full batches in the training split for the configured batch size.
len(train_dict['y'])//batch_size

24

## Evaluate Model Performance

**Requires a seed value:** the cells below pass `train_seed` to `get_data` and to `datagen_test.flow`.

In [50]:
# Rosary added
## Redefine test_data
# Load TEST_FILE with test_size=0.95 and use the second returned dictionary
# for evaluation.
# NOTE: this rebinds both `train_dict` and `test_dict`, so the training and
# validation splits loaded from DATA_FILE are no longer reachable by those
# names after this cell runs (re-run the earlier loading cell to restore them).

train_dict, test_dict = get_data(TEST_FILE, mode='sample', seed=train_seed, test_size=0.95)

In [51]:
# Rebuild the `test_data` iterator on the newly loaded `test_dict`, with the
# same generator and settings that were used for the validation iterator.
test_data = datagen_test.flow(
    test_dict,
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features)

In [52]:
# Sanity check: draw one batch from the evaluation iterator and display the
# model's predictions for it (one row per sample, one column per class in the
# recorded output).
lst, y_true = next(test_data)
tracking_model.predict(lst)

array([[2.2238530e-02, 9.7749251e-01, 2.6892775e-04],
       [6.5876590e-03, 9.8986036e-01, 3.5520762e-03],
       [4.0780385e-03, 9.9536538e-01, 5.5665779e-04],
       [9.4052845e-01, 5.7981577e-02, 1.4899881e-03],
       [1.1098699e-01, 8.8694257e-01, 2.0703997e-03],
       [7.9518398e-03, 9.9151236e-01, 5.3578313e-04],
       [1.4865002e-02, 9.8447084e-01, 6.6412718e-04],
       [2.8137362e-02, 9.7079873e-01, 1.0639080e-03],
       [6.1087590e-03, 9.9196047e-01, 1.9307449e-03],
       [1.2552477e-02, 9.8624194e-01, 1.2055965e-03],
       [1.5114242e-02, 9.8393548e-01, 9.5031550e-04],
       [1.4350423e-01, 8.5544914e-01, 1.0466333e-03],
       [8.5899487e-02, 9.1268992e-01, 1.4105946e-03],
       [2.3103507e-02, 9.7575164e-01, 1.1448293e-03],
       [5.9303150e-02, 9.3952590e-01, 1.1709327e-03],
       [1.0987557e-02, 9.8722410e-01, 1.7883488e-03]], dtype=float32)

In [53]:
from sklearn.metrics import confusion_matrix

# Per-batch class indices are gathered in lists and joined once the loop ends.
Y = []
Y_pred = []

# Draw 999 batches from the evaluation iterator, printing a dot on every
# 100th iteration as a coarse progress indicator.
for i in range(1, 1000):
    if not i % 100:
        print(".", end="")
    lst, y_true = next(test_data)
    # Reduce the 'classification' targets and the predictions to class indices.
    y_true = np.argmax(y_true['classification'], axis=-1)
    y_pred = np.argmax(tracking_model.predict(lst), axis=-1)
    Y += [y_true]
    Y_pred += [y_pred]

# Flatten the per-batch results into one array each.
Y = np.concatenate(Y)
Y_pred = np.concatenate(Y_pred)

print("")
# Confusion matrix as computed by sklearn from the true and predicted class indices.
cm = confusion_matrix(Y, Y_pred)
print(cm)

.........
[[4827  437]
 [   0 9095]]


In [54]:
# Overall accuracy: fraction of evaluated samples whose predicted class index
# equals the true class index.
test_acc = np.mean(np.asarray(Y) == np.asarray(Y_pred))
print('Accuracy across all three classes: ', test_acc)

# `confusion_matrix` called without `labels` only contains the classes that
# occur in Y or Y_pred (in sorted order), so the matrix can be smaller than
# 3x3. Derive the names from the classes actually present instead of always
# printing all three, otherwise the printed names and values do not line up.
class_names = ['Different', 'Same', 'Daughter']
present_classes = np.unique(np.concatenate([np.asarray(Y), np.asarray(Y_pred)]))
if len(present_classes) != cm.shape[0]:
    # The matrix was built with an explicit label list; label rows by position.
    present_classes = np.arange(cm.shape[0])
reported_names = [
    class_names[c] if 0 <= c < len(class_names) else str(c)
    for c in present_classes
]

# Normalize each row by its total so the diagonal holds, for every true class,
# the fraction of its samples that were predicted correctly.
cm = cm.astype('float')/cm.sum(axis=1)[:, np.newaxis]
# Diagonal entries are the accuracies of each class
print('Accuracy for each individual class [{}]: '.format(', '.join(reported_names)),
      cm.diagonal())

Accuracy across all three classes:  0.9695661257747754
Accuracy for each individual class [Different, Same, Daughter]:  [0.91698328 1.        ]
