In [1]:
import os
import datetime
import errno

import numpy as np

import deepcell
from deepcell.utils.tracking_utils import load_trks, trks_stats

In [2]:
# Input data locations used by the `get_data` calls in this notebook:
# DATA_FILE feeds training/validation, TEST_FILE feeds the final evaluation.
# (An earlier run pointed at "../trks_141923_repeat.trks" instead.)
DATA_FILE, TEST_FILE = '../train.npz', '../test.npz'

# Change DATA_DIR if you are not using `deepcell.datasets`.
DATA_DIR = '~'

# Stop here if the training data file is not available.
assert os.path.isfile(DATA_FILE)
# The equivalent check for TEST_FILE is currently disabled:
# assert os.path.isfile(TEST_FILE)

In [3]:
#trks_stats('../train.trks')
#trks_stats('../test.trks')

In [4]:
# Set up other required filepaths

# If the data file is in a subdirectory, mirror it in MODEL_DIR and LOG_DIR
PREFIX = 'attempt_1'

ROOT_DIR = '../track_train'  # TODO: Change this! Usually a mounted volume
MODEL_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'models', PREFIX))
LOG_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'logs', PREFIX))

# Create the output directories if they do not exist. `exist_ok=True` tolerates
# a directory that is already there (including one created concurrently), but
# still raises if the path exists and is NOT a directory, instead of silently
# continuing and failing later when the model or logs are written.
for d in (MODEL_DIR, LOG_DIR):
    os.makedirs(d, exist_ok=True)

In [15]:
import deepcell.image_generators as generators
from deepcell.utils.data_utils import get_data

# The split settings must exist before the data is loaded. On a fresh kernel
# they were otherwise only defined by the training-settings cell further down,
# so this cell raised NameError under Restart & Run All. Keep these values in
# sync with that cell.
test_size = .1   # fraction of the data held out for validation
train_seed = 1   # random seed for the training/validation split

# Get the data.
# (A 'siamese_daughters' mode call was used previously in place of 'sample'.)
train_dict, test_dict = get_data(DATA_FILE, mode='sample', seed=train_seed, test_size=test_size)

In [16]:
# Report the shape of the `X` array in the training and validation splits.
for split in (train_dict, test_dict):
    print(split['X'].shape)

(396, 100, 530, 530, 1)
(44, 100, 530, 530, 1)


In [17]:
# Display the dtype of the training split's `X` array.
train_dict['X'].dtype

dtype('int32')

In [18]:
from tensorflow.keras.optimizers import SGD
from deepcell.utils.train_utils import rate_scheduler

# --- Run settings ---
train_seed = 1   # Random seed for training/validation data split
test_size = .1   # Fraction of data saved as validation
n_epoch = 10     # Number of training epochs

# --- Tracking training settings ---
# Values after `##` are presumably the settings used before they were changed.
crop_dim = 40                 ## 32
in_shape = (crop_dim, crop_dim, 1)
batch_size = 16               ## 128
min_track_length = 30         ## 9
neighborhood_scale_size = 10  ## 30
features = {'appearance', 'distance', 'neighborhood', 'regionprop'}

# --- Optimisation ---
lr_sched = rate_scheduler(lr=0.01, decay=0.99)
optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# File stem used for the saved model and loss history.
model_name = 'tracking_model_seed{}_tl{}'.format(train_seed, min_track_length)

In [19]:
# Build the training data generator.
# Every augmentation option is set to 0 here: no rotation range, no shear
# range, and the horizontal/vertical flip flags are off.
datagen_train = generators.SiameseDataGenerator(
    horizontal_flip=0,
    vertical_flip=0,
    rotation_range=0,
    shear_range=0,
)

In [None]:
# Iterator over the training split, configured from the tracking settings.
# (An earlier version of this cell passed `test_dict` instead of `train_dict`.)
train_flow_kwargs = dict(
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features,
)
train_data = datagen_train.flow(train_dict, **train_flow_kwargs)

In [None]:
# Validation-side generator: same all-zero augmentation settings as training.
datagen_test = generators.SiameseDataGenerator(
    horizontal_flip=0,
    vertical_flip=0,
    rotation_range=0,
    shear_range=0,
)

# Iterator over the validation split, using the same tracking settings.
test_data = datagen_test.flow(
    test_dict,
    features=features,
    min_track_length=min_track_length,
    neighborhood_scale_size=neighborhood_scale_size,
    crop_dim=crop_dim,
    seed=train_seed,
    batch_size=batch_size,
)

In [None]:
from deepcell import model_zoo

# Instantiate the siamese tracking model for `in_shape` inputs with the
# selected feature set and neighborhood scale.
tracking_model = model_zoo.siamese_model(
    features=features,
    neighborhood_scale_size=neighborhood_scale_size,
    input_shape=in_shape,
)

In [None]:
from deepcell import losses

# Class count is read from the last axis of the final layer's output shape.
n_classes = tracking_model.layers[-1].output_shape[-1]


def loss_function(y_true, y_pred):
    """Return the weighted categorical cross-entropy for a batch.

    Delegates to `losses.weighted_categorical_crossentropy` with the
    module-level `n_classes` and `from_logits=False`.
    """
    loss = losses.weighted_categorical_crossentropy(
        y_true, y_pred, n_classes=n_classes, from_logits=False)
    return loss

In [None]:
# Attach the loss, the optimizer and the accuracy metric to the model.
tracking_model.compile(
    optimizer=optimizer,
    loss=loss_function,
    metrics=['accuracy'],
)

### Train the model

Call `fit_generator` on the compiled model, along with a default set of callbacks.

In [None]:
# Scratch check: number of full batches in the training split, and the dtype
# reported once that count is held as a float32 array.
steps_per_epoch = train_dict['y'].shape[0] // batch_size
b = np.asarray(steps_per_epoch, dtype=np.float32)
print(b.dtype)

In [None]:
from deepcell.utils.train_utils import get_callbacks
from deepcell.utils.train_utils import count_gpus
from deepcell.utils import tracking_utils


# Output files for the trained model (.h5) and the loss history (.npz).
model_path, loss_path = (
    os.path.join(MODEL_DIR, template.format(model_name))
    for template in ('{}.h5', '{}.npz')
)

num_gpus = count_gpus()
print('Training on', num_gpus, 'GPUs.')

# Only weights are saved when two or more GPUs are in use.
callback_options = dict(
    lr_sched=lr_sched,
    tensorboard_log_dir=LOG_DIR,
    save_weights_only=num_gpus >= 2,
    monitor='val_loss',
    verbose=1,
)
train_callbacks = get_callbacks(model_path, **callback_options)

# Rough estimate for the step counts: number of full batches in each split.
# (A `tracking_utils.count_pairs(..., same_probability=5.0)` based estimate
# was used previously.)
steps_per_epoch, validation_steps = (
    split_dict['y'].shape[0] // batch_size
    for split_dict in (train_dict, test_dict)
)

In [None]:
# Fit the model on the batches generated by datagen.flow().
# Keras documents `steps_per_epoch`, `validation_steps` and `epochs` as
# integers, so the batch counts are passed as plain ints rather than float32
# arrays, and the epoch count comes from the `n_epoch` setting instead of a
# separate hard-coded literal.
loss_history = tracking_model.fit_generator(
    train_data,
    steps_per_epoch=int(steps_per_epoch),
    epochs=n_epoch,
    validation_data=test_data,
    validation_steps=int(validation_steps),
    callbacks=train_callbacks)

In [None]:
# Display the number of full batches in the training split's `y`
# (mirrors the `steps_per_epoch` computation).
len(train_dict['y'])//batch_size

## Evaluate Model Performance

**Requires a Seed Value**

In [None]:
# Rosary added: redefine the data splits from TEST_FILE for evaluation.
# NOTE: this overwrites the `train_dict` / `test_dict` that were used for
# training; with test_size=0.95 presumably almost all of TEST_FILE ends up
# in `test_dict`.
train_dict, test_dict = get_data(
    TEST_FILE,
    mode='sample',
    seed=train_seed,
    test_size=0.95,
)

In [None]:
# Rebuild the evaluation iterator on the newly loaded `test_dict`, with the
# same tracking settings that were used for training.
eval_flow_kwargs = dict(
    batch_size=batch_size,
    seed=train_seed,
    crop_dim=crop_dim,
    neighborhood_scale_size=neighborhood_scale_size,
    min_track_length=min_track_length,
    features=features,
)
test_data = datagen_test.flow(test_dict, **eval_flow_kwargs)

In [None]:
# Pull one batch from the evaluation iterator and run the model on its inputs;
# the prediction is displayed as the cell output.
lst, y_true = next(test_data)
tracking_model.predict(lst)

In [None]:
from sklearn.metrics import confusion_matrix

# Collect true and predicted class indices over many evaluation batches.
Y = []
Y_pred = []
n_label_classes = None  # width of the one-hot labels, read from the first batch

for i in range(1,1000):
    if i % 100 == 0:
        print(".", end="")  # progress marker every 100 batches
    lst, y_true = next(test_data)
    if n_label_classes is None:
        n_label_classes = y_true['classification'].shape[-1]
    # Convert one-hot labels / class scores to class indices.
    y_true = np.argmax(y_true['classification'], axis=-1)
    y_pred = np.argmax(tracking_model.predict(lst), axis=-1)
    Y.append(y_true)
    Y_pred.append(y_pred)

Y = np.concatenate(Y, axis=0)
Y_pred = np.concatenate(Y_pred, axis=0)

print("")
# Pass `labels` explicitly so the matrix always has one row/column per class,
# in class-index order. Without it, a class that never shows up in the sampled
# batches is dropped and the per-class accuracies reported afterwards no
# longer line up with their class names.
cm = confusion_matrix(Y, Y_pred, labels=list(range(n_label_classes)))
print(cm)

In [None]:
# Overall accuracy: fraction of samples whose predicted class index equals
# the true class index.
n_correct = np.sum(np.array(Y) == np.array(Y_pred))
test_acc = n_correct / len(Y)
print('Accuracy across all three classes: ', test_acc)

# Divide every row of the confusion matrix by that row's total, so each
# diagonal entry becomes the fraction of that true class predicted correctly.
row_totals = cm.sum(axis=1, keepdims=True)
cm = cm.astype('float') / row_totals
print('Accuracy for each individual class [Different, Same, Daughter]: ', cm.diagonal())