In [8]:
# Shell escape: print the GPU status table (driver/CUDA version, memory use, utilisation).
!nvidia-smi

Mon Aug 12 13:46:58 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P0   159W / 250W |  16212MiB / 16280MiB |     93%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
%matplotlib inline
#%load_ext autoreload
#%autoreload 2
import tensorflow as tf
from tensorflow import keras

import DataGenerator as DG
from DataGenerator import DataGenerator

from tensorflow.keras import layers
from tensorflow.keras.utils import multi_gpu_model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import backend as K
#from keras_contrib.layers import crf
#from keras_contrib.losses import crf_loss
#from keras_contrib.metrics import crf_viterbi_accuracy
#Progress bar fix: use callbacks=[Logger.JupyterProgbarLogger()] in fit method
#verbose=0 is also required
import JupyterProgbarLogger as Logger
from tqdm import tqdm_notebook as tqdm
#from kerastuner.tuners import RandomSearch


import sklearn.metrics as metrics

import numpy as np
import random
import math

from datetime import datetime
import os
from shutil import copy
from functools import partial

import matplotlib.pyplot as plt

import h5py

import multiprocessing as mp

# Start worker processes with "spawn"; force=True allows this cell to be re-run
# after a start method has already been set.
mp.set_start_method("spawn",force=True)
# Turn off HDF5 file locking for this process and its children.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

###FIX NUMPY LOAD FOR DICTIONARIES
# Make np.load default to allow_pickle=True so pickled dictionaries can be read.
# SECURITY NOTE: unpickling executes arbitrary code; only load trusted files.
#
# The patch is idempotent: the original loader is remembered on the wrapper, so
# re-running this cell always wraps the real np.load instead of wrapping the wrapper.
np_load_old = getattr(np.load, "_np_load_original", np.load)


def _np_load_allow_pickle(*a, **k):
    """Call the original np.load with allow_pickle defaulting to True.

    An explicit allow_pickle passed by the caller is honoured rather than
    colliding with the forced keyword (which used to raise TypeError).
    """
    k.setdefault("allow_pickle", True)
    return np_load_old(*a, **k)


_np_load_allow_pickle._np_load_original = np_load_old
np.load = _np_load_allow_pickle

###Tensorflow session
# Build a TF1-compat session configuration and register it as the Keras backend session.
config = tf.compat.v1.ConfigProto()
# allow_growth asks TensorFlow to grow its GPU memory allocation as needed
# (per the TensorFlow GPU options documentation) instead of using the default policy.
config.gpu_options.allow_growth = True
# Every Keras model created after this point uses the session configured above.
tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))

In [4]:
#Data Variables
BATCH_SIZE=32
DATA_AMOUNT=100000
VALIDATION_AMOUNT=30000
OFFSET_AMOUNT=300000
VAL_OFFSET = 800100
USE_TUNER = False
#Broken
USE_CRF = False
#frames per sample / 3rd dimension for 3D CNN
depth=10
#Data file
filepath = "images_synthetic_mouse_uncompressed.h5"
#If you want to copy the file to a scratch directory (set to "" or None to skip)
scratch_dir = "/mnt/disks/sec/Amit/"#"/n/scratch2/ap487/"
#Copy file to scratch directory if it is specified, and change filepath to match.
if scratch_dir:
    # os.path.join gives the right path whether or not scratch_dir ends with a
    # separator; plain string concatenation produced a wrong path without it.
    scratch_path = os.path.join(scratch_dir, filepath)
    if not os.path.exists(scratch_path):
        # Copy to the exact target path so the existence check above and the
        # path used afterwards always refer to the same file.
        copy(filepath,scratch_path)
        print("File Copied to scratch directory")
    filepath=scratch_path
print("Using path: ",filepath)

Using path:  /mnt/disks/sec/Amit/images_synthetic_mouse_uncompressed.h5


In [5]:
def conv_block(x,
               num_layers,
               num_filters,
               block_id,
               conv_parameters,
               kernel=(3,3),
               padding='same',
               activation=layers.Activation('relu')):
    """Apply `num_layers` time-distributed Conv2D layers, then batch norm and an activation.

    Args:
        x: Input tensor; every layer is wrapped in `TimeDistributed`.
        num_layers: Number of Conv2D layers stacked before normalization.
        num_filters: Filter count passed to every Conv2D.
        block_id: Identifier used to build the layer names (converted with `str`).
        conv_parameters: Extra keyword arguments forwarded to every Conv2D.
        kernel: Kernel size passed to every Conv2D.
        padding: Accepted for signature compatibility but not used here; any
            padding must be supplied through `conv_parameters`.
        activation: Layer instance applied (time-distributed) after batch norm.

    Returns:
        The output tensor of the final activation.
    """
    block_id = str(block_id)
    for i in range(0,num_layers):
        # Layer names must be unique within a Keras model. The first conv keeps the
        # original name (so existing checkpoints still match by name); later ones
        # get a numeric suffix instead of reusing the same name.
        conv_name = 'block' + block_id + '_conv2d'
        if i > 0:
            conv_name += '_' + str(i + 1)
        x = layers.TimeDistributed(layers.Conv2D(num_filters, kernel, **conv_parameters),name=conv_name)(x)
    x = layers.TimeDistributed(layers.BatchNormalization(),name='block' + block_id+'_batchnormalization')(x)
    x = layers.TimeDistributed(activation,name='block' + block_id+'_activation')(x)
    return x
def pooling_block(x,
                  block_id,
                  padding='same',
                  activation=layers.Activation('relu')):
    """Time-distributed 2x2 max pooling followed by batch norm and an activation.

    Args:
        x: Input tensor.
        block_id: Identifier used in the layer names (`pooling<id>_...`).
        padding: Padding mode passed to `MaxPooling2D`.
        activation: Layer instance applied (time-distributed) as the last step.

    Returns:
        The output tensor of the final activation.
    """
    prefix = 'pooling' + str(block_id)

    pooled = layers.TimeDistributed(
        layers.MaxPooling2D((2, 2), padding=padding),
        name=prefix + '_maxpooling2d',
    )(x)
    normalized = layers.TimeDistributed(
        layers.BatchNormalization(),
        name=prefix + '_batchnormalization',
    )(pooled)
    return layers.TimeDistributed(activation, name=prefix + '_activation')(normalized)
def concat_block(a, b,
                  block_id,
                  padding='same',
                  activation=layers.Activation('relu')):
    """Concatenate two tensors, then apply time-distributed batch norm and an activation.

    Args:
        a: First tensor to concatenate.
        b: Second tensor to concatenate.
        block_id: Identifier used in the layer names (`block<id>_...`).
        padding: Accepted for signature symmetry with the other blocks; not used here.
        activation: Layer instance applied (time-distributed) as the last step.

    Returns:
        The output tensor of the final activation.
    """
    prefix = 'block' + str(block_id)

    merged = layers.Concatenate(name=prefix + "_concat")([a, b])
    normalized = layers.TimeDistributed(
        layers.BatchNormalization(),
        name=prefix + '_batchnormalization',
    )(merged)
    return layers.TimeDistributed(activation, name=prefix + '_activation')(normalized)
def build_model(tuner,
                input_shape=(80, 80, 1),
                stride_length=(1, 1),
                kernel=(3,3),
                kernel_initializer='glorot_uniform',
                activation=layers.Activation('elu'),
                dense_activation=layers.Activation('relu'),
                output_activation=layers.Activation('softmax'),
                batch_momentum=.999,
                dropout_chance=0.1,
                combine=True,
                padding='same',
                batch_norm=False,
                gpus = 1
            ):
    """Build and compile the time-distributed CNN + recurrent classifier.

    The per-frame input shape is prefixed with the module-level `depth`
    (frames per sample). The frames go through a stack of conv / pooling /
    concat blocks, are flattened per frame and fed to a recurrent layer.

    Args:
        tuner: Unused by this function; kept so the signature stays compatible
            with callers that pass a first positional argument.
        input_shape: Shape of a single frame.
        stride_length: Conv2D strides.
        kernel: Conv2D kernel size, forwarded to every conv block.
        kernel_initializer: Conv2D kernel initializer.
        activation: Currently unused (conv blocks use their own default).
        dense_activation: Currently unused.
        output_activation: Layer applied to the final Dense output.
        batch_momentum: Currently unused.
        dropout_chance: Rate for `SpatialDropout1D`; dropout is skipped when <= 0.
        combine: Currently unused.
        padding: Conv2D padding mode.
        batch_norm: Currently unused.
        gpus: Number of GPUs. Values > 1 wrap the model with `multi_gpu_model`
            using exactly this many GPUs.

    Returns:
        Tuple `(model, name)` of the compiled Keras model and its display name.
    """
    name = "LSTM CNN"
    input_shape = (depth,)+input_shape
    inputs = layers.Input(shape=input_shape)
    x = inputs
    conv_parameters = {
        'padding': padding,
        'strides': stride_length,
        'kernel_initializer': kernel_initializer
    }
    # encode net
    # `kernel` is forwarded so the argument actually takes effect; its default
    # equals conv_block's default, so default behaviour is unchanged.
    block_1 = conv_block(x,1,64,1,conv_parameters,kernel=kernel)
    pooling_1 = pooling_block(block_1,1)
    block_2 = conv_block(pooling_1,1,64,2,conv_parameters,kernel=kernel)

    concat_3 = concat_block(pooling_1,block_2,3)
    block_4 = conv_block(concat_3,1,128,4,conv_parameters,kernel=kernel)
    block_5 = conv_block(block_4,1,128,5,conv_parameters,kernel=kernel)
    concat_6 = concat_block(concat_3,block_5,6)
    pooling_2 = pooling_block(concat_6,2)
    block_7 = conv_block(pooling_2,1,256,7,conv_parameters,kernel=kernel)
    block_8 = conv_block(block_7,1,256,8,conv_parameters,kernel=kernel)

    concat_9 = concat_block(pooling_2,block_8,9)
    pooling_3 = pooling_block(concat_9,3)
    block_10 = conv_block(pooling_3,1,512,10,conv_parameters,kernel=kernel)
    block_11 = conv_block(block_10,1,256,11,conv_parameters,kernel=kernel)

    concat_12 = concat_block(pooling_3,block_11,12)
    block_13 = conv_block(concat_12,1,128,13,conv_parameters,kernel=kernel)
    pooling_4 = pooling_block(block_13,4)
    #x = conv_block(x,2,256,5,conv_parameters)
    #x = layers.TimeDistributed(layers.GlobalAveragePooling2D(),name='GlobalAveragePooling')(x)
    # Flatten every frame's feature map into one vector per time step.
    x = layers.TimeDistributed(layers.Flatten(),name='Flatten')(pooling_4)
    if dropout_chance>0:
        x = layers.SpatialDropout1D(dropout_chance,name='Dropout')(x)
    if USE_CRF:
        # NOTE: this branch needs `crf` from keras_contrib, whose import is
        # commented out at the top of the notebook; it fails with NameError
        # until that import is restored.
        x = layers.LSTM(512,return_sequences=True)(x)
        output = crf.CRF(64)(x)
    else:
        x = layers.GRU(512,return_sequences=False,name='GRU')(x)
        x = layers.Dense(4096, activation='relu',name='FC1')(x)
        # NOTE(review): ReLU is applied to the 60 outputs before the softmax
        # below - confirm this is intended rather than a linear output layer.
        x = layers.Dense(60, activation='relu',name='Output')(x)
        output = output_activation(x)
    model = keras.models.Model(inputs, output)
    if gpus > 1:
        single_model = model
        # Use the requested GPU count (this was hard-coded to 4 regardless of `gpus`).
        model = multi_gpu_model(model,gpus=gpus,cpu_relocation=True)
        # Keep a handle on the single-device model on the multi-GPU wrapper.
        model.__setattr__("callback_model",single_model)
    model.compile(
        optimizer=keras.optimizers.SGD(
            learning_rate=1e-4,
            momentum=.9,
            nesterov=True,
            decay=1e-6
        ),
        loss='sparse_categorical_crossentropy',#crf_loss,
        metrics=['accuracy']#crf_viterbi_accuracy]
    )
    return model,name

In [6]:
# Choose how the model is obtained:
#   * depth == 1  -> single-frame DenseNet121 baseline
#   * USE_TUNER   -> hyper-parameter search over build_model
#   * otherwise   -> the time-distributed network from build_model
if depth == 1:
    model = keras.applications.DenseNet121(include_top=True, weights=None, input_shape=(80,80,1), classes=64)
    model.compile(
        optimizer=keras.optimizers.SGD(
            learning_rate=1e-4,
            momentum=.9,
            nesterov=True,
            decay=1e-6
        ),
        loss='sparse_categorical_crossentropy',#crf_loss,
        metrics=['accuracy']#crf_viterbi_accuracy]
    )#model,name = build_model()
    model.summary()
elif USE_TUNER:
    # NOTE: RandomSearch comes from kerastuner, whose import is commented out
    # in the import cell; restore it before enabling USE_TUNER.
    tuner = RandomSearch(
        build_model,
        objective='val_accuracy',
        max_trials=40,
        executions_per_trial=1,
        directory='actionrecog',
        project_name='Action Recognition',
        is_generator=True)
    print(tuner.search_space_summary())
else:
    model,name = build_model(None,input_shape=(80,80,1),gpus=1,dropout_chance=0.25)
    model.summary(line_length=115)

Model: "model"
___________________________________________________________________________________________________________________
Layer (type)                         Output Shape              Param #       Connected to                          
input_1 (InputLayer)                 [(None, 10, 80, 80, 1)]   0                                                   
___________________________________________________________________________________________________________________
block1_conv2d (TimeDistributed)      (None, 10, 80, 80, 64)    640           input_1[0][0]                         
___________________________________________________________________________________________________________________
block1_batchnormalization (TimeDistr (None, 10, 80, 80, 64)    256           block1_conv2d[0][0]                   
___________________________________________________________________________________________________________________
block1_activation (TimeDistributed)  (None, 10, 80, 80, 6

In [10]:
#Initialize generators
# Settings shared by the training and validation generators; only the amount of
# data and the starting offset differ between the two.
generator_options = dict(
    batch_size=BATCH_SIZE,
    frames_per_sample=depth,
    sliding_window=1,
    standardize=True,
    labels_structured=False,
)
data_gen = DataGenerator(filepath, data_amount=DATA_AMOUNT, offset=OFFSET_AMOUNT, **generator_options)
validation_gen = DataGenerator(filepath, data_amount=VALIDATION_AMOUNT, offset=VAL_OFFSET, **generator_options)

In [None]:
if USE_TUNER:
    # Hyper-parameter search; keep the first of the best models returned.
    tuner.search(generator=data_gen, validation_data=validation_gen,
                use_multiprocessing=True, workers=10,
                epochs = 8)
    model = tuner.get_best_models(num_models=5)[0]
else:
    start_time = datetime.today().strftime('%Y-%m-%d-%H:%M:%S')
    # ModelCheckpoint below writes into models/. Create it up front so a missing
    # directory cannot abort the run when the first checkpoint is written
    # (older tf.keras versions may not create it themselves).
    os.makedirs('models', exist_ok=True)
    # verbose=0 together with the Jupyter progress-bar logger replaces the
    # default Keras progress output (see the note in the import cell).
    # NOTE(review): patience=6 exceeds epochs=5, so EarlyStopping cannot trigger
    # in this configuration - confirm the intended values.
    history = model.fit_generator(generator=data_gen,
                    validation_data=validation_gen,
                    epochs=5,
                    verbose=0,
                    use_multiprocessing=True,
                    workers=2,
                    max_queue_size=10,
                    callbacks=[Logger.JupyterProgbarLogger(count_mode='steps',measure_gpu=True),
                              keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0, patience=6,verbose=1, mode='auto',restore_best_weights=True),
                              keras.callbacks.ModelCheckpoint(filepath='models/model-'+start_time+'-progress-{epoch:02d}.h5')]
                   )

HBox(children=(IntProgress(value=0, description='Epoch 1/5[CPU:  0%]', max=3123, style=ProgressStyle(descripti…

In [None]:
# Plot training and validation loss per epoch. `history` only exists when the
# non-tuner training branch has been run.
fig, ax = plt.subplots()
ax.plot(history.history["loss"], label="loss")
ax.plot(history.history["val_loss"], label="val_loss")
# Label the curves and axes so the figure can be read on its own.
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.set_title("Training vs. validation loss")
ax.legend()
plt.show()

[autoreload of IPython.core.application failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 450, in superreload
    autoreloaded.
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 387, in update_generic
    if new_refs:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 357, in update_class
    # reload module
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 280, in update_instances
    continue
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/inspect.py"

[autoreload of IPython.core.history failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 450, in superreload
    autoreloaded.
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 387, in update_generic
    if new_refs:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 357, in update_class
    # reload module
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 280, in update_instances
    continue
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/inspect.py", li

[autoreload of IPython.terminal.ptutils failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 434, in superreload
    to new versions when 'xxx' is reloaded.
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/imp.py", line 315, in reload
    return importlib.reload(module)
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 678, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/

[autoreload of numpy.lib failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 434, in superreload
    to new versions when 'xxx' is reloaded.
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/imp.py", line 315, in reload
    return importlib.reload(module)
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 678, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/s

The iterable function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use np.iterable instead.
  _bootstrap._exec(spec, module)
[autoreload of matplotlib.cm failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 434, in superreload
    to new versions when 'xxx' is reloaded.
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/imp.py", line 315, in reload
    return importlib.reload(module)
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 678, in exec_module
  File "<fr

W0809 20:11:31.454982 140491946297152 __init__.py:943] Bad val 'silent' on line #476
	"verbose.level  : silent      # one of silent, helpful, debug, debug-annoying
"
	in file "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/classic.mplstyle"
W0809 20:11:31.456016 140491946297152 __init__.py:943] Bad val 'sys.stdout' on line #477
	"verbose.fileo  : sys.stdout  # a log filename, sys.stdout or sys.stderr
"
	in file "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/classic.mplstyle"
W0809 20:11:31.464228 140491946297152 __init__.py:943] Bad val 'True' on line #421
	"savefig.frameon : True
"
	in file "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle"
W0809 20:11:31.464929 140491946297152 __init__.py:943] Bad val 'silent' on line #472
	"verbose.level  : silent      # one of silent, hel

[autoreload of matplotlib.units failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 450, in superreload
    autoreloaded.
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 387, in update_generic
    if new_refs:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 357, in update_class
    # reload module
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 280, in update_instances
    continue
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/inspect.py", line 1

[autoreload of scipy.sparse.bsr failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 385, in superreload
    update_generic(old_obj, new_obj)
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 324, in update_generic
    update(a, b)
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 289, in update_class
    if update_generic(old_obj, new_obj): continue
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 324, in update_generic
    update(a, b)
  File "/home/g17amitprasad_gmail_com/minico

[autoreload of scipy.integrate._ivp.rk failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    try:
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 385, in superreload
    update_generic(old_obj, new_obj)
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 324, in update_generic
    update(a, b)
  File "/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 279, in update_class
    if old_obj == new_obj:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
]
[autoreload of scipy.stats._continuous_distns failed: Traceback (most recent call last):
  File "/home/g17amitprasad_gmail_c

In [6]:
# Save the current model to the working directory under a timestamped name.
save_timestamp = datetime.today().strftime('%Y-%m-%d-%H:%M:%S')
filename = "model-" + save_timestamp + ".h5"
model.save(filename)
print("Model saved to ",filename)

Model saved to  model-2019-08-09-17:13:05.h5


In [7]:
def create_training_node(model,
                  outdir = 'training',
                  job_name='train',
                  cores=2,
                  mem='8GB',
                  max_time='120:00:00',
                  data_file = 'images_synthetic_mouse.h5',
                  data_amount = 0,
                  data_offset = 0,
                  validation_amount = 0,
                  validation_offset = 0,
                  batch_size = 8,
                  slide = 5,
                  verbosity = 1,
                  epochs = 40
                 ):
    """Save `model` and build the `sbatch` command line that continues training it.

    The model is written to `<outdir>/initial.h5` (creating `outdir` if it does
    not exist). The command itself is only returned as a string, never executed.

    Args:
        model: Object with a `save(path)` method.
        outdir: Directory that receives `initial.h5`; also the prefix of the
            output directory argument passed to the training script.
        job_name: Value for `--job-name`.
        cores: Value for `-n`.
        mem: Value for `--mem`.
        max_time: Value for `--time`.
        data_file: Data file path passed to `train_model.py`.
        data_amount, data_offset, validation_amount, validation_offset,
        batch_size, slide, verbosity, epochs: Forwarded as the matching
            `--...` options of `train_model.py`.

    Returns:
        The complete `sbatch ... --wrap="..."` command as a string.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    initial_model_path = outdir + '/initial.h5'
    model.save(initial_model_path)

    working_dir = os.getcwd()
    python_bin = os.path.expanduser("~") + '/miniconda3/envs/tf2_gpu/bin/python'

    # Options consumed by sbatch itself.
    slurm_parts = [
        'sbatch',
        '--job-name ' + job_name,
        '--output "' + working_dir + '/train-%j.log"',
        '--partition small-gpu-b-preemptible',
        '-n ' + str(cores),
        '--mem ' + mem,
        '--time ' + max_time,
    ]

    # Command run inside the job (the contents of --wrap="...").
    script_parts = [
        python_bin + ' train_model.py continue',
        '--data-amount ' + str(data_amount),
        '--data-offset ' + str(data_offset),
        '--validation-amount ' + str(validation_amount),
        '--validation-offset ' + str(validation_offset),
        '--batch-size ' + str(batch_size),
        '--slide ' + str(slide),
        '--verbosity ' + str(verbosity),
        '--epochs ' + str(epochs),
        "'" + data_file + "'",
        # The literal "%s" is passed through unchanged.
        # NOTE(review): presumably train_model.py fills it in - confirm.
        "'" + working_dir + '/' + outdir + '-%s/' + "'",
        "'" + initial_model_path + "'",
    ]

    # A single space precedes the closing quote, matching the original layout.
    return ' '.join(slurm_parts) + ' --wrap="' + ' '.join(script_parts) + ' "'
# Build the sbatch command for a full training run and print it so it can be
# submitted by hand. The call also writes training/initial.h5 as a side effect.
training_command = create_training_node(
    model,
    data_file=filepath,
    outdir='training',
    data_amount=800000,
    data_offset=0,
    validation_amount=30000,
    validation_offset=800100,
    batch_size=32,
    slide=1,
    epochs=10,
    verbosity=0,
)
print(training_command)

sbatch --job-name train --output "/home/g17amitprasad_gmail_com/2019-notebooks/Action Recognition/train-%j.log" --partition small-gpu-b-preemptible -n 2 --mem 8GB --time 120:00:00 --wrap="/home/g17amitprasad_gmail_com/miniconda3/envs/tf2_gpu/bin/python train_model.py continue --data-amount 800000 --data-offset 0 --validation-amount 30000 --validation-offset 800100 --batch-size 32 --slide 1 --verbosity 0 --epochs 10 '/mnt/disks/sec/Amit/images_synthetic_mouse_uncompressed.h5' '/home/g17amitprasad_gmail_com/2019-notebooks/Action Recognition/training-%s/' 'training/initial.h5' "
