In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
%matplotlib inline

import tensorflow as tf
from tensorflow import keras

import DataGenerator as DG
from DataGenerator import DataGenerator

from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard
#Progress bar fix: use callbacks=[Logger.JupyterProgbarLogger()] in model.fit
#verbose=0 is also required
import JupyterProgbarLogger as Logger
from tqdm import tqdm_notebook as tqdm
#from kerastuner.tuners import RandomSearch

import sklearn.metrics as metrics

import numpy as np
import random
import math

import matplotlib.pyplot as plt

import h5py

# Make np.load default to allow_pickle=True so pickled dictionaries saved with
# np.save can be read back.
# NOTE(security): unpickling executes arbitrary code; only load .npy files
# that come from a trusted source.
# The guard keeps this cell idempotent: re-running it used to wrap the already
# patched loader, which then received allow_pickle twice and raised TypeError.
if not getattr(np.load, "_allow_pickle_default", False):
    np_load_old = np.load

    def _np_load_allow_pickle(*args, **kwargs):
        """Call the original np.load, defaulting allow_pickle to True."""
        # setdefault lets callers still pass allow_pickle explicitly.
        kwargs.setdefault("allow_pickle", True)
        return np_load_old(*args, **kwargs)

    _np_load_allow_pickle._allow_pickle_default = True
    np.load = _np_load_allow_pickle

In [2]:
# Run configuration shared by the cells below.
BATCH_SIZE = 1
DATA_AMOUNT = 100_000
# Frames per sample, i.e. the size of the 3rd (depth) dimension for the 3D CNN.
depth = 32

In [3]:
def build_model(
                input_shape=(1,80, 80, 1),
                stride_length=(1, 1, 1),
                kernel=(3,3,3),
                kernel_initializer='glorot_uniform',
                activation=layers.Activation('elu'),
                dense_activation=layers.Activation('relu'),
                output_activation=layers.Activation('softmax'),
                batch_momentum=.999,
                dropout_chance=0.2,
                combine=True,
                padding='valid',
                batch_norm=False,
                dropout=False
            ):
    """Build and compile the 3D CNN classifier.

    Three Conv3D stages (64, 128, 256 filters) are stacked; each stage is
    followed by optional BatchNormalization *or* Dropout and a 2x2x2
    MaxPooling3D. `activation` is applied once after the last stage, then
    GlobalAveragePooling3D and `output_activation` produce the output.

    Args:
        input_shape: shape passed to `layers.Input` (batch axis excluded).
        stride_length: strides for every Conv3D layer.
        kernel: kernel size for every Conv3D layer.
        kernel_initializer: initializer for every Conv3D layer.
        activation: layer applied once after the convolution stack.
        dense_activation: accepted but not used in this function.
        output_activation: layer applied to the pooled features.
        batch_momentum: momentum for BatchNormalization (if enabled).
        dropout_chance: rate for Dropout (if enabled).
        combine: accepted but not used in this function.
        padding: padding mode for both Conv3D and MaxPooling3D.
        batch_norm: insert BatchNormalization after each convolution.
        dropout: insert Dropout after each convolution; ignored when
            `batch_norm` is true because the two are in an if/elif chain.

    Returns:
        Tuple `(model, name)`: the compiled Keras model and the string
        "3D CNN".

    Notes:
        - The default activation layers are created once, when this `def`
          is executed, so every call that relies on the defaults reuses the
          same layer objects.
        - NOTE(review): there is no Dense head; the softmax runs directly on
          the 256 pooled channels, so the model emits 256 class scores.
          Confirm this matches the label set.
    """
    name = "3D CNN"
    inputs = layers.Input(shape=input_shape)

    # Convolutional encoder: conv -> (batch norm | dropout) -> max pool.
    features = inputs
    for n_filters in (64, 128, 256):
        features = layers.Conv3D(
            n_filters,
            kernel,
            padding=padding,
            strides=stride_length,
            kernel_initializer=kernel_initializer,
        )(features)
        if batch_norm:
            features = layers.BatchNormalization(momentum=batch_momentum)(features)
        elif dropout:
            features = layers.Dropout(dropout_chance)(features)
        features = layers.MaxPooling3D((2, 2, 2), padding=padding)(features)

    # The non-linearity is applied only here, after the whole stack.
    features = activation(features)
    features = layers.GlobalAveragePooling3D()(features)
    output = output_activation(features)

    model = keras.models.Model(inputs, output)
    sgd = keras.optimizers.SGD(
        learning_rate=1e-3,
        momentum=0.9,
        nesterov=True,
        decay=1e-6,
    )
    model.compile(
        optimizer=sgd,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model, name

In [4]:
# Build the network for `depth` frames per sample; depth == 1 falls back to
# build_model's default single-frame input shape.
if depth != 1:
    model,name = build_model(input_shape=(depth,80,80,1))#,dropout=True,dropout_chance=0.05)
    #if(len(train_data.shape)<5):
    #    train_data=np.reshape(train_data,(int(len(train_data)/depth),depth,80,80,1))
    #    val_data=np.reshape(val_data,(int(len(val_data)/depth),depth,80,80,1))
    #    test_data=np.reshape(test_data,(int(len(test_data)/depth),depth,80,80,1))
    #    train_labels=train_labels[::depth]
    #    val_labels=val_labels[::depth]
    #    test_labels=test_labels[::depth]
else:
    model,name = build_model()
# Model.summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 80, 80, 1)]   0         
_________________________________________________________________
conv3d (Conv3D)              (None, 30, 78, 78, 64)    1792      
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 15, 39, 39, 64)    0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 13, 37, 37, 128)   221312    
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 6, 18, 18, 128)    0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 4, 16, 16, 256)    884992    
_________________________________________________________________
max_pooling3d_2 (MaxPooling3 (None, 2, 8, 8, 256)      0     

In [5]:
# Generator over the pre-processed HDF5 file, configured from the notebook
# constants.
data_gen = DataGenerator(
    "images_processed.h5",
    data_amount=DATA_AMOUNT,
    batch_size=BATCH_SIZE,
    frames_per_sample=depth,
)

# verbose=0 together with JupyterProgbarLogger is the progress-bar workaround
# described next to the imports.
training_callbacks = [
    Logger.JupyterProgbarLogger(),
    keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        min_delta=0,
        patience=4,
        verbose=1,
        mode='auto',
        restore_best_weights=True,
    ),
]

# NOTE(review): the same generator object is passed for training and for
# validation, so val_accuracy (which EarlyStopping monitors) is computed from
# whatever this generator yields for training. Confirm a separate validation
# split is not intended here.
history = model.fit_generator(
    generator=data_gen,
    validation_data=data_gen,
    epochs=20,
    verbose=0,
    use_multiprocessing=True,
    workers=14,
    callbacks=training_callbacks,
)

Epoch 1/20


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

W0718 16:52:16.304684 139750614456128 deprecation.py:323] From /home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Metrics: loss: 11.8486 - accuracy: 0.4984+00

Process Keras_worker_ForkPoolWorker-10:
Process Keras_worker_ForkPoolWorker-13:
Process Keras_worker_ForkPoolWorker-12:
Process Keras_worker_ForkPoolWorker-2:
Process Keras_worker_ForkPoolWorker-9:
Process Keras_worker_ForkPoolWorker-11:
Process Keras_worker_ForkPoolWorker-8:
Process Keras_worker_ForkPoolWorker-4:
Process Keras_worker_ForkPoolWorker-5:
Process Keras_worker_ForkPoolWorker-6:
Process Keras_worker_ForkPoolWorker-1:
Process Keras_worker_ForkPoolWorker-14:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Process Keras_worker_ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process Keras

  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/connection.py", line 398, in _send_bytes
    self._send(buf)
KeyboardInterrupt
  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
KeyboardInterrupt
  File "/home/amitp/anaconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__

KeyboardInterrupt: 

In [None]:
def accuracy(predictions, answers):
    """Return the fraction of predictions that equal the matching answer.

    Args:
        predictions: sequence or array of predicted labels.
        answers: sequence or array of reference labels with the same number
            of entries. Both inputs are flattened, so an `(n, 1)` label
            column compares correctly against an `(n,)` prediction vector.

    Returns:
        Mean of the element-wise equality as a float in [0, 1]; `nan` when
        both inputs are empty.

    Raises:
        ValueError: if the inputs hold a different number of entries.
            Previously extra predictions were silently ignored and missing
            ones surfaced as an IndexError.
    """
    predicted = np.asarray(predictions).ravel()
    expected = np.asarray(answers).ravel()
    if predicted.size != expected.size:
        raise ValueError(
            "predictions and answers must have the same number of entries "
            "(got %d and %d)" % (predicted.size, expected.size)
        )
    if expected.size == 0:
        # np.mean of an empty array is also nan, but it emits a RuntimeWarning.
        return np.nan
    # Vectorised comparison replaces the per-element Python loop.
    return np.mean(predicted == expected)

In [None]:
# Predicted class index per test sample (argmax over axis 1 of the model output).
# NOTE(review): test_data is only created by the raw-data loading cell further
# down the notebook, so this cell fails on a fresh top-to-bottom run.
predicts = np.argmax(model.predict(test_data), axis=1)

In [None]:
# Overlay true labels and predictions for the first 100 test samples.
#plt.hist((test_labels,predicts), density=True, histtype='bar', stacked=True, label = ("Data","Predictions"))
#plt.legend()
fig, ax = plt.subplots()
ax.plot(test_labels)
ax.plot(predicts)
ax.set_xlim(0, 100)
plt.show()

In [None]:
# Overall accuracy on the test split, followed by the F1 scores computed
# with average=None (one score per class rather than a single average).
print(accuracy(predicts,test_labels))
print(metrics.f1_score(test_labels,predicts,average=None))

In [None]:
# Record this model's per-class F1 scores under `name` in benchmarks.npy.
# The file is expected to already exist; np.load raises otherwise.
# NOTE(review): presumably the file holds a 0-d object array wrapping a dict.
# The update relies on .item() returning that stored dict itself, so the
# assignment is visible when the array is saved again - confirm.
model_benchmarks = np.load("benchmarks.npy")
model_benchmarks.item()[name]=metrics.f1_score(test_labels,predicts,average=None)
np.save("benchmarks.npy",model_benchmarks)
print(model_benchmarks)

In [None]:
# Sanity check: shape of the training array.
# NOTE(review): train_data is created by the raw-data loading cell further down.
print(train_data.shape)

In [None]:
# Peek at the first 300 test labels.
print(test_labels[0:300])

In [None]:
# Zoom into a 10-sample window of the raw label sequence starting at `ind`.
# NOTE(review): `labels` is deleted at the end of the raw-data loading cell,
# so this cell only works if run before that `del`.
ind = 500
plt.plot(labels)
plt.xlim(ind, ind + 10)
plt.show()

In [None]:
# Compare the normalised label distributions of the train, validation and test
# splits in one histogram.
plt.hist((train_labels,val_labels,test_labels),density=True)
#plt.hist(train_labels,46,density=True)
plt.show()

In [None]:
# Report whether TensorFlow can see a GPU.
# NOTE(review): tf.test.is_gpu_available is deprecated in later TensorFlow 2.x
# releases in favour of tf.config.list_physical_devices('GPU') - confirm
# against the installed version before switching.
print(tf.test.is_gpu_available())

In [None]:
# Shell escape: show the NVIDIA driver's view of the GPUs.
!nvidia-smi

In [None]:
# Load up to DATA_AMOUNT raw frames and labels, then split them into
# train / validation / test arrays with sort_data.
# NOTE(review): sort_data is defined in a later cell, and this cell overwrites
# the DATA_AMOUNT constant, so re-running it reads fewer rows each time.
file_path = "images_raw_doric_round1.h5"
with h5py.File(file_path, 'r') as f:
    labels = f['/labels'][:DATA_AMOUNT]
    data = f['/frames/raw'][:DATA_AMOUNT]

# Keep only frames whose label is non-negative (presumably negative labels
# mark unlabelled frames - confirm).
keep = labels >= 0
data = data[keep]
labels = labels[keep]
DATA_AMOUNT = len(data)

# Append a trailing channel axis when the frames have fewer than 4 dimensions.
if data.ndim < 4:
    data = data[..., None]

# Half of the frames for training, a quarter each for validation and test.
tl = DATA_AMOUNT // 2
vl = tsl = DATA_AMOUNT // 4
(train_data, train_labels,
 val_data, val_labels,
 test_data, test_labels) = sort_data(data, labels, tl, vl, tsl, blocksize=depth)

# Release the unsplit arrays.
del data, labels

In [None]:
def sort_data(data,labels,train_len,val_len,test_len,blocksize=1):
    """Randomly distribute label-homogeneous chunks of frames over three splits.

    `data` and `labels` are walked in order and cut into chunks in which every
    frame has the same label:

    * blocksize == 1: a chunk is one whole run of consecutive equal labels.
    * blocksize > 1: a chunk is exactly `blocksize` frames. Inside one run,
      each next chunk starts int(blocksize/2) frames after the previous one,
      so consecutive chunks overlap. Pieces shorter than `blocksize` are
      skipped.

    Each chunk is copied into one of the splits that still has room, chosen
    with np.random.choice. When a chunk reaches or exceeds a split's capacity
    it is truncated to fit and that split is closed. The walk stops when the
    data is exhausted or all three splits are closed.

    Args:
        data: frames indexed along axis 0; each must be assignable into an
            (80, 80, 1) slot of the output buffers.
        labels: one label per frame, same length as `data`.
        train_len, val_len, test_len: capacity (in frames) of each split.
        blocksize: frames per chunk (see above).

    Returns:
        (train_data, train_labels, val_data, val_labels, test_data,
        test_labels), each cut to the number of frames written, rounded down
        to a multiple of `blocksize`.

    NOTE(review): chunks from the same run overlap when blocksize > 1 and each
    chunk is assigned independently, so the same frame can land in more than
    one split. The random choice is not seeded here, so the split differs from
    run to run.
    """
    # Label of the run currently being scanned.
    curBlock = labels[0]
    # Output buffers; the frame shape (80, 80, 1) is hard-coded.
    train_data = np.zeros((train_len,80,80,1))
    val_data = np.zeros((val_len,80,80,1))
    test_data = np.zeros((test_len,80,80,1))
    # -1 marks label slots that were never written.
    train_labels = np.full(train_len,-1)
    val_labels = np.full(val_len,-1)
    test_labels = np.full(test_len,-1)
    # Next free write position in each split.
    train_ind = 0
    val_ind = 0
    test_ind = 0
    # Splits that still accept chunks: 0 = train, 1 = validation, 2 = test.
    choices = np.arange(3)
    i=0
    # Number of frames before position i that already belong to the next chunk.
    cur_len = 0
    while i < len(data):
        start = i-cur_len
        # With blocksize == 1 a chunk may grow to the full length of a run.
        iter_amt = len(data) if blocksize == 1 else blocksize
        # Extend the chunk while the label is unchanged and it is not yet full.
        while labels[i]==curBlock and cur_len < iter_amt:
            i+=1
            cur_len+=1
            if i == len(data):
                break
        end = i
        if not i == len(data):
            if labels[i]==curBlock:
                # Chunk is full but the run continues: keep part of this chunk
                # so the next one overlaps it.
                cur_len-=int(blocksize/2)
            else:
                # The run ended: the next chunk starts fresh at i.
                cur_len=0
            curBlock = labels[i]
        # Discard pieces shorter than a full block.
        if not blocksize == 1 and end-start < blocksize:
            continue
        cur_frames = data[start:end]
        cur_labels = labels[start:end]
        choice=-1
        if choices.size>0:
            choice = np.random.choice(choices)
        else:
            # Every split is closed; nothing more can be stored.
            break
        if choice == 0:
            # Truncate to the remaining capacity and close the split when the
            # chunk reaches or exceeds it.
            if train_ind + len(cur_frames) >= train_len:
                cur_frames = cur_frames[:train_len-train_ind]
                cur_labels = cur_labels[:train_len-train_ind]
                choices=np.delete(choices,np.argwhere(choices==0))
            train_data[train_ind:train_ind+len(cur_frames)]=cur_frames
            train_labels[train_ind:train_ind+len(cur_frames)]=cur_labels
            train_ind += len(cur_frames)
        elif choice == 1:
            # Same handling for the validation split.
            if val_ind + len(cur_frames) >= val_len:
                cur_frames = cur_frames[:val_len-val_ind]
                cur_labels = cur_labels[:val_len-val_ind]
                choices=np.delete(choices,np.argwhere(choices==1))
            val_data[val_ind:val_ind+len(cur_frames)]=cur_frames
            val_labels[val_ind:val_ind+len(cur_frames)]=cur_labels
            val_ind += len(cur_frames)
        elif choice == 2:
            # Same handling for the test split.
            if test_ind + len(cur_frames) >= test_len:
                cur_frames = cur_frames[:test_len-test_ind]
                cur_labels = cur_labels[:test_len-test_ind]
                choices=np.delete(choices,np.argwhere(choices==2))
            test_data[test_ind:test_ind+len(cur_frames)]=cur_frames
            test_labels[test_ind:test_ind+len(cur_frames)]=cur_labels
            test_ind += len(cur_frames)
    # Trim each split to a whole number of blocks, dropping any truncated tail.
    return train_data[:train_ind-(train_ind%blocksize)],train_labels[:train_ind-(train_ind%blocksize)],val_data[:val_ind-(val_ind%blocksize)],val_labels[:val_ind-(val_ind%blocksize)],test_data[:test_ind-(test_ind%blocksize)],test_labels[:test_ind-(test_ind%blocksize)]

In [None]:
# Run the DataGenerator module's process_file on the raw recording, passing
# "images_processed.h5" as the second argument.
# NOTE(review): presumably this writes the file that the training cell's
# DataGenerator reads. It sits at the end of the notebook, so on a fresh
# kernel it must be run before training - confirm and consider moving it up.
DG.process_file("images_raw_doric_round1.h5","images_processed.h5",data_amount=DATA_AMOUNT,frames_per_sample=depth)