In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
%matplotlib inline

import tensorflow as tf
from tensorflow import keras

import DataGenerator as DG
from DataGenerator import DataGenerator

from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard
#Progress bar fix: use callbacks=[Logger.JupyterProgbarLogger()] in model.fit
#verbose=0 is also required
import JupyterProgbarLogger as Logger
from tqdm import tqdm_notebook as tqdm
#from kerastuner.tuners import RandomSearch

import sklearn.metrics as metrics

import numpy as np
import random
import math

import matplotlib.pyplot as plt

import h5py

###FIX NUMPY LOAD FOR DICTIONARIES
np_load_old = np.load
np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)

In [2]:
BATCH_SIZE=8
DATA_AMOUNT=650000
VALIDATION_AMOUNT=40000
#frames per sample / 3rd dimension for 3D CNN
depth=4

In [3]:
def build_model(
                input_shape=(1,80, 80, 1),
                stride_length=(1, 1, 1),
                kernel=(3,3,3),
                kernel_initializer='glorot_uniform',
                activation=layers.Activation('relu'),
                dense_activation=layers.Activation('relu'),
                output_activation=layers.Activation('softmax'),
                batch_momentum=.999,
                dropout_chance=0.2,
                combine=True,
                padding='same',
                batch_norm=False,
                dropout=False
            ):
    name = "3D CNN"
    inputs = layers.Input(shape=input_shape)
    x = inputs
    conv_parameters = {
        'padding': padding,
        'strides': stride_length,
        'kernel_initializer': kernel_initializer,
    }
    # encode net
    #if batch_norm:
    #    x = layers.BatchNormalization(momentum=batch_momentum)(x)
    #elif dropout:
    #    x = layers.Dropout(dropout_chance)(x)
    #x = activation(x)
    x = layers.Conv3D(32, kernel, **conv_parameters)(x)
    x = layers.MaxPooling3D((1, 2, 2), padding=padding)(x)
    x = layers.Conv3D(64, kernel, **conv_parameters)(x)
    x = layers.Conv3D(64, kernel, **conv_parameters)(x)
    x = layers.MaxPooling3D((2, 2, 2), padding=padding)(x)
    x = layers.Conv3D(128, kernel, **conv_parameters)(x)
    x = layers.Conv3D(128, kernel, **conv_parameters)(x)
    x = layers.MaxPooling3D((2, 2, 2), padding=padding)(x)
    x = activation(x)
    x = layers.GlobalAveragePooling3D()(x)
    output = output_activation(x)
    model = keras.models.Model(inputs, output)
    model.compile(
        optimizer=keras.optimizers.SGD(
            learning_rate=1e-4,
            momentum=0.9,
            nesterov=True,
            decay=1e-6
        ),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model,name

In [4]:
if not depth == 1:
    model,name = build_model(input_shape=(depth,80,80,1))
else:
    model,name = build_model()
print(model.summary())

W0722 15:19:35.693381 140693000214336 deprecation.py:506] From /home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 4, 80, 80, 1)]    0         
_________________________________________________________________
conv3d (Conv3D)              (None, 4, 80, 80, 32)     896       
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 4, 40, 40, 32)     0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 4, 40, 40, 64)     55360     
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 4, 40, 40, 64)     110656    
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 2, 20, 20, 64)     0         
_________________________________________________________________
conv3d_3 (Conv3D)            (None, 2, 20, 20, 128)    221312

In [5]:
data_gen = DataGenerator("images_raw_doric_round1.h5",data_amount=DATA_AMOUNT,batch_size=BATCH_SIZE,frames_per_sample=depth)
validation_gen = DataGenerator("images_raw_doric_round1.h5",data_amount=VALIDATION_AMOUNT,batch_size=BATCH_SIZE,frames_per_sample=depth)#,offset=DATA_AMOUNT)
history = model.fit_generator(generator=data_gen,
                    validation_data=validation_gen,
                    epochs=40,
                    verbose=0,
                    use_multiprocessing=True,
                    workers=10,
                    callbacks=[Logger.JupyterProgbarLogger(),
                              keras.callbacks.EarlyStopping(monitor='val_acc', min_delta=0, patience=10,verbose=1, mode='auto',restore_best_weights=True)]
                   )

Epoch 1/40


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: 4.7253 - acc: 0.0614 - val_loss: 4.4878 - val_acc: 0.0680

Epoch 2/40


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: 3.8602 - acc: 0.0609 - val_loss: nan - val_acc: 0.0754

Epoch 3/40


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: nan - acc: 0.0192 - val_loss: 4.8520 - val_acc: 0.0143

Epoch 4/40


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: nan - acc: 0.0178 - val_loss: nan - val_acc: 0.0130

Epoch 5/40


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: 4.8520 - acc: 0.0132e-04

Process Keras_worker_ForkPoolWorker-78:
Process Keras_worker_ForkPoolWorker-84:
Process Keras_worker_ForkPoolWorker-82:
Process Keras_worker_ForkPoolWorker-77:
Traceback (most recent call last):
Process Keras_worker_ForkPoolWorker-72:
Traceback (most recent call last):
Process Keras_worker_ForkPoolWorker-73:
Process Keras_worker_ForkPoolWorker-71:
Process Keras_worker_ForkPoolWorker-75:
Traceback (most recent call last):
  File "/home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Process Keras_worker_ForkPoolWorker-74:
Process Keras_worker_ForkPoolWorker-80:
  File "/home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ap487/mini

Metrics: loss: 4.8520 - acc: 0.0132Metrics: loss: 4.8520 - acc: 0.0132

  File "/home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt


KeyboardInterrupt: 

In [None]:
def accuracy(predictions, answers):
    accs = np.zeros(len(answers))
    for i in range(0,len(answers)):
        if(predictions[i]==answers[i]):
            accs[i]=1
    return np.mean(accs)

In [None]:
print(accuracy(predicts,test_labels))
print(metrics.f1_score(test_labels,predicts,average=None))

In [None]:
model_benchmarks = np.load("benchmarks.npy")
model_benchmarks.item()[name]=metrics.f1_score(test_labels,predicts,average=None)
np.save("benchmarks.npy",model_benchmarks)
print(model_benchmarks)

In [None]:
#Old Data Function -- using DataGenerator now.
def sort_data(data,labels,train_len,val_len,test_len,blocksize=1):
    curBlock = labels[0]
    train_data = np.zeros((train_len,80,80,1))
    val_data = np.zeros((val_len,80,80,1))
    test_data = np.zeros((test_len,80,80,1))
    train_labels = np.full(train_len,-1)
    val_labels = np.full(val_len,-1)
    test_labels = np.full(test_len,-1)
    train_ind = 0
    val_ind = 0
    test_ind = 0
    choices = np.arange(3)
    i=0
    cur_len = 0
    while i < len(data):
        start = i-cur_len
        iter_amt = len(data) if blocksize == 1 else blocksize
        while labels[i]==curBlock and cur_len < iter_amt:
            i+=1
            cur_len+=1
            if i == len(data):
                break
        end = i
        if not i == len(data):
            if labels[i]==curBlock:
                cur_len-=int(blocksize/2)
            else:
                cur_len=0
            curBlock = labels[i]
        if not blocksize == 1 and end-start < blocksize:
            continue
        cur_frames = data[start:end]
        cur_labels = labels[start:end]
        choice=-1
        if choices.size>0:
            choice = np.random.choice(choices)
        else:
            break
        if choice == 0:
            if train_ind + len(cur_frames) >= train_len:
                cur_frames = cur_frames[:train_len-train_ind]
                cur_labels = cur_labels[:train_len-train_ind]
                choices=np.delete(choices,np.argwhere(choices==0))
            train_data[train_ind:train_ind+len(cur_frames)]=cur_frames
            train_labels[train_ind:train_ind+len(cur_frames)]=cur_labels
            train_ind += len(cur_frames)
        elif choice == 1:
            if val_ind + len(cur_frames) >= val_len:
                cur_frames = cur_frames[:val_len-val_ind]
                cur_labels = cur_labels[:val_len-val_ind]
                choices=np.delete(choices,np.argwhere(choices==1))
            val_data[val_ind:val_ind+len(cur_frames)]=cur_frames
            val_labels[val_ind:val_ind+len(cur_frames)]=cur_labels
            val_ind += len(cur_frames)
        elif choice == 2:
            if test_ind + len(cur_frames) >= test_len:
                cur_frames = cur_frames[:test_len-test_ind]
                cur_labels = cur_labels[:test_len-test_ind]
                choices=np.delete(choices,np.argwhere(choices==2))
            test_data[test_ind:test_ind+len(cur_frames)]=cur_frames
            test_labels[test_ind:test_ind+len(cur_frames)]=cur_labels
            test_ind += len(cur_frames)
    return train_data[:train_ind-(train_ind%blocksize)],train_labels[:train_ind-(train_ind%blocksize)],val_data[:val_ind-(val_ind%blocksize)],val_labels[:val_ind-(val_ind%blocksize)],test_data[:test_ind-(test_ind%blocksize)],test_labels[:test_ind-(test_ind%blocksize)]

In [None]:
#Testing preprocessing to remove -5 labelled beforehand
file_path = "images_raw_doric_round1.h5"
with h5py.File(file_path,'r') as f:
    with h5py.File("images_processed.h5",'w') as w:
        w.create_dataset("/frames/raw",shape=f["/frames/raw"].shape,dtype='uint8')
        w.create_dataset("/labels",data=f["/labels"])
        start = 0
        index = 0
        for i in range(0,len(f["/labels"])):
            if f["/labels"][i]<0:
                w["/frames/raw"][index:index+i-start]=f["/frames/raw"][start:i]
                w["/labels"][index:index+i-start]=f["/labels"][start:i]
                start=i
                index+=i-start
            if(i%1000==0):
                print("Progress: ",i/(len(f["/frames/raw"])),'            ',end='\r')

In [None]:
#Making sure data spread is normal, make sure to remove the [...,None] in DataGenerator.
lbls = np.zeros(BATCH_SIZE*300)
for i in range(0,300):
    lbls[i*BATCH_SIZE:(i+1)*BATCH_SIZE]=data_gen.__getitem__(0)[1]
plt.hist(lbls,46,density=True)
plt.show()