In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
%matplotlib inline
%load_ext autoreload
%autoreload 2
import tensorflow as tf
from tensorflow import keras

import DataGenerator as DG
from DataGenerator import DataGenerator

from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard
#Progress bar fix: use callbacks=[Logger.JupyterProgbarLogger()] in model.fit
#verbose=0 is also required
import JupyterProgbarLogger as Logger
from tqdm import tqdm_notebook as tqdm
#from kerastuner.tuners import RandomSearch


import sklearn.metrics as metrics

import numpy as np
import random
import math

import matplotlib.pyplot as plt

import h5py

### FIX NUMPY LOAD FOR DICTIONARIES
# np.save stores a dict as a pickled 0-d object array, and np.load only reads
# pickled data when allow_pickle=True. Patch np.load so the flag defaults to True.
#
# The marker attribute keeps this cell idempotent: without the guard, running
# the cell a second time wraps the already-patched function, allow_pickle ends
# up being passed twice, and every later np.load raises TypeError.
# NOTE(review): unpickling can execute arbitrary code -- only load .npy files
# that you produced yourself.
if not getattr(np.load, "_allow_pickle_patched", False):
    np_load_old = np.load

    def _np_load_allow_pickle(*args, _original_load=np_load_old, **kwargs):
        """Call the unpatched np.load with allow_pickle defaulting to True."""
        # setdefault (instead of a hard-coded keyword) lets callers still pass
        # allow_pickle explicitly without a duplicate-keyword error.
        kwargs.setdefault("allow_pickle", True)
        return _original_load(*args, **kwargs)

    _np_load_allow_pickle._allow_pickle_patched = True
    np.load = _np_load_allow_pickle

In [2]:
# Passed as `batch_size` to both DataGenerator instances.
BATCH_SIZE = 8
# Passed as `data_amount` to the training generator and as `offset` to the
# validation generator.
DATA_AMOUNT = 2_000_000
# Passed as `data_amount` to the validation generator.
VALIDATION_AMOUNT = 100_000
# Frames per sample: the depth (first) axis of the 3D CNN input.
depth = 8

In [3]:
def build_model(
    input_shape=(1, 80, 80, 1),
    stride_length=(1, 1, 1),
    kernel=(3, 3, 3),
    kernel_initializer='glorot_uniform',
    activation=layers.Activation('relu'),
    dense_activation=layers.Activation('relu'),
    output_activation=layers.Activation('softmax'),
    batch_momentum=.999,
    dropout_chance=0.2,
    combine=True,
    padding='same',
    batch_norm=False,
    dropout=False
):
    """Build and compile the 3D convolutional classifier.

    The network is four stages of Conv3D layers, each closed by a
    MaxPooling3D, followed by ``activation``, global average pooling and
    ``output_activation``.

    Args:
        input_shape: Shape of one sample handed to ``layers.Input``.
        stride_length: ``strides`` of every Conv3D layer.
        kernel: Kernel size of every Conv3D layer.
        kernel_initializer: Initializer of every Conv3D layer.
        activation: Layer applied once, after the last pooling stage.
        output_activation: Layer applied to the globally pooled features.
        padding: Padding mode for every Conv3D and MaxPooling3D layer.
        dense_activation, batch_momentum, dropout_chance, combine,
        batch_norm, dropout: Accepted but not used by the current body.

    Returns:
        Tuple ``(model, name)``: the compiled Keras model and the string
        "3D CNN".

    NOTE(review): the Conv3D layers are created without an activation
    argument, so ``activation`` before the global pooling is the only
    non-linearity applied explicitly here -- confirm this is intended.
    NOTE(review): the output width equals the filter count of the last
    Conv3D (64), not a configurable class count -- confirm.
    NOTE(review): the default activation layers are instantiated once, when
    the function is defined, so repeated calls share those layer objects.
    """
    name = "3D CNN"
    conv_kwargs = dict(
        padding=padding,
        strides=stride_length,
        kernel_initializer=kernel_initializer,
    )
    # One entry per stage: (filters of each consecutive Conv3D, pool size).
    stages = (
        ((8,), (1, 2, 2)),
        ((16, 16), (2, 2, 2)),
        ((32, 32), (2, 2, 2)),
        ((64, 64), (2, 2, 2)),
    )

    inputs = layers.Input(shape=input_shape)
    features = inputs
    for filter_counts, pool_size in stages:
        for filters in filter_counts:
            features = layers.Conv3D(filters, kernel, **conv_kwargs)(features)
        features = layers.MaxPooling3D(pool_size, padding=padding)(features)

    features = activation(features)
    features = layers.GlobalAveragePooling3D()(features)
    output = output_activation(features)

    model = keras.models.Model(inputs, output)
    sgd = keras.optimizers.SGD(
        learning_rate=1e-4,
        momentum=0.9,
        nesterov=True,
        decay=1e-6,
    )
    model.compile(
        optimizer=sgd,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model, name

In [4]:
# build_model's default input_shape is (1, 80, 80, 1), so passing the depth
# explicitly also covers depth == 1; no separate branch is needed.
model, name = build_model(input_shape=(depth, 80, 80, 1))
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()

W0723 15:47:55.383274 140086897731392 deprecation.py:506] From /home/ap487/miniconda3/envs/tf2_gpu/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 8, 80, 80, 1)]    0         
_________________________________________________________________
conv3d (Conv3D)              (None, 8, 80, 80, 8)      224       
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 8, 40, 40, 8)      0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 8, 40, 40, 16)     3472      
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 8, 40, 40, 16)     6928      
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 4, 20, 20, 16)     0         
_________________________________________________________________
conv3d_3 (Conv3D)            (None, 4, 20, 20, 32)     13856 

In [None]:
# Settings shared by the training and validation generators.
generator_options = dict(batch_size=BATCH_SIZE, frames_per_sample=depth)
data_gen = DataGenerator(
    "images_raw_doric_round1.h5",
    data_amount=DATA_AMOUNT,
    **generator_options
)
# The validation generator reads from the same file, offset by DATA_AMOUNT.
validation_gen = DataGenerator(
    "images_raw_doric_round1.h5",
    data_amount=VALIDATION_AMOUNT,
    offset=DATA_AMOUNT,
    **generator_options
)

# verbose=0 below is paired with the Jupyter progress-bar logger callback.
progress_logger = Logger.JupyterProgbarLogger()
# NOTE(review): patience=10 is larger than epochs=5, so this callback can
# presumably never stop the run early -- confirm which of the two should change.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_acc',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)

history = model.fit_generator(
    generator=data_gen,
    validation_data=validation_gen,
    epochs=5,
    verbose=0,
    use_multiprocessing=True,
    workers=10,
    callbacks=[progress_logger, early_stopping],
)

Epoch 1/5


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: 3.0284 - acc: 0.1135 - val_loss: 1.9728 - val_acc: 0.3958

Epoch 2/5


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: 1.9670 - acc: 0.3686 - val_loss: 1.5774 - val_acc: 0.5016

Epoch 3/5


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: 1.7032 - acc: 0.4616 - val_loss: 1.6152 - val_acc: 0.4824

Epoch 4/5


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Metrics: loss: 1.5748 - acc: 0.4892

In [None]:
def accuracy(predictions, answers):
    """Return the fraction of positions where ``predictions`` equals ``answers``.

    Both inputs are flattened before comparison, so an ``(n,)`` and an
    ``(n, 1)`` array are compared element by element.

    Args:
        predictions: Array-like of predicted labels.
        answers: Array-like of reference labels.

    Returns:
        The mean of the element-wise matches, or NaN when both inputs are
        empty.

    Raises:
        ValueError: If the two inputs hold a different number of elements.
            Extra predictions used to be ignored silently, which hides a
            misaligned evaluation behind a plausible-looking score.
    """
    predicted = np.asarray(predictions).ravel()
    expected = np.asarray(answers).ravel()
    if predicted.size != expected.size:
        raise ValueError(
            "predictions and answers differ in length: "
            "{} vs {}".format(predicted.size, expected.size)
        )
    if expected.size == 0:
        # Same value np.mean gives for an empty array, without the warning.
        return np.float64("nan")
    return np.mean(predicted == expected)

In [None]:
# Number of evaluation samples to assemble; each holds `depth` frames.
NUM_TEST_SAMPLES = 1000
# Index of the first frame to consider.
# NOTE(review): this equals DATA_AMOUNT, the `offset` given to the validation
# generator; if that offset is also a frame index, these test frames overlap
# the validation frames -- confirm.
TEST_START_INDEX = 2000000

with h5py.File("images_raw_doric_round1.h5", 'r') as f:
    # Look the datasets up once instead of on every frame.
    labels_ds = f["/labels"]
    frames_ds = f["/frames/raw"]
    total_frames = len(labels_ds)

    test_data = np.zeros((NUM_TEST_SAMPLES, depth, 80, 80))
    test_labels = np.zeros(NUM_TEST_SAMPLES)
    index = TEST_START_INDEX
    for i in range(NUM_TEST_SAMPLES):
        # Walk forward until `depth` frames with a non-negative label are found.
        cur_amount = 0
        while cur_amount < depth:
            if index >= total_frames:
                raise IndexError(
                    "ran out of frames at index {} while filling test sample "
                    "{} of {}".format(index, i, NUM_TEST_SAMPLES)
                )
            label = labels_ds[index]
            if label >= 0:
                test_data[i][cur_amount] = frames_ds[index]
                # Overwritten for every frame, so the sample keeps the label
                # of its last frame.
                test_labels[i] = label
                cur_amount += 1
            index += 1
    # Append a trailing axis of size 1 to match the model's input shape.
    test_data = test_data[..., None]

In [None]:
# Model output for every test sample, one row per sample.
p = model.predict(test_data)
# Index of the highest score in each row, stored as floats like test_labels.
predicts = np.zeros(len(test_labels))
predicts[:len(p)] = p.reshape(len(p), -1).argmax(axis=1)

In [None]:
# Overall fraction of test samples classified correctly.
overall_accuracy = accuracy(predicts, test_labels)
print(overall_accuracy)
# average=None yields one F1 score per class rather than a single number.
per_class_f1 = metrics.f1_score(test_labels, predicts, average=None)
print(per_class_f1)

In [None]:
# benchmarks.npy stores a dict (model name -> per-class F1 scores) as a pickled
# 0-d object array; .item() unwraps it back into the dict. Loading it relies on
# the np.load patch from the import cell, which enables allow_pickle.
try:
    model_benchmarks = np.load("benchmarks.npy").item()
except FileNotFoundError:
    # First run: no benchmarks recorded yet, so start an empty table instead
    # of crashing.
    model_benchmarks = {}
model_benchmarks[name] = metrics.f1_score(test_labels, predicts, average=None)
np.save("benchmarks.npy", model_benchmarks)
print(model_benchmarks)

In [None]:
# Compare predicted and reference labels over a short window of test samples.
fig, ax = plt.subplots()
ax.plot(predicts, label="predicted")
ax.plot(test_labels, label="actual")
#with h5py.File("images_raw_doric_round1.h5",'r') as f:
#    lbls = f["/labels"][:2000000:depth]
#plt.plot(lbls)
ax.set_xlim(800, 830)
# Labels and a legend so the two curves can be told apart.
ax.set_xlabel("test sample index")
ax.set_ylabel("class label")
ax.legend()
plt.show()

In [None]:
#Old Data Function -- using DataGenerator now.
def sort_data(data,labels,train_len,val_len,test_len,blocksize=1):
    """Randomly distribute blocks of equally-labelled consecutive frames over
    train / validation / test arrays.

    The input is scanned front to back and cut into blocks of consecutive
    frames that share one label. Each accepted block is assigned to one of the
    three splits chosen with ``np.random.choice``; a split that reaches its
    capacity is removed from the candidates, and scanning stops once no split
    is left or the data is exhausted.

    Args:
        data: Sequence of frames; slices of it are written into arrays of
            shape (k, 80, 80, 1), so frames are assumed to be compatible with
            that shape -- TODO confirm.
        labels: Sequence of labels, indexed in parallel with ``data``.
        train_len: Capacity (number of frames) of the training split.
        val_len: Capacity of the validation split.
        test_len: Capacity of the test split.
        blocksize: With 1, a block is a whole run of equal labels. Otherwise a
            block holds at most ``blocksize`` frames, blocks shorter than that
            are skipped, and successive blocks inside one run overlap.

    Returns:
        Tuple ``(train_data, train_labels, val_data, val_labels, test_data,
        test_labels)``, each truncated to the largest multiple of
        ``blocksize`` that was filled.

    NOTE(review): the split choice uses NumPy's global RNG and no seed is set
    in this function, so results differ between runs unless seeded elsewhere.
    NOTE(review): with blocksize > 1, overlapping blocks of one run can be
    assigned to different splits -- confirm this is acceptable.
    """
    # Label of the run currently being scanned.
    curBlock = labels[0]
    # Pre-allocated outputs; label arrays start at -1.
    train_data = np.zeros((train_len,80,80,1))
    val_data = np.zeros((val_len,80,80,1))
    test_data = np.zeros((test_len,80,80,1))
    train_labels = np.full(train_len,-1)
    val_labels = np.full(val_len,-1)
    test_labels = np.full(test_len,-1)
    # Next free position in each split.
    train_ind = 0
    val_ind = 0
    test_ind = 0
    # Splits that still have room: 0 = train, 1 = validation, 2 = test.
    choices = np.arange(3)
    i=0
    # Frames already counted towards the block that ends at position i.
    cur_len = 0
    while i < len(data):
        # The block starts cur_len frames before the scan position.
        start = i-cur_len
        # blocksize == 1 means "no size limit": a block may span the whole input.
        iter_amt = len(data) if blocksize == 1 else blocksize
        # Extend the block while the label is unchanged and the limit is not hit.
        while labels[i]==curBlock and cur_len < iter_amt:
            i+=1
            cur_len+=1
            if i == len(data):
                break
        end = i
        if not i == len(data):
            if labels[i]==curBlock:
                # The run continues past the size limit: keep part of this
                # block's length so the next block overlaps this one.
                cur_len-=int(blocksize/2)
            else:
                # The label changed: the next block starts from scratch.
                cur_len=0
            curBlock = labels[i]
        # With a fixed block size, drop blocks that came out too short.
        if not blocksize == 1 and end-start < blocksize:
            continue
        cur_frames = data[start:end]
        cur_labels = labels[start:end]
        choice=-1
        if choices.size>0:
            choice = np.random.choice(choices)
        else:
            # All three splits are full.
            break
        if choice == 0:
            # Truncate the block to the remaining room and retire the split
            # once it is filled (this also triggers on an exact fit).
            if train_ind + len(cur_frames) >= train_len:
                cur_frames = cur_frames[:train_len-train_ind]
                cur_labels = cur_labels[:train_len-train_ind]
                choices=np.delete(choices,np.argwhere(choices==0))
            train_data[train_ind:train_ind+len(cur_frames)]=cur_frames
            train_labels[train_ind:train_ind+len(cur_frames)]=cur_labels
            train_ind += len(cur_frames)
        elif choice == 1:
            # Same handling for the validation split.
            if val_ind + len(cur_frames) >= val_len:
                cur_frames = cur_frames[:val_len-val_ind]
                cur_labels = cur_labels[:val_len-val_ind]
                choices=np.delete(choices,np.argwhere(choices==1))
            val_data[val_ind:val_ind+len(cur_frames)]=cur_frames
            val_labels[val_ind:val_ind+len(cur_frames)]=cur_labels
            val_ind += len(cur_frames)
        elif choice == 2:
            # Same handling for the test split.
            if test_ind + len(cur_frames) >= test_len:
                cur_frames = cur_frames[:test_len-test_ind]
                cur_labels = cur_labels[:test_len-test_ind]
                choices=np.delete(choices,np.argwhere(choices==2))
            test_data[test_ind:test_ind+len(cur_frames)]=cur_frames
            test_labels[test_ind:test_ind+len(cur_frames)]=cur_labels
            test_ind += len(cur_frames)
    # Trim every split to a whole number of blocks.
    return train_data[:train_ind-(train_ind%blocksize)],train_labels[:train_ind-(train_ind%blocksize)],val_data[:val_ind-(val_ind%blocksize)],val_labels[:val_ind-(val_ind%blocksize)],test_data[:test_ind-(test_ind%blocksize)],test_labels[:test_ind-(test_ind%blocksize)]

In [None]:
# Show the first 130 entries of the label dataset stored in the file.
with h5py.File("images_raw_doric_round1.h5", 'r') as f:
    first_labels = f["/labels"][0:130]
print(first_labels)
# Second element of batch 1 from the training generator (presumably its labels).
data_gen[1][1]

In [None]:
# Fetch the batch once so the plotted labels belong to exactly the frames the
# model predicted on. The batch used to be requested twice, which doubles the
# I/O and could pair predictions with other samples if the generator is not
# deterministic per index.
batch_index = 518
batch = data_gen.__getitem__(batch_index)
pl = model.predict(batch[0])
# Index of the highest score for every sample in the batch.
plc = np.argmax(pl, axis=1)

fig, ax = plt.subplots()
ax.plot(plc, label="predicted")
ax.plot(batch[1], label="actual")
ax.set_xlabel("sample index within batch")
ax.set_ylabel("class label")
ax.legend()
plt.show()