In [1]:
import h5py
import random
import numpy as np
import tensorflow as tf
from datetime import datetime
import H5pyHelper
from tensorflow.keras import layers, models, regularizers, optimizers
#from sklearn.metrics import accuracy_score

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

hf = h5py.File('/workspaces/flora_dex/h5_files/copies/data.h5', 'r')
BATCH_SIZE = 32
NUM_CLASSES = hf['y_train'][0].shape[0] ## gets number of classes
INP_SHAPE = hf['x_train'][0].shape
TRAIN_SIZE = hf["x_train"].shape[0]
TEST_SIZE = hf["x_test"].shape[0]


hf.close()

Num GPUs Available:  0


In [3]:
# A memory-mapped array is kept on disk. However, it can be accessed and sliced like any ndarray.
# Memory mapping is especially useful for accessing small fragments of large files without reading
# the entire file into memory.

#def generator(feature_name,label_name,batch_size,shuffle):
#    index = 0
#    
#    hf = h5py.File('/home/sorozco0612/dev/flora_dex/raw_data/data.h5', 'a')
#    
#    # shuffle data
#    if (shuffle):
#        print("Shuffling data before starting training...")
#        #random.seed(datetime.now())
#
#        #random.shuffle(hf[feature_name])
#        #random.shuffle(hf[label_name])
#    
#    while True:
#        if index == 0:
#    
#            x = hf[feature_name]
#            y = hf[label_name] 
#            
#            ## create shuffle index for each batch
#            idx_map = np.arange(x.shape[0])
#            np.random.shuffle(idx_map)
#        
#        # batch has not met the end of the data
#        if (index + batch_size < x.shape[0]):
#            batch = sorted(idx_map[index:index+batch_size])
#            features = x[batch]
#            labels = y[batch]
#
#            index += batch_size
#        else:
#            # batch size will be smaller than the rest on last iteration 
#            
#            batch = sorted(idx_map[index:])
#            features = x[batch]
#            labels = y[batch]
#            
#            idx_map = np.arange(features.shape[0])
#            np.random.shuffle(idx_map)
#
#            index = 0
#            
#            ## close file so it can be reshuffled
#            hf.close()
#        
#        ## shuffle just the batches
#        #features = features[idx_map]
#        #labels = labels[idx_map]
#        
#        yield (features,labels)
        
        
def generator(feature_name,label_name,batch_size,shuffle):
    index = 0
    
    
    if (shuffle):
        print("Shuffling data before starting training...")
        seed = datetime.now()
        
        H5pyHelper.shuffle_dataset(
                    "/home/sorozco0612/dev/flora_dex/raw_data/data.h5", feature_name, seed
                )
        H5pyHelper.shuffle_dataset(
                    "/home/sorozco0612/dev/flora_dex/raw_data/data.h5", label_name, seed
                )
    while True:
        
        if index == 0:
            hf = h5py.File('/home/sorozco0612/dev/flora_dex/raw_data/data.h5', 'r')
    
            x = hf[feature_name]
            y = hf[label_name]  
        
        # batch has not met the end of the data
        if (index + batch_size < x.shape[0]):
            features = x[index:index+batch_size]
            labels = y[index:index+batch_size]
            index += batch_size
        else:
            # batch size will be smaller than the rest on last iteration
            features = x[index:]
            labels = y[index:]
            index = 0
            
            ## close file so it can be reshuffled
            hf.close()
            
        yield (features,labels)   

train_generator = generator('x_train', 'y_train',BATCH_SIZE,True)
test_generator = generator('x_test','y_test',BATCH_SIZE,False)

In [4]:
## alex net
model = models.Sequential([
    layers.Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=(500,500,3)),
    layers.BatchNormalization(),
    layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    layers.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
    layers.BatchNormalization(),
    layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    layers.BatchNormalization(),
    layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    layers.BatchNormalization(),
    layers.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    layers.BatchNormalization(),
    layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    layers.Flatten(),
    layers.Dense(4096, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(4096, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(NUM_CLASSES, activation='softmax')
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 123, 123, 96)      34944     
_________________________________________________________________
batch_normalization (BatchNo (None, 123, 123, 96)      384       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 61, 61, 96)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 256)       614656    
_________________________________________________________________
batch_normalization_1 (Batch (None, 61, 61, 256)       1024      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 256)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 384)       8

In [5]:
#model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.optimizers.SGD(lr=0.001), metrics=['accuracy'])
model.compile(loss='categorical_crossentropy', optimizer=tf.optimizers.SGD(lr=0.001), metrics=['accuracy'])

In [6]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3)
fit = model.fit(x=train_generator,
                steps_per_epoch= TRAIN_SIZE // BATCH_SIZE,
                callbacks=[callback], 
                validation_steps= TEST_SIZE // BATCH_SIZE,
                validation_data=test_generator,
                verbose=1,
                epochs=250)

Shuffling data before starting training...
'x_train' chunk has shape:(500, 500, 500, 3)
'x_train' chunk has shape:(1000, 500, 500, 3)
'x_train' chunk has shape:(1500, 500, 500, 3)
'x_train' chunk has shape:(2000, 500, 500, 3)
'x_train' chunk has shape:(2500, 500, 500, 3)
'x_train' chunk has shape:(3000, 500, 500, 3)
'x_train' chunk has shape:(3500, 500, 500, 3)
'x_train' chunk has shape:(4000, 500, 500, 3)
'x_train' chunk has shape:(4500, 500, 500, 3)
'x_train' chunk has shape:(5000, 500, 500, 3)
'x_train' chunk has shape:(5500, 500, 500, 3)
'x_train' chunk has shape:(6000, 500, 500, 3)
'x_train' chunk has shape:(6500, 500, 500, 3)
'x_train' chunk has shape:(7000, 500, 500, 3)
'x_train' chunk has shape:(7500, 500, 500, 3)
'x_train' chunk has shape:(8000, 500, 500, 3)
'x_train' chunk has shape:(8500, 500, 500, 3)
'x_train' chunk has shape:(9000, 500, 500, 3)
'x_train' chunk has shape:(9500, 500, 500, 3)
'x_train' chunk has shape:(10000, 500, 500, 3)
'x_train' chunk has shape:(10500, 500

In [7]:
def summarize_prediction(Y_true, Y_pred):
    #mse = mean_squared_error(Y_true, Y_pred)
    #accuracy = accuracy_score(Y_true, Y_pred)
    #print(f'mse       = {mse:.2}')
    print(f'accuracy = {accuracy:.2%}')
    
def predict_and_summarize(X, Y):
    Y_pred = model.predict(X)
    #summarize_prediction(Y, Y_pred)
    return Y_pred

hf = h5py.File('/home/sorozco0612/dev/flora_dex/raw_data/data.h5', 'r')
x_test = hf["x_test"]
y_test = hf["y_test"]

y_pred = predict_and_summarize(x_test, y_test)

print(y_pred[:1])
print(y_test[:1])

hf.close()

[[7.78542657e-04 6.25980692e-03 9.49958712e-02 1.29004084e-05
  6.21830477e-05 5.54712933e-05 2.84315720e-05 3.81550693e-04
  1.92087628e-02 2.98860745e-04 1.75059104e-05 3.02061118e-04
  1.28012864e-04 7.46248243e-03 2.92857840e-05 6.32313604e-04
  2.79471148e-02 5.68286690e-04 1.14611199e-03 1.32118255e-01
  4.49791871e-04 1.97634473e-03 5.33298917e-05 3.50706745e-04
  2.56427709e-04 5.84588743e-05 2.45174942e-05 2.47118487e-06
  5.11093007e-04 3.84321646e-03 1.58055336e-03 1.88186527e-06
  2.32173683e-04 1.27475723e-05 4.45285914e-05 2.21634018e-05
  5.67492934e-05 2.46388634e-04 3.09811076e-05 8.77393322e-05
  4.78481001e-04 5.64316381e-03 1.18022166e-04 1.27222916e-06
  9.34274431e-05 2.84610323e-05 2.83514528e-04 2.32970024e-06
  1.88430408e-06 6.76734408e-06 8.66164701e-05 3.97296157e-03
  1.50364509e-03 5.61339766e-06 7.86752134e-05 8.37487678e-05
  3.24041976e-05 1.74921606e-05 7.08757943e-06 1.66228929e-05
  1.90709488e-05 2.80225231e-06 3.80638812e-04 1.44820660e-03
  2.6348