In [None]:
# Run-mode switch. When True, the parameter-set name and the output folder are
# hard-coded further down in the notebook and no command-line arguments are parsed.
DEBUG = True

# For a training run driven by command-line arguments, use this instead:
#DEBUG = False

In [1]:
# source: https://www.kaggle.com/nobletp/panda-keras-baseline

In [2]:
# Command-line usage of the exported script (effnB3regr.py):
#   test:  python3 effnB3regr.py --cnnpar effnB3regr_test --mfolder effnB3test
#   train: python3 effnB3regr.py --cnnpar effnB3regr --mfolder effnB3regr

In [3]:
import warnings
warnings.filterwarnings('ignore')
import os, argparse, sys
sys.path.append("..")
from glob import glob
from random import shuffle
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score
import pandas as pd 
import json
import skimage.io
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Model, Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import efficientnet.tfkeras as efn
# Report the TensorFlow build, then switch every visible GPU to on-demand
# memory allocation.
print('tensorflow version:', tf.__version__)
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print('no gpus')
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # A RuntimeError is only reported, so the notebook keeps running.
        print(err)

tensorflow version: 2.2.0
no gpus


In [4]:
# The environment variable is set BEFORE importing keras_radam on purpose:
# presumably the package reads TF_KERAS at import time to bind to tf.keras
# rather than standalone Keras (TODO confirm against the keras_radam docs).
os.environ['TF_KERAS'] = '1'
from keras_radam import RAdam

In [5]:
import panda_bvv_config
from panda_bvv_config import *

In [6]:
# Choose the parameter-set name (`cnnet`) and the output folder name
# (`model_save_folder`) for this run.
if DEBUG:
    # Test run: fixed values, no command-line parsing.
    # NOTE(review): the usage comment near the top of the notebook uses
    # 'effnB3test' as the test folder, while this branch writes to
    # 'effnB3regr' - confirm which one is intended.
    cnnet = 'effnB3regr_test'
    model_save_folder = 'effnB3regr'
else:
    # Training run: both values come from the command line.
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '--cnnpar',
        dest='cnn_parameters',
        type=str,
        default='effnB2',
        help="parameters name",
    )
    ap.add_argument(
        '--mfolder',
        dest='mfolder',
        type=str,
        default='effnB2_model',
        help="folder to save model files",
    )
    args = vars(ap.parse_args())
    cnnet, model_save_folder = args["cnn_parameters"], args["mfolder"]


In [8]:
# Directory (under note_path) that receives every artefact of this run.
full_model_save_folder_path = os.path.join(note_path, model_save_folder)

# Look up the parameter set selected by `cnnet`. Fail fast with a readable
# message when the name is unknown; otherwise the first `.get` below would
# raise an opaque AttributeError on None.
input_parameters = panda_bvv_config.train_dict.get(cnnet)
if input_parameters is None:
    raise KeyError(
        "unknown parameter set '{}'; available sets: {}".format(
            cnnet, list(panda_bvv_config.train_dict)
        )
    )

# --- input geometry and training schedule ---
image_sizey = input_parameters.get('image_sizey')
image_sizex = input_parameters.get('image_sizex')
num_epochs = input_parameters.get('num_epochs')
num_reduceOnPlateu = input_parameters.get('num_reduceOnPlateu')
learn_rate = input_parameters.get('learn_rate')
stop_patience = input_parameters.get('stop_patience')
inp_label_smooth = input_parameters.get('inp_label_smooth')

# --- labels, class weights and output-layer bias ---
our_id_label_map = input_parameters.get('id_label_map')
class_weights_ = input_parameters.get('class_weights')
output_bias = tf.keras.initializers.Constant(input_parameters.get('output_bias'))

# --- batching ---
BS = input_parameters.get('BS')
s_per_epoch = input_parameters.get('s_per_epoch')
val_steps = input_parameters.get('val_steps')

# --- file names (bare names here; joined with a directory later on) ---
model_name = input_parameters.get('model_name')
checkpoint_name = input_parameters.get('checkpoint_name')
weights_file = input_parameters.get('weights_file')
file_for_struct = input_parameters.get('file_for_struct')
file_for_weights = input_parameters.get('file_for_weights')
history_file = input_parameters.get('history_file')
save_plot_file = input_parameters.get('save_plot_file')

num_logits = input_parameters.get('num_logits')
from_folder = os.path.join(base_path, input_parameters.get('from_folder'))

# Optional entries: these are None when the parameter set does not define them.
bestmodel_weights = input_parameters.get('bestmodel_weights')
level0_file = input_parameters.get('level0_file')

input_shape_ = (image_sizey, image_sizex, 3)

# Factories for the training / validation batch generators.
TrDataGen = input_parameters.get('trdatagen')
ValDataGen = input_parameters.get('valdatagen')

In [9]:
def get_preds_and_labels(model, generator, steps=None):
    """
    Run the model over a fixed number of batches and collect the results.

    :param model: object exposing ``predict(x)`` (e.g. a Keras model)
    :param generator: iterator yielding ``(x, y)`` batches; it is advanced
        with ``next()`` once per step
    :param steps: number of batches to draw; defaults to the module-level
        ``val_steps`` when omitted

    :return: A tuple with two flat Numpy arrays. One containing the
    predictions and one containing the labels
    """
    if steps is None:
        steps = val_steps

    preds = []
    labels = []
    for _ in range(steps):
        x, y = next(generator)
        preds.append(model.predict(x))
        labels.append(y)
    # Join the per-batch arrays and flatten them to 1-D.
    return np.concatenate(preds).ravel(), np.concatenate(labels).ravel()


In [10]:
class CohCap(Callback):
    """
    A custom Keras callback for saving the best model
    according to the Quadratic Weighted Kappa (QWK) metric.

    After every epoch the model is evaluated on ``val_datagen``, the score is
    appended to ``kappa_history_file`` and, if it is the best score so far,
    the whole model is saved to ``metrics_name``.
    """
    # File that receives the model with the best validation kappa.
    metrics_name = os.path.join(full_model_save_folder_path,'kohen_kappa_best.h5')
    # Plain-text log, one kappa value per line (opened in append mode).
    kappa_history_file = os.path.join(full_model_save_folder_path,'kappa_history')

    def on_train_begin(self, logs=None):
        """
        Initialize list of QWK scores on validation data
        """
        self.val_kappas = []

    def on_epoch_end(self, epoch, logs=None):
        """
        Gets QWK score on the validation data

        :param epoch: The current epoch number
        :param logs: metrics dict supplied by Keras (not used here)
        """
        # Use the model Keras attached to this callback, not a global.
        y_pred, labels = get_preds_and_labels(self.model, val_datagen)
        # Round to the nearest grade and clip BEFORE casting to an unsigned
        # type: casting first lets negative predictions wrap around to large
        # values, which would then be clipped to the highest grade.
        # Valid grades are 0 .. num_logits - 1.
        y_pred = np.clip(np.rint(y_pred), 0, num_logits - 1).astype(np.uint8)
        # We can use sklearns implementation of QWK straight out of the box
        # as long as we specify weights as 'quadratic'
        _val_kappa = cohen_kappa_score(labels, y_pred, weights='quadratic')
        self.val_kappas.append(_val_kappa)
        with open(self.kappa_history_file, 'a') as kh:
            kh.write(f'{_val_kappa}\n')
        print(f"val_kappa: {round(_val_kappa, 4)}")
        # The newest score is already in the list, so equality with the
        # maximum means "best so far" (ties are saved again).
        if _val_kappa == max(self.val_kappas):
            print("Validation Kappa has improved. Saving model.")
            self.model.save(self.metrics_name)

In [11]:
# Callback instance that is placed in callbacks_list; its val_kappas list is
# (re)initialised when training begins and extended after every epoch.
kappa_metrics = CohCap()

In [12]:
# glob returns paths in arbitrary, filesystem-dependent order, so the file
# list is sorted first; otherwise random_state_split would not reproduce the
# same train/validation partition from one machine or run to the next.
train, val = train_test_split(sorted(glob(from_folder +'/*.png')),
                              test_size=val_size_proportion,
                              random_state=random_state_split)

In [13]:
# Echo the selected parameter set; the id -> label map is left out of the listing.
print('CNN input parameters:\n')
for k, v in input_parameters.items():
    if k == 'id_label_map':
        continue
    print('{}: {}'.format(k, v))

CNN input parameters:

image_sizey: 320
image_sizex: 320
num_epochs: 2
num_earlyStop: 2
num_reduceOnPlateu: 8
learn_rate: 0.0005
stop_patience: 14
inp_label_smooth: 0.01
BS: 10
s_per_epoch: 20
val_steps: 8
class_weights: {0: 1.0, 1: 1.0847711927981996, 2: 2.1533879374534624, 4: 2.3154523618895118, 3: 2.3285024154589373, 5: 2.3627450980392157}
output_bias: [2.448 2.367 1.681 1.603 1.608 1.588]
model_name: model_panda.h5
checkpoint_name: model_effnB3_panda_check
weights_file: efficientnet-b3_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
bestmodel_weights: None
level0_file: effnB3_check20_best_level0_weights.npy
file_for_struct: model_effnB3_panda_struct.json
file_for_weights: model_effnB3_panda_weights.json
history_file: history_effnB3.json
save_plot_file: plot_edu_effnb3.png
from_folder: testdata320/testf
num_logits: 6
trdatagen: <function LinRegr.<locals>.wrapper at 0x7f615a5dfd90>
valdatagen: <function LinRegr.<locals>.wrapper at 0x7f615a5dfd90>


In [14]:
# Make sure the output directory exists before anything is written into it.
if not os.path.exists(full_model_save_folder_path):
    print("[INFO] 'creating {}' directory".format(model_save_folder))
    os.makedirs(full_model_save_folder_path)


def _in_save_folder(file_name):
    """Return `file_name` located inside the model output directory."""
    return os.path.join(full_model_save_folder_path, file_name)


# Turn the bare file names from the parameter set into full paths.
model_name = _in_save_folder(model_name)
checkpoint_name = _in_save_folder(checkpoint_name)
file_for_struct = _in_save_folder(file_for_struct)
file_for_weights = _in_save_folder(file_for_weights)
history_file = _in_save_folder(history_file)
save_plot_file_main = _in_save_folder('acc_' + save_plot_file)
save_plot_file_kappa = _in_save_folder('kappa_' + save_plot_file)

# Unlike the files above, the weights file is resolved against note_path,
# not against the output directory.
weights_file = os.path.join(note_path, weights_file)

In [15]:
# train_datagen = TrDataGen(
#         train_cnn,
#         image_sizey, image_sizex,
#         batch_size_= BS,
#         shuffle_=True)
# val_datagen = ValDataGen(
#         valid_cnn,
#         image_sizey, 
#         image_sizex,
#         batch_size_ = BS,
#         shuffle_=False)

In [16]:
# Arguments shared by the training and the validation generator.
_shared_gen_kwargs = dict(
    id_label_map=our_id_label_map,
    batch_size=BS,
    depth=num_logits,
)

# Only the training generator augments its images.
# NOTE(review): shuf=False is also passed for the training data - confirm
# that this is intended.
train_datagen = TrDataGen(list_files=train, **_shared_gen_kwargs,
                          augment=True, shuf=False)
val_datagen = ValDataGen(list_files=val, **_shared_gen_kwargs,
                         augment=False, shuf=False)

In [17]:
#skimage.io.imshow(tt[0][6])

In [18]:
callbacks_list = [
    # Early stopping is currently disabled; kept for reference.
    # EarlyStopping(
    #     monitor='val_loss',
    #     mode='min',
    #     min_delta=1e-2,
    #     patience=stop_patience,
    #     verbose=1,
    #     restore_best_weights=True,
    # ),

    # Save the full model after every epoch, whether or not it improved.
    ModelCheckpoint(
        filepath=checkpoint_name + ".{epoch:02d}.h5",
        monitor='val_loss',
        mode='auto',
        save_weights_only=False,
        save_freq='epoch',
        save_best_only=False,
    ),

    # Halve the learning rate when val_loss stops improving.
    # `min_delta` replaces the deprecated `epsilon` alias: same value and
    # same meaning, without the deprecation warning.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=.5,
        patience=num_reduceOnPlateu,
        verbose=1,
        min_lr=1e-7,
        min_delta=0.0001,
    ),

    # Validation kappa tracking and best-model saving.
    kappa_metrics,
]



In [19]:
#tf.compat.v1.disable_eager_execution()

In [20]:
# EfficientNet-B3 without its classification top and without preloaded weights.
backbone = efn.EfficientNetB3(
    input_shape=input_shape_,
    weights=None,
    include_top=False,
    pooling='avg'
)
# Cut the network one layer before its end, so the second-to-last layer's
# output becomes the feature extractor's output.
bottleneck = Model(inputs=backbone.inputs, outputs=backbone.layers[-2].output)

# Regression head stacked on the feature extractor; the final single linear
# unit produces one scalar per image.
model = Sequential([
    bottleneck,
    GlobalAveragePooling2D(),
    Flatten(),
    BatchNormalization(),
    Dropout(.25),
    Dense(512, activation='elu'),
    BatchNormalization(),
    Dropout(.25),
    Dense(num_logits, activation='elu', bias_initializer=output_bias),
    Dense(1, activation="linear"),
])


In [21]:
# Optionally restore weights for the whole model.
if bestmodel_weights:
    model.load_weights(bestmodel_weights)

# Optionally initialise only the first layer of `model` (the bottleneck
# network) from a .npy file.
# NOTE(review): allow_pickle=True unpickles objects while loading; only use it
# on files from a trusted source.
if level0_file:
    level0_weights = np.load(level0_file, allow_pickle=True)
    model.layers[0].set_weights(level0_weights)


In [22]:
# Print the layer-by-layer summary of the assembled network.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model (Model)                (None, 10, 10, 1536)      10783528  
_________________________________________________________________
global_average_pooling2d (Gl (None, 1536)              0         
_________________________________________________________________
flatten (Flatten)            (None, 1536)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 1536)              6144      
_________________________________________________________________
dropout (Dropout)            (None, 1536)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               786944    
_________________________________________________________________
batch_normalization_1 (Batch (None, 512)               2

In [23]:
# RAdam optimizer used to compile the model.
# NOTE(review): total_steps is hard-coded to 5000 instead of being derived
# from s_per_epoch * num_epochs, and the `learn_rate` value read from the
# parameter set is not passed here - confirm both are intended.
opt = RAdam(total_steps=5000, warmup_proportion=0.1, min_lr=1e-5)

In [24]:
# Train as a regression: mean-squared-error loss, with MSE and accuracy
# reported as additional metrics.
model.compile(optimizer=opt, loss='mse', metrics=['mse', 'acc'])

In [26]:
# Model.fit accepts generators directly; fit_generator is deprecated and only
# forwards to fit, so the arguments are passed through unchanged.
history = model.fit(
    train_datagen,
    steps_per_epoch=s_per_epoch,
    validation_data=val_datagen,
    validation_steps=val_steps,
    class_weight=class_weights_,
    callbacks=callbacks_list,
    epochs=num_epochs,
    verbose=1
)

In [None]:
# Convert every recorded metric series to positional-notation strings so the
# training history can be written as JSON.
dict_to_save = {
    metric: [np.format_float_positional(value) for value in series]
    for metric, series in history.history.items()
}
with open(history_file, 'w') as history_fh:
    json.dump(dict_to_save, history_fh)


In [None]:
# Persist the trained network three ways: the full model, the weights alone
# (HDF5 format), and the architecture description.
model.save(model_name)
model.save_weights(file_for_weights, save_format="h5")
# to_json() already returns a JSON string, so json.dump stores it as one
# quoted JSON string; a reader must json.load the file before rebuilding.
with open(file_for_struct, 'w') as struct_fh:
    json.dump(model.to_json(), struct_fh)

In [None]:
plt.style.use("ggplot")
plt.figure()

# (history key, legend label) pairs, drawn in this order.
curves = [
    ('mse', 'mse'),
    ('val_mse', 'val_mse'),
    ('acc', 'accuracy'),
    ('val_acc', 'val_accuracy'),
    ('loss', 'loss'),
    ('val_loss', 'val_loss'),
]
for history_key, legend_label in curves:
    values = history.history[history_key]
    # Take the x-axis from the epochs actually recorded rather than from
    # num_epochs, so a run that ended early still plots without a length
    # mismatch.
    plt.plot(np.arange(len(values)), values, label=legend_label)

plt.title("Training Loss, MSE and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("MSE/Accuracy/Loss")
plt.legend(loc="lower left")
plt.savefig(save_plot_file_main)

In [None]:
# #kappa
# plt.style.use("ggplot")
# plt.figure()

# plt.plot(np.arange(0, num_epochs), history.history['val_kappa'], label='validation kappa')
# #plt.plot(np.arange(0, num_epochs), history.history['val_cohen_kappa'], label='val_kappa')

# plt.title("Training Cohen Kappa on Dataset")
# plt.xlabel("Epoch #")
# plt.ylabel("Cohen Kappa")
# plt.legend(loc="lower left")
# plt.savefig(save_plot_file_kappa)

In [27]:
# Convert the notebook <module_name>.ipynb into <module_name>.py, then trim the
# generated script.
import os
module_name = 'effnB3regr'

# Run the converter through the shell; its exit status is not checked.
os.system('jupyter nbconvert --to python ' + module_name + '.ipynb')
# Read the generated script fully into memory first, because the same file is
# rewritten below.
with open(module_name + '.py', 'r') as f:
    lines = f.readlines()
# Write the script back, stopping at the first line that contains the marker
# text; that line and everything after it are dropped (presumably to keep the
# export step itself out of the generated script).
with open(module_name + '.py', 'w') as f:
    for line in lines:
        if 'nbconvert --to python' in line:
            break
        else:
            f.write(line)