# FO-731_Pre-train_JSN-prediction_classifier_1
Training a JSN-prediction classifier using a pre-trained ResNet autoencoder<br>


author = MV<br>
date = 2021-10-11<br>

_______________________________________

Training with image data and all numeric features

# Imports

In [None]:
! nvidia-smi

In [None]:
# Select the GPU this notebook runs on (must happen before TensorFlow is imported).
import os

os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

In [None]:
# Make the dnn2 framework and the labelbox-connector importable.
import sys

# Each path is inserted at position 1, so the one inserted last ends up first.
for extra_path in ("/srv/dnn-framework", "/srv/labelbox-connector"):
    sys.path.insert(1, extra_path)

In [None]:
#general
import matplotlib.pylab as plt
import matplotlib
import numpy as np
import pandas as pd
import os
import logging
import cv2
import albumentations as A

#dnn
import keras.backend as K
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard, EarlyStopping
from keras.optimizers import Adam
from keras.layers import Conv2D
from keras.models import Model, load_model

#framework
from framework.utils.app_base import DnnApp
from framework.utils import Landmarks, BoundingBox, img_utils
from framework.loader import LmBlobBatchGenerator
from framework.dataset import DatasetAugmented
from framework.preprocessing import preprocessing, image_processing
from framework.augmentor import AugmentorImage

from imgaug import augmenters as iaa
from sklearn.metrics import confusion_matrix, mean_squared_error, cohen_kappa_score, roc_auc_score, roc_curve, log_loss
from sklearn.metrics import precision_recall_fscore_support, classification_report
import itertools

from datetime import date

In [None]:
import keras
from keras.applications.resnet50 import ResNet50
from keras.models import Model

from network.iblmodel import IBLModel as Model
from network.RetinaNet.resnet import ResNet2D50
from keras.layers import Input, Dense, LeakyReLU, Dropout, Concatenate

In [None]:
#configs
%matplotlib inline
logging.basicConfig(format='%(asc' 'time)s %(name)-25s %(level' 'name)-8s %(message)s')
logging.getLogger().setLevel(logging.INFO) # you change this to logging.DEBUG to get more logging information

In [None]:
#session config - if needed
# Creates a TF1 session limited to half of the GPU memory and registers it
# with the Keras backend.
# NOTE(review): this rebinds `K`, which the imports cell bound to
# `keras.backend`, to `tensorflow.keras.backend` - confirm that is intended.
import tensorflow.keras.backend as K
config = tf.ConfigProto()
#config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.5
K.set_session(tf.Session(config=config))

In [None]:
# Load the master CSV into a DataFrame for inspection in the following cells.
df = pd.read_csv('/srv/Class_def_files/Nonan_Master_20JSN_XML_ex014_2.csv')

In [None]:
len(df)

In [None]:
df.columns

# Data Loading

## Load Yaml

In [None]:
# load data
# The DnnApp is built from the YAML definition; its `datasets` mapping is what
# the train/tune/test datasets are created from further down.
yaml_path= '/srv/Class_def_files/FO-731_Pre-training_JSN20%_Numeric.yaml'
app = DnnApp(yaml_path)

## Define Batchgenerator

In [None]:
class JSNClassBatchGenerator(LmBlobBatchGenerator):
    """Batch generator for classifying JSN into slow and fast progressors.

    Every batch consists of a dict of model inputs (``"input_2"``: stack of
    normalised images, ``"numeric"``: the columns listed in
    ``numeric_names``) and an array with the ``"class"`` label of each row.
    """

    def __init__(self,
                 dataset=None,
                 batch_size=None,
                 in_columns=None,
                 out_columns=None,
                 shuffle=False):
        """
        Override of init.

        :param dataset: forwarded to the base class; rows are later read via
            ``self.dataset[idx]``
        :param batch_size: forwarded to the base class
        :param in_columns: accepted for signature compatibility only; it is
            NOT forwarded, the image column ``"img_path_pro"`` is always used
        :param out_columns: not forwarded either, see ``in_columns``
        :param shuffle: forwarded to the base class
        """
        super().__init__(dataset, batch_size, {"img_path_pro":"img_path_pro"},{"img_path_pro":"img_path_pro"}, shuffle)
        self.classf_name = "class"
        self.file_name = "img_path_pro"
        self.numeric_names = [
            'Patient Sex_S',
            'BMI_S',
            'Patient Age_S',
            'Hip_symptoms_S',
            'WOMAC_dis_S',
            'WOMAC_pain_S',
            'WOMAC_stiff_S',
            'other_knee_KOA_S',
            'KL-grade_a_S']
        self.output_channels_classf = 2

        # augmentation
        self.rotate = None # rotate the img l/r with max this angle
        self.p_augment = 0.0 # probability with which to augment
        self.augmentation = False

        # preprocessing
        self.my_size = [512, 1024]
        # Overwritten with the real number of rows on every call of next_internal.
        self.current_batch_size = 4
        self.clahe = cv2.createCLAHE(clipLimit=2., tileGridSize=(3,7)) # 2, (16,16)

    @staticmethod
    def gaussian(x, mu, sig):
        """Return the unnormalised Gaussian ``exp(-(x - mu)^2 / (2 * sig^2))``."""
        return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))

    def do_augment(self, image, out_size):
        """
        Randomly augment a single image.

        With probability ``p_augment`` the albumentations pipeline below is
        applied, otherwise the image is returned unchanged.
        NOTE(review): this method is not called by ``next_internal`` and the
        ``augmentation`` flag is not read anywhere in this class.

        :param image: image handed to the albumentations pipeline
        :param out_size: unused, kept for interface compatibility
        :return: the augmented (or untouched) image
        """
        if np.random.uniform() >= self.p_augment:
            return image

        # `rotate` defaults to None, which is not a usable angle range for
        # A.Rotate; fall back to 0 degrees (no rotation) in that case.
        rotate_limit = 0 if self.rotate is None else self.rotate

        aug = A.Compose([A.RandomBrightness(p=0.5),
                   A.RandomGamma(p=0.5),
                   A.RandomContrast(limit=0.7,p=0.5),
                   A.ShiftScaleRotate(shift_limit=0.0325, scale_limit=0.15, rotate_limit=0, interpolation=1, border_mode=cv2.BORDER_CONSTANT, always_apply=False, p=0.5),
                   A.GaussNoise(p=0.3),
                   A.Rotate(limit=rotate_limit, interpolation=1, border_mode=cv2.BORDER_CONSTANT, always_apply=False, p=0.5),
                   A.GridDistortion(num_steps=3, distort_limit=0.05, p=0.5, border_mode=cv2.BORDER_CONSTANT),
                  ])

        return aug(image=image)['image']

    def next_internal(self):
        """
        Internal function of the next operator. The label output is NOT yet
        reshaped for keras; that happens in ``__next__``.

        Rows that cannot be loaded are logged and left out of the batch, so a
        batch may contain fewer rows than requested.

        :return: dict of inputs (``"input_2"``, ``"numeric"``), stack of labels
        :raises ValueError: if no row of the batch could be loaded
        """
        indices = self._get_next_chunk()

        columns = [self.file_name, self.classf_name] + self.numeric_names
        data_frame = pd.DataFrame(index=indices, columns=columns)

        logging.debug("JSNClassBatchGenerator: next internal dataset_name: %s", self.dataset.dataset_name)

        failed = []
        for idx in indices:
            try:
                row = self.dataset[idx]
                # Rescale intensities to [0, 255] as float32.
                row[self.file_name] = cv2.normalize(row[self.file_name], None, 0, 255, cv2.NORM_MINMAX, cv2.CV_32FC1)
                data_frame.loc[idx] = row
            except Exception as e:
                failed.append(idx)
                logging.exception("JSNClassBatchGenerator: Row idx {0}: {1}".format(idx, e))

        # A failed row would stay all-NaN and make np.stack fail with an
        # unrelated shape error, so drop it explicitly.
        if failed:
            data_frame = data_frame.drop(index=failed)
        if data_frame.empty:
            raise ValueError("JSNClassBatchGenerator: no row of the batch could be loaded")

        # input_shape below depends on the number of rows actually loaded.
        self.current_batch_size = data_frame.shape[0]

        images = np.stack(data_frame[self.file_name]).astype(np.float32)
        outputs = np.stack(data_frame[self.classf_name]).astype(np.float32)

        # Keys must match the names of the model's input layers.
        inputs = {"input_2": images.reshape(self.input_shape)}
        inputs["numeric"] = np.stack(np.array(data_frame[self.numeric_names]))#.astype(np.float32)

        return inputs, outputs

    @property
    def out_shape(self):
        """Shape of the label array of the current batch: (batch, 1)."""
        return self.current_batch_size, 1

    @property
    def input_shape(self):
        """Shape of the image stack: (batch, my_size[1], my_size[0], input_channels).

        ``input_channels`` is not set in this class; presumably it is provided
        by the base class - TODO confirm.
        """
        return self.current_batch_size, self.my_size[1], self.my_size[0], self.input_channels

    def __next__(self):
        """
        Call of the batch generator by keras, Override

        :returns: dict of inputs, labels reshaped to ``out_shape``
        """
        inputs, outputs = self.next_internal()

        return inputs, outputs.reshape(self.out_shape)

## Define the batch generators (train / tune / test)

In [None]:
# One augmented dataset per split, built from the matching entry of the YAML app.
train, tune, test = (
    DatasetAugmented(split_name, app.datasets[yaml_key].all, None)
    for split_name, yaml_key in (
        ('train', 'data_train'),
        ('tune', 'data_tune'),
        ('test', 'data_test'),
    )
)

In [None]:
# Batch generators for the three splits; only the test generator keeps a fixed order.
batch_sz = 4
bg_train = JSNClassBatchGenerator(dataset=train, batch_size=batch_sz, shuffle=True)
bg_tune = JSNClassBatchGenerator(dataset=tune, batch_size=batch_sz, shuffle=True)
bg_test = JSNClassBatchGenerator(dataset=test, batch_size=batch_sz, shuffle=False)

In [None]:
# Pull one batch to check that loading works and to inspect its content.
next(bg_train)

In [None]:
# Augmentation stays switched off for this run; the commented lines are the
# settings that would be used to enable it.
#bg_train.rotate = 15
#bg_train.p_augment = 0.5  
bg_train.augmentation = False

# Create and Load Model

In [None]:
from network.RetinaNet.resnet_classifier import ResNetClassifier

## Load the ResNet50 Classifier Weights

In [None]:
path_weights_resnet50 = "/srv/dnn-framework/FO-719_RetinaNet_pretrain/res_net_Clf_run_Sep-21-2021.hdf5"

In [None]:
# Build the pre-training classifier and load its trained weights, so that the
# weight transfer further down copies pre-trained values instead of whatever
# `create()` initialised.
# NOTE(review): assumes the checkpoint was saved from exactly this
# architecture (num_classes=[2,7], input 1024x512x1) - confirm.
ResNet_clf = ResNetClassifier(num_classes = [2,7], in_size = (1024,512,1)).create()
ResNet_clf.load_weights(path_weights_resnet50)
ResNet_clf.summary()

## Remove and add new layers to model

In [None]:
# Numeric branch of the model: the tabular features go through one dense
# layer with dropout before being concatenated with the image features.
# `m` must equal the number of entries in JSNClassBatchGenerator.numeric_names
# (9), and the input name "numeric" must match the key the generator emits.
m = 9 # len numeric
inputs_2 = Input(shape=(m,), name="numeric")
dense = Dense(32 * m, activation="relu")(inputs_2)
dropout = Dropout(0.3)(dense)

In [None]:
inputs_2

In [None]:
# Assemble the JSN classifier: image features from the ResNet classifier
# (without its last layer) are merged with the numeric branch and mapped to a
# single sigmoid output named "jsn".
JSN_clf = ResNetClassifier(num_classes=[1], in_size = (1024,512,1)).create(head='default')
n=2048
inputs = JSN_clf.input

# Take the output of the second-to-last layer instead of popping the last
# layer: `layers.pop()` mutates the model's internal layer list and, depending
# on the Keras version, `layers` may be a copy so that the pop has no effect.
# NOTE(review): assumes the head created with num_classes=[1] is exactly one
# layer - confirm.
backbone_features = JSN_clf.layers[-2].output

#x = keras.layers.Dropout(0.5, name="Dropout_0.5")(JSN_clf.layers[-1].output)
x = keras.layers.Dense(n, activation=None)(backbone_features)
x = LeakyReLU()(x)
x = Dropout(0.3)(x)
# Merge with the numeric branch (`dropout` is the output of the numeric cell).
x = Concatenate()([dropout, x])

x = keras.layers.Dense(n//2, activation=None)(x)
x = LeakyReLU()(x)
x = Dropout(0.3)(x)

x = keras.layers.Dense(1, activation='sigmoid',name="jsn")(x)

# NOTE: `Model` is IBLModel here (imported as Model in the imports cell).
JSN_clf = Model([inputs, inputs_2], x)
JSN_clf.summary()

In [None]:
JSN_clf.inputs, JSN_clf.outputs

In [None]:
n

In [None]:
# Smoke test: run one random sample through the model to check that both
# inputs ("input_2" image, "numeric" features) are wired up correctly.
fast_n_dirty = {"input_2":np.random.uniform(size=(1,1024,512,1)), 'numeric':np.random.uniform(size=(1,m))}
JSN_clf.predict(fast_n_dirty)

### Transfer learning

In [None]:
# Copy the weights of layers 0..188 from the pre-training classifier into the
# JSN classifier; layers are paired purely by their position in `.layers`.
for layer_no, (pretrained_layer, target_layer) in enumerate(zip(ResNet_clf.layers, JSN_clf.layers)):
    print(layer_no, end='\r')
    if layer_no >= 189:
        break
    target_layer.set_weights(pretrained_layer.get_weights())

### Freezing layers
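Layers that already have good (pre-trained) weights should not be trained further. The cell below only lists the layers with their indices; it does not freeze any of them.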

In [None]:
# Print "<index>:<layer name>" for every layer of the JSN classifier.
for position, jsn_layer in enumerate(JSN_clf.layers):
    print(position, jsn_layer.name, sep=":")

# Train
## Define the training Schedule

In [None]:
# `today` goes into the run tag, `path_tensorboard` is the parent directory of
# the per-run TensorBoard logs.
today = date.today()
path_tensorboard = "/srv/dnn-framework/logs/tensorboard/"

# Uncomment to continue training from an earlier checkpoint.
#old_weights = './JSN_clf_run_Dec-15-2021_0_Inge_NoAug.hdf5'
#JSN_clf.load_weights(old_weights)

In [None]:
# First run
f_num = 0
# Run tag: used as checkpoint file name and as TensorBoard sub-directory.
tag = f'JSN_clf_run_{today.strftime("%b-%d-%Y")}_{f_num}_Jan_NoAug'
print(tag)

# Callbacks and optimizer
bg_train.batch_size = 8

# Learning-rate schedule on plateau (defined, but not added to `callbacks`).
reduce_call_back = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1,
    mode='min',
    cooldown=2,
    min_lr=1e-12,
)

# Keep only the weights with the best validation loss.
model_chkpnt_call_back = ModelCheckpoint(
    f"{tag}.hdf5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)

tb_call_back = TensorBoard(
    log_dir=os.path.join(path_tensorboard, tag),
    histogram_freq=0,
    write_graph=True,
    write_images=True,
    write_grads=True,
)

# Early stopping (defined, but not added to `callbacks`).
early_stopping = EarlyStopping(monitor="val_loss", patience=12, verbose=1)

callbacks = [
    #reduce_call_back,
    model_chkpnt_call_back,
    tb_call_back,
    #early_stopping
]

# Batch sizes actually used for this run.
bg_train.batch_size = 4
bg_tune.batch_size = 4
bg_train.augmentation = False
bg_train.current_batch_size

opt = Adam(lr=1e-5)
JSN_clf.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics={'jsn': tf.keras.metrics.AUC()},
)

# Train
JSN_clf.fit_generator(
    bg_train,
    len(bg_train),  # Nitems in train set
    300,  # total epochs
    verbose=1,
    # validation set gen (img, labels)
    validation_data=bg_tune,
    # Nitems in validation set
    validation_steps=len(bg_tune),
    callbacks=callbacks,
    initial_epoch=0,
)

# Evaluate 

In [None]:
from framework.validation.validation_classification import ClassificationValidator


In [None]:
# Evaluate a fixed, previously saved checkpoint.
# NOTE(review): this is a hard-coded file, not the `tag` checkpoint written by
# the training cell above - confirm this is the run you want to evaluate.
old_tag = './JSN_clf_run_Nov-25-2021_188_Ferdi_NoAug.hdf5'
JSN_clf.load_weights(old_tag)

In [None]:
JSN_clf_val_bg = ClassificationValidator(JSN_clf)
JSN_clf_val_bg.evaluate(bg_test)

In [None]:
# Collect labels and predictions of the whole test set.
# The batches are gathered in lists and stacked once at the end; seeding the
# arrays with a dummy [0] row would add a fake (true=0, pred=0) sample to the
# confusion matrix and the ROC curve.
true_batches = []
pred_batches = []
for _ in range(len(bg_test)):
    batch_inputs, batch_labels = next(bg_test)
    true_batches.append(batch_labels)
    pred_batches.append(JSN_clf.predict(batch_inputs))

# Both arrays have shape (n_samples, 1).
true_y = np.vstack(true_batches)
pred_y = np.vstack(pred_batches)

In [None]:
pred_y[0]

In [None]:
from sklearn.metrics  import  confusion_matrix

In [None]:
confusion_matrix(true_y, pred_y>0.5)

In [None]:
from sklearn.metrics import roc_curve,auc

In [None]:
# ROC curve from the raw sigmoid scores; `t` holds the thresholds and `auc`
# is sklearn.metrics.auc (area under the fpr/tpr curve).
fpr, tpr, t = roc_curve(true_y, pred_y)
auc(fpr,tpr)

In [None]:
# ROC plot with the chance diagonal for reference.
unit_range = (0, 1)
plt.ylim(unit_range)
plt.xlim(unit_range)

plt.plot(fpr, tpr)
plt.plot(list(unit_range), list(unit_range))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
#plt.plot(fpr,t)

In [None]:
df = pd.read_csv('/srv/Class_def_files/Nonan_Master_20JSN_XML_ex014_2_OAI.csv')
df[['class']]

In [None]:
bg_test.dataset.data['class'].apply(lambda x: x.data)

In [None]:
JSN_clf_val_bg.print_error_stats()

In [None]:
JSN_clf_val_bg.show_confusion_matrix()

In [None]:
JSN_clf_val_bg.result

In [None]:
def show_confusion_matrix(cm, classes, fs=(6, 6)):
    """Plot a row-normalised confusion matrix as a heat map.

    Every cell shows the percentage of the samples of its true class (row)
    that were assigned to the predicted class (column).

    :param cm: square confusion matrix with counts, rows = true labels
    :param classes: tick labels, one per class
    :param fs: figure size passed to ``plt.figure``
    """
    # NOTE: changes the global matplotlib font size for all later plots too.
    plt.rcParams.update({'font.size': 16})
    plt.figure(figsize=fs)

    cm = np.asarray(cm)
    row_totals = cm.sum(axis=1, keepdims=True).astype('float')
    # Rows without any sample (class absent from the true labels) stay 0
    # instead of turning into NaN through a division by zero.
    cm = np.divide(cm.astype('float'), row_totals,
                   out=np.zeros(cm.shape, dtype='float'),
                   where=row_totals != 0)
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Greens, resample=False)

    tick_marks = np.arange(len(classes))
    plt.yticks(tick_marks, classes)
    plt.xticks(tick_marks, classes)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # White text on the darkest cells, black everywhere else.
        plt.text(j, i, np.round(cm[i, j] * 100, 1),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > 1.5 * thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
        
def make_binary(arr, thresh=0.5):
    """Turn scores into 0/1 labels: 1 where the value is strictly above ``thresh``."""
    scores = np.asarray(arr)
    above = np.greater(scores, thresh)
    return above.astype(int)

In [None]:
# Generator used by the prediction loop below; this is an alias, so iterating
# `bg_use` advances `bg_test` as well.
bg_use = bg_test
#bg_use.batch_size = 248

In [None]:
# Show the generator selected above (`bg` is not defined anywhere in this notebook).
bg_use

In [None]:
# Collect 0/1 labels and thresholded predictions for every test batch.
all_labels = []
all_preds = []

n_batches = len(bg_use)
for i in range(n_batches):
    # The generator yields (inputs dict, label array of shape (batch, 1));
    # the labels are a plain array, not a dict keyed by output name.
    batch_inputs, labs = next(bg_use)

    pred = JSN_clf.predict(batch_inputs)

    # The model has a single sigmoid output, so argmax over axis 1 would
    # always be 0; threshold the score instead.
    all_labels += np.asarray(labs).astype(int).flatten().tolist()
    all_preds += make_binary(pred).flatten().tolist()

    print(f"Mini Batch {i+1}/{n_batches}", end='\r')

In [None]:
all_preds
