In [None]:
import os, math, time
import numpy as np
import matplotlib.pyplot as plt


# seed for reproducibility
import random 
import tensorflow as tf
from keras import backend as K
# One seed shared by every random number generator used in this notebook.
seed_value = 101

# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so assigning it
# here only reaches child processes; hashing in the running kernel is unchanged.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# Seed the Python, NumPy and TensorFlow global generators.
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)


import keras
from keras import models, layers
from tensorflow.keras.callbacks import Callback
from IPython.display import clear_output
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics

from sklearn.utils import class_weight
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

In [None]:
# ---- experiment configuration ----

# Number of most-frequent label classes kept as training targets; samples of
# any rarer class are split off into the "other" subset when the data is cut.
NUM_TARGET_LABELS = 5

# Label name abbreviations.
# NOTE(review): not referenced by any other visible cell — confirm how these
# map onto the frequency-ranked class indices.
label_names = ['com','cod','ca','st','ua','au','cy','br','others']

# Input arrays, relative to the notebook's working directory.
fname_image = '../data/data_image.npy'
fname_labels = '../data/labels.npy'

# Base directory for relative paths.
file_path = './'

# Results are written below ../result/<exp_name>.
# NOTE(review): the name says 'ResNet152' but no ResNet is built in the
# visible cells — confirm the intended experiment name.
exp_name = 'ResNet152'
result_path = '../result/' + exp_name

In [None]:
######################## IF WINDOWS,
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all.
# `gpu` is the (possibly empty) list of visible physical GPUs.
gpu = tf.config.experimental.list_physical_devices('GPU')
if not gpu:
    # Indexing gpu[0] on a CPU-only machine would raise IndexError.
    print('No GPU detected; running on CPU.')
for gpu_device in gpu:
    # Apply the same setting to every device, not only the first one.
    try:
        tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Raised when the setting is changed after the GPU was initialised.
        print(e)

In [None]:
####################################################
#######  EXECUTE ONLY IF USING GOOGLE COLAB ########
#from google.colab import drive
#drive.mount('/content/gdrive/')

#file_path = './gdrive/My Drive/kidney_stone/code'
#fname_image = os.path.join(file_path, fname_image)
#fname_labels = os.path.join(file_path, fname_labels)
####################################################

In [None]:
### set data and labels
# Loads the image array and the label rows, ranks every distinct label row by
# frequency (class 0 = most frequent), and splits the samples into the
# NUM_TARGET_LABELS most frequent classes (data_final / label_final) and the
# remainder (data_other / label_other). Images are scaled by 1/255.

# load data and labels
data = np.load(fname_image)
labels = np.load(fname_labels)
assert len(data) == len(labels), "data and labels must contain the same number of samples"

# set unique labels
# One np.unique call yields the distinct label rows, the sample -> distinct-row
# mapping, and the frequency of every distinct row.
label_unique, unique_of_sample, label_unique_counts = np.unique(
    labels, axis=0, return_inverse=True, return_counts=True)
# Flatten defensively: the shape of the inverse for axis-wise unique has
# varied between NumPy releases.
unique_of_sample = unique_of_sample.reshape(-1)
# Kept as a list: the per-label index arrays have different lengths, and
# packing ragged arrays into np.array fails on recent NumPy versions.
label_unique_indices = [np.flatnonzero(unique_of_sample == u) for u in range(len(label_unique))]

# sort by frequency (descending); a stable sort keeps tied labels in a
# deterministic order
temp_ind = np.argsort(-label_unique_counts, kind='stable')
label_class_info = label_unique[temp_ind]
label_class_indices = [label_unique_indices[u] for u in temp_ind]
label_class_counts = label_unique_counts[temp_ind]

# final label and info
# class_of_unique[u] is the frequency rank (class index) of distinct row u.
class_of_unique = np.empty(len(temp_ind), dtype=int)
class_of_unique[temp_ind] = np.arange(len(temp_ind))
# Built as a new integer array so `labels` itself is never modified.
label_final = class_of_unique[unique_of_sample]

# Original label row of every sample (computed before the cut below).
label_info = [label_class_info[x] for x in label_final]


# cut data and labels
# Samples whose class rank falls outside the target set.
target_ind = label_final >= NUM_TARGET_LABELS
label_other = label_final[target_ind]
data_other = data[target_ind] / 255.0  # assumes 8-bit pixel values — TODO confirm

# Samples belonging to the NUM_TARGET_LABELS most frequent classes.
target_ind = label_final < NUM_TARGET_LABELS
label_final = label_final[target_ind]
data_final = data[target_ind] / 255.0


In [None]:
### model and function definition

# model class
class Network():
  """Small four-stage CNN classifier for 160x160 RGB inputs.

  Call create() to build and compile the Keras model, which is then
  available as ``self.model``.
  """

  # Class-level defaults.
  start_time = 0

  num_output_label = 4

  def __init__(self, num_output_label=4):
    """Remember how many softmax output units the classifier needs."""
    self.num_output_label = num_output_label

  def create(self):
    """Build and compile a new Sequential model and store it in ``self.model``."""
    net = self.model = models.Sequential()

    # Four identical stages: Conv(3x3, relu, same) -> MaxPool(2x2) -> Dropout(0.25),
    # doubling the filter count each time.
    for stage, n_filters in enumerate((64, 128, 256, 512)):
      # Only the very first layer declares the input shape.
      first_layer_args = {'input_shape': (160, 160, 3)} if stage == 0 else {}
      net.add(layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same', **first_layer_args))
      net.add(layers.MaxPooling2D((2, 2)))
      net.add(layers.Dropout(0.25))

    # Classifier head.
    head = (
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(self.num_output_label, activation='softmax'),
    )
    for head_layer in head:
      net.add(head_layer)

    net.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# training history callback class
class TrainingHistoryCallback(Callback):
  """Record per-epoch training history and checkpoint the best models.

  After every epoch the callback stores the train/validation loss and
  accuracy, the model's predictions on ``x_test``, and saves the model into
  ``save_path`` whenever validation loss, validation accuracy, or the mean
  per-class F1 score on ``(x_test, y_test)`` improves.

  Args:
    fold_i: fold number; only used in the progress printout.
    x_test: inputs passed to ``model.predict`` after each epoch.
    y_test: one-hot targets for ``x_test`` (argmax over axis 1 is taken).
    save_path: directory receiving ``model_best_loss.hdf5``,
      ``model_best_acc.hdf5`` and ``model_best_f1.hdf5``.
    visualize: when True, redraw the loss/accuracy curves every
      ``update_freq`` epochs.
  """

  def __init__(self, fold_i, x_test, y_test, save_path, visualize=True):
    # Let the Keras base class initialise its own attributes first.
    super().__init__()
    self.fold_i = fold_i
    self.x_test = x_test
    self.y_test = y_test
    self.save_path = save_path
    self.visualize = visualize

  def on_train_begin(self, logs=None):
    """Reset every history list and best-score tracker for a new run."""
    self.update_freq = 10
    self.curr_epoch = 0
    self.x = []

    self.train_loss = []
    self.train_acc = []
    self.val_loss = []
    self.val_acc = []

    # Set to the most recent progress figure by on_epoch_end; no figure is
    # created up front, so nothing empty is rendered or leaked per fold.
    self.fig = None

    self.logs = []
    self.y_pred = []

    self.best_val_loss = np.inf
    self.best_acc = 0
    self.best_f1 = 0

    self.best_val_loss_epoch = None
    self.best_acc_epoch = None
    self.best_f1_epoch = None

  def on_epoch_end(self, epoch, logs=None):
    """Append this epoch's metrics, checkpoint on improvement, optionally plot."""
    logs = logs or {}

    self.curr_epoch += 1
    self.x.append(self.curr_epoch)
    self.train_loss.append(logs.get('loss'))
    self.train_acc.append(logs.get('accuracy'))
    self.val_loss.append(logs.get('val_loss'))
    self.val_acc.append(logs.get('val_accuracy'))
    self.logs.append(logs)

    # Predict once per epoch and reuse the result for the F1 score below.
    y_prob = self.model.predict(self.x_test)
    self.y_pred.append(y_prob)

    # check whether model has the best validation loss
    # (the metric is None when training runs without validation data)
    val_loss = logs.get('val_loss')
    if val_loss is not None and val_loss < self.best_val_loss:
      self.best_val_loss = val_loss
      self.best_val_loss_epoch = self.curr_epoch
      self.model.save(os.path.join(self.save_path,'model_best_loss.hdf5'),overwrite=True)

    # check whether model has the best validation accuracy
    val_acc = logs.get('val_accuracy')
    if val_acc is not None and val_acc > self.best_acc:
      self.best_acc = val_acc
      self.best_acc_epoch = self.curr_epoch
      self.model.save(os.path.join(self.save_path,'model_best_acc.hdf5'),overwrite=True)

    # check whether model has the best mean f1 score
    # zero_division=0 keeps the default score for classes that were never
    # predicted but suppresses the warning emitted on every such epoch.
    _, _, fscore, _ = metrics.precision_recall_fscore_support(
        np.argmax(self.y_test, axis=1), np.argmax(y_prob, axis=1), zero_division=0)
    mean_f1 = np.mean(fscore)
    if mean_f1 > self.best_f1:
      self.best_f1 = mean_f1
      self.best_f1_epoch = self.curr_epoch
      self.model.save(os.path.join(self.save_path,'model_best_f1.hdf5'),overwrite=True)

    # visualization
    if (self.visualize and self.curr_epoch % self.update_freq == 0):
      clear_output(wait=True)

      self.fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12,5))
      # The first five epochs are left out of the loss plot.
      ax_loss.plot(self.x[5:], self.train_loss[5:], label="train_loss")
      ax_loss.plot(self.x[5:], self.val_loss[5:], label="val_loss")
      ax_loss.legend()

      ax_acc.plot(self.x, self.train_acc, label="train_acc")
      ax_acc.plot(self.x, self.val_acc, label="val_acc")
      ax_acc.legend()
      plt.show()
      # Release the figure so repeated redraws do not accumulate in memory.
      plt.close(self.fig)

      print("fold = ",self.fold_i, "epoch =",self.curr_epoch)
      print("\ttrain_loss = ", self.train_loss[-1], ", val_loss = ", self.val_loss[-1])
      print("\t\ttrain_acc = ", self.train_acc[-1], ", val_acc = ", self.val_acc[-1])


In [None]:
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications import vgg16

from tensorflow.keras.applications import VGG16


class VGG16Network(Network):
  """Transfer-learning network: ImageNet VGG16 convolutional base + new dense head.

  Offers the same ``create()`` / ``model`` interface as ``Network`` so the
  training loop can rebuild it from scratch for every fold.

  Args:
    num_output_label: number of softmax output units.
    input_shape: shape of one input image; defaults to the 160x160 RGB
      shape that ``Network`` is built for.
  """

  def __init__(self, num_output_label=4, input_shape=(160, 160, 3)):
    super().__init__(num_output_label=num_output_label)
    self.input_shape = input_shape

  def create(self):
    """Build and compile a fresh model into ``self.model``."""
    # Load the pretrained convolutional layers (without the ImageNet classifier).
    # NOTE(review): the ImageNet weights were trained on inputs prepared with
    # vgg16.preprocess_input, while the data cell scales pixels by 1/255 —
    # confirm which preprocessing is intended.
    conv_layers = VGG16(weights='imagenet', include_top=False, input_shape=self.input_shape)

    # Create the new model
    self.model = models.Sequential()

    # Add the VGG16 convolutional layers
    self.model.add(conv_layers)

    # Rebuild the fully connected part of the model
    self.model.add(layers.Flatten())
    self.model.add(layers.Dense(1024, activation='relu'))
    self.model.add(layers.Dropout(0.5))
    self.model.add(layers.Dense(self.num_output_label, activation='softmax'))

    # Same training configuration as Network.create().
    self.model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])


# The training loop calls model.create() and then uses model.model.
model = VGG16Network(num_output_label=NUM_TARGET_LABELS)


In [None]:
### train and evaluate model
# Runs stratified 10-fold cross-validation: for every fold a fresh network is
# built, trained with augmentation and class weighting, and its history is
# written to <result_path>/<fold>/mat.npz.
# NOTE(review): the held-out fold is also used as validation data for early
# stopping and checkpoint selection, so the reported scores are optimistic.

# one-hot encoding (width fixed to the number of network outputs)
label_final_onehot = keras.utils.to_categorical(label_final, num_classes=NUM_TARGET_LABELS)

# kfold (seeded so the fold assignment is identical on every run)
kfold_divider = StratifiedKFold(n_splits = 10, shuffle=True, random_state=seed_value)

# initialize (safe to re-run; creates missing parent directories)
os.makedirs(result_path, exist_ok=True)

# data augmentation (identical settings for every fold)
data_generator = ImageDataGenerator(rotation_range=90,
                                    zoom_range=[0.5,1.0],
                                    horizontal_flip=True,
                                    vertical_flip=True,
                                    width_shift_range=0.2,
                                    height_shift_range=0.2,
                                    fill_mode='wrap')

batch_size=256
epochs=1000

# main
for fold_i, (ind_train,ind_test) in enumerate(kfold_divider.split(data_final, label_final)):
  # set path to save
  result_path_current = f'{result_path}/{fold_i+1}'
  os.makedirs(result_path_current, exist_ok=True)

  # select the samples of this fold
  x_train, x_test = data_final[ind_train], data_final[ind_test]
  y_train, y_test = label_final_onehot[ind_train], label_final_onehot[ind_test]

  # class weighting, computed from the training fold only
  label_int = np.argmax(y_train, axis=1)
  classes_present = np.unique(label_int)
  class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                    classes=classes_present,
                                                    y=label_int)
  class_weights_dict = dict(zip(classes_present.tolist(), class_weights.tolist()))

  # training initialization
  # Use the object from the previous cell when it exposes create(); when it
  # does not (e.g. it is a plain Keras model), fall back to the baseline CNN.
  # Either way create() below builds new weights for this fold.
  network = model if hasattr(model, 'create') else Network(num_output_label=NUM_TARGET_LABELS)

  history_callback = TrainingHistoryCallback(fold_i+1, x_test, y_test, result_path_current, visualize=False)
  earlystop_callback = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=150)

  start_time = time.time()

  # training
  network.create()
  network.model.summary()

  training_history = network.model.fit(
      data_generator.flow(x_train, y_train, batch_size=batch_size),
      validation_data=(x_test,y_test),
      # ceil so a fold smaller than one batch still gets one step and no
      # partial batch is dropped
      steps_per_epoch=math.ceil(x_train.shape[0] / batch_size),
      epochs=epochs, verbose=1,
      class_weight=class_weights_dict,
      callbacks = [history_callback, earlystop_callback]
  )

  # save result
  elapsed_time = np.array(time.time() - start_time)

  x_other = data_other
  # Integer labels go to the file under their own name; y_test itself stays
  # one-hot for later cells.
  y_test_int = np.argmax(y_test, axis=1)
  ind_test = np.array(ind_test)

  epoch_list = np.array(history_callback.x)
  train_loss = np.array(history_callback.train_loss)
  train_acc = np.array(history_callback.train_acc)
  val_loss = np.array(history_callback.val_loss)
  val_acc = np.array(history_callback.val_acc)

  pred_history = np.array(history_callback.y_pred)

  best_val_loss_epoch = history_callback.best_val_loss_epoch
  best_acc_epoch = history_callback.best_acc_epoch
  best_f1_epoch = history_callback.best_f1_epoch

  np.savez(os.path.join(result_path_current,'mat.npz'),
          elapsed_time=elapsed_time,
          x_test=x_test, y_test=y_test_int, ind_test=ind_test,
          epoch_list=epoch_list,
          train_loss=train_loss, train_acc=train_acc,
          val_loss=val_loss, val_acc=val_acc,
          pred_history=pred_history,
          x_other=x_other,
          best_val_loss_epoch=best_val_loss_epoch, best_acc_epoch=best_acc_epoch, best_f1_epoch=best_f1_epoch
          )


 0 0 0 4 0 0 4 0 1 1 4 0 3 1 2 0 4 1 1 1 0 0 0 0 0 3 0 0 0 0 0 1 1 1 3 1 0
 0 2 2 0 0 0 2 0 0 1 0 0 4 3 2 3 2 1 0 4 1 3 3 2 2 0 0 1 1 4 0 0 0 0 2 0 0
 1 0 4 1 4 1 0 0 0 0 0 0 4 3 0 0 2 4 0 0 3 3 1 1 1 0 1 4 1 0 1 1 0 0 0 3 4
 0 0 1 2 0 0 4 0 0 1 2 1 0 4 1 0 2 0 0 0 2 3 3 3 2 0 4 1 1 0 0 0 0 2 0 2 0
 0 3 0 0 0 2 0 2 2 0 0 0 2 1 0 0 1 2 3 0 1 1 0 3 3 2 3 3 0 0 0 1 3 4 0 1 4
 0 1 2 0 2 0 0 4 3 0 0 0 0 0 0 2 3 0 0 4 0 0 2 0 0 0 0 1 4 2 2 1 0 1 1 2 1
 1 2 0 1 0 1 2 2 0 3 2 4 3 3 0 3 2 0 0 4 4 4 0 2 0 3 0 4 3 0 0 0 0 0 1 0 0
 0 0 0 0 0 3 3 0 0 4 1 1 4 1 3 0 4 3 0 0 1 2 2 0 0 0 2 0 4 3 3 2 0 0 2 0 1
 0 3 3 0 0 2 4 3 2 0 1 0 3 1 0 1 0 4 3 0 1 2 2 0 0 2 3 0 0 4 0 1 1 0 0 0 0
 2 2 0 0 1 0 0 1 2 2 0 2 2 0 2 1 0 1 1 1 1 1 0 1 1 0 1 1 1 0 0 1 0 1 0 4 1
 4 2 1 0 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 2 0 0 0 2 0] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_20 (Conv2D)           (None, 160, 160, 64)      1792      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 80, 80, 64)        0         
_________________________________________________________________
dropout_25 (Dropout)         (None, 80, 80, 64)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 80, 80, 128)       73856     
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 40, 40, 128)       0         
_________________________________________________________________
dropout_26 (Dropout)         (None, 40, 40, 128)       0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 40, 40, 256)      

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_24 (Conv2D)           (None, 160, 160, 64)      1792      
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 80, 80, 64)        0         
_________________________________________________________________
dropout_30 (Dropout)         (None, 80, 80, 64)        0         
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 80, 80, 128)       73856     
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 40, 40, 128)       0         
_________________________________________________________________
dropout_31 (Dropout)         (None, 40, 40, 128)       0         
____________________

KeyboardInterrupt: 

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

In [None]:
### evaluation
# Scores the last fold only: x_test, y_test and result_path_current are the
# values left behind by the final iteration of the training loop.

# Reload the checkpoint with the lowest validation loss from disk.
# NOTE(review): model_best_acc.hdf5 and model_best_f1.hdf5 are saved alongside
# it — confirm which checkpoint should be reported.
best_model = keras.models.load_model(os.path.join(result_path_current, 'model_best_loss.hdf5'))

# Accept both one-hot (n, classes) and integer-encoded (n,) targets.
y_true = np.asarray(y_test)
if y_true.ndim > 1:
    y_true = np.argmax(y_true, axis=1)

# Predict once and reuse the probabilities for every metric.
y_prob = best_model.predict(x_test)
y_pred = np.argmax(y_prob, axis=1)
# Report one entry per network output, even for classes absent from this fold.
class_ids = np.arange(y_prob.shape[1])

accuracy = metrics.accuracy_score(y_true,y_pred)
precision, sensitivity, fscore, support = metrics.precision_recall_fscore_support(
    y_true, y_pred, labels=class_ids, zero_division=0)
auc1 = metrics.roc_auc_score(y_true, y_prob, multi_class='ovo')
auc2 = metrics.roc_auc_score(y_true, y_prob, multi_class='ovr')
conf_mat = metrics.confusion_matrix(y_true, y_pred, labels=class_ids)

print(accuracy)
print(precision)
print(sensitivity)
print(fscore)
print(support)
print(auc1)
print(auc2)
print(conf_mat)

0.2
[0.  0.2 0.  0. ]
[0.80952381 0.125      0.5        1.        ]
[0.         0.33333333 0.         0.        ]
[21  8  6  5]
0.5352347883597883
0.5300862795776206
[[ 0 21  0  0]
 [ 0  8  0  0]
 [ 0  6  0  0]
 [ 0  5  0  0]]


  _warn_prf(average, modifier, msg_start, len(result))
