In [None]:
from keras.models import *
from keras.layers import *
from keras.callbacks import *
from keras.optimizers import *
from keras.applications import *
from keras.regularizers import *
from keras.preprocessing.image import *
from keras.utils.multi_gpu_utils import multi_gpu_model

from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot, plot_model

import time
from glob import glob
from tqdm import tqdm
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from sklearn.metrics import *

import multiprocessing
from multiprocessing.dummy import Pool
from pprint import pprint

In [None]:
import horovod.keras as hvd
from keras import backend as K

# `tf` is used below but was never imported explicitly in this notebook; at best it
# leaked into scope through one of the wildcard keras imports. Import it here so the
# cell does not depend on that side effect.
import tensorflow as tf

# Start Horovod, then build a TF session that exposes exactly one GPU to this process:
# the device whose index equals the process's local rank.
hvd.init()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
# Make Keras use the session configured above.
K.set_session(tf.Session(config=config))

In [None]:
import argparse

def str2bool(v):
    """Convert a textual flag value to a bool (intended as an argparse ``type=``).

    Args:
        v: A string such as ``'yes'``/``'no'``, ``'true'``/``'false'``,
            ``'t'``/``'f'``, ``'y'``/``'n'`` or ``'1'``/``'0'`` (case-insensitive).
            A value that is already a ``bool`` is returned unchanged.

    Returns:
        The corresponding ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the string is not one of the accepted spellings.
    """
    # Allow callers to hand in a real bool without tripping over `.lower()`.
    if isinstance(v, bool):
        return v

    token = v.lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

# Command-line parser for the training run; the help output also shows each option's default.
parser = argparse.ArgumentParser(
    description='training.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

In [None]:
# Hard-coded defaults. Every name assigned here is assigned again by the
# argument-parsing cell further down, so these values only matter when that
# cell is not executed.
model_name = 'irv2'
MODEL = InceptionResNetV2
preprocess_func = inception_resnet_v2.preprocess_input
batch_size = 2
epochs = [50, 20]

# Network input size: 2560x1920 divided by the scale factor f.
f = 2
width, height = int(2560 / f), int(1920 / f)

# Class directory names, in label-index order.
classes = ['正常', '吊经', '擦洞', '跳花', '毛洞', '织稀', '扎洞', '缺经', '毛斑', '其他']
n_classes = len(classes)

In [None]:
# --- data / model selection -------------------------------------------------
parser.add_argument(
    '--batch_size',
    default=2,
    type=int,
    help='batch_size',
)
# (disabled) --model_name: file name for the saved model, without the .h5 suffix.
# The name is derived from the other options instead.
parser.add_argument(
    '--preprocess_func',
    default='inception_resnet_v2.preprocess_input',
    type=str,
    help='预处理函数',
)
parser.add_argument(
    '--MODEL',
    default='InceptionResNetV2',
    type=str,
    help='预训练模型',
)

# --- optimisation -----------------------------------------------------------
parser.add_argument(
    '--f',
    default=2,
    type=float,
    help='缩放系数',
)
parser.add_argument(
    '--lr',
    default=1e-4,
    type=float,
    help='学习率',
)
parser.add_argument(
    '--epochs',
    default=[50, 30, 20],
    nargs='*',
    type=int,
    help='每一次的训练代数（用空格分隔），学习率按lr的0.1倍递减',
)
parser.add_argument(
    '--optimizer',
    default='Adam(lr)',
    type=str,
    help='优化器',
)
parser.add_argument(
    '--l2',
    default=1e-5,
    type=float,
    help='L2正则化',
)
parser.add_argument(
    '--balance_class_weight',
    default=False,
    type=str2bool,
    help='类别权重按比例给定',
)

args = parser.parse_args()

epochs = args.epochs
batch_size = args.batch_size

# The preprocessing function and the model constructor are given by name on the
# command line and resolved against the names already imported in this notebook.
# NOTE(review): eval() executes whatever expression is passed in, so these two
# options must only ever receive trusted input.
preprocess_func = eval(args.preprocess_func)
MODEL = eval(args.MODEL)

lr, f, l2_rate = args.lr, args.f, args.l2

# Network input size: 2560x1920 divided by the scale factor f.
width, height = int(2560 / f), int(1920 / f)

# Class directory names, in label-index order.
classes = ['正常', '吊经', '擦洞', '跳花', '毛洞', '织稀', '扎洞', '缺经', '毛斑', '其他']
n_classes = len(classes)

# The run name is built from the model class, today's month+day, the scale factor
# and the L2 rate; there is no --model_name option.
# NOTE(review): argparse applies type= only to string defaults, so the default
# f is the int 2 ("f2") whereas "--f 2" yields the float 2.0 ("f2.0").
now = time.strftime('%m%d')
model_name = '_'.join([MODEL.__name__, now, f'f{f}', f'reg{l2_rate}'])

# Only the rank-0 process prints the run summary.
if hvd.rank() == 0:
    rule = '-'*80
    print(rule)
    print('开始训练 %s 模型。' % model_name)
    print(args)
    print(width, height, n_classes)
    print(classes)
    print(rule)

In [None]:
# Training images are augmented (horizontal flip, rotation, zoom); validation images are not.
idg = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=15,
    zoom_range=0.2,
)
idg2 = ImageDataGenerator()

# Both iterators read class sub-directories named after `classes` and resize to (height, width).
gen_train = idg.flow_from_directory(
    'train',
    target_size=(height, width),
    interpolation='lanczos',
    classes=classes,
    batch_size=batch_size,
)
# shuffle=False keeps the validation order fixed so predictions can be compared
# against `gen_valid.classes`.
gen_valid = idg2.flow_from_directory(
    'valid',
    target_size=(height, width),
    shuffle=False,
    interpolation='lanczos',
    classes=classes,
    batch_size=batch_size,
)
# Binary validation target: 0 where the class index is 0, 1 for every other class.
y_valid = (gen_valid.classes != 0).astype(int)
y_train = gen_train.classes

# Optional per-class loss weights, passed to fit_generator(class_weight=...).
# Previously the computed weights were discarded (class_weights stayed None) and
# `class_weight` was not an imported name, so enabling the flag could not work.
class_weights = None
if args.balance_class_weight:
    from sklearn.utils.class_weight import compute_class_weight

    present_classes = np.unique(y_train)
    balanced = compute_class_weight('balanced', classes=present_classes, y=y_train)
    # Keras expects a {class_index: weight} mapping.
    class_weights = {int(c): float(w) for c, w in zip(present_classes, balanced)}

In [None]:
class RocAucMetricCallback(Callback):
    """Keras callback that scores the model on `gen_valid` after every epoch.

    The multi-class prediction is reduced to a binary problem (class index 0
    versus everything else) and compared with the global `y_valid`. The
    resulting AUC and accuracy, plus the current learning rate, are written
    into `logs` as ``val_auc``, ``val_acc`` and ``lr``.
    """

    def __init__(self):
        super(RocAucMetricCallback, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        """Run validation and record ``val_auc``, ``val_acc`` and ``lr`` in `logs`.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict for this epoch; updated in place. A fresh dict is
                used when omitted, so no state is shared between calls.
        """
        if logs is None:
            logs = {}

        # Use the model attached to this callback, as the lr lookup below already does,
        # rather than reaching for a module-level `model`.
        y_pred = self.model.predict_generator(gen_valid, verbose=1, workers=multiprocessing.cpu_count())
        # Score for "not class 0": one minus the predicted probability of class 0.
        y_pred2 = 1 - y_pred[:,0]
        # Hard decision: 1 when the arg-max class is anything other than class 0.
        y_pred3 = 1 - (np.argmax(y_pred, axis=-1) == 0)

        auc = roc_auc_score(y_valid, y_pred2)
        acc = accuracy_score(y_valid, y_pred3)
        logs['val_auc'] = auc
        logs['val_acc'] = acc
        logs['lr'] = float(K.get_value(self.model.optimizer.lr))
        print(f'ACC: {acc:.4f} AUC: {auc:.4f}')

In [None]:
# Convolutional backbone with ImageNet weights and without its classification head.
base_model = MODEL(
    input_shape=(height, width, 3),
    include_top=False,
    weights='imagenet',
)

In [None]:
# Assemble the classifier: input -> preprocessing -> backbone -> global average
# pooling -> dropout -> softmax over the n_classes labels.
input_tensor = Input((height, width, 3))
# Preprocessing is applied inside the model, so it receives the generator output as-is.
x = Lambda(preprocess_func)(input_tensor)
x = base_model(x)
x = GlobalAvgPool2D()(x)
x = Dropout(0.5)(x)
x = Dense(n_classes, activation='softmax')(x)
model = Model(input_tensor, x)

In [None]:
# Add an L2 penalty on the kernels of the backbone layers and of the classifier head.
# Assigning `kernel_regularizer` on a layer that is already built does not add anything
# to the model's loss: Keras registers regulariser losses when the weight is created.
# The penalty is therefore also registered explicitly with `add_loss`.
# Layers that already carry a regulariser are skipped, so re-running this cell does
# not register the same penalty twice.
regularizer = l2(l2_rate)
for layer in base_model.layers + model.layers:
    if not hasattr(layer, 'kernel_regularizer') or not hasattr(layer, 'kernel'):
        continue
    if layer.kernel_regularizer is not None:
        continue
    print(layer)
    layer.kernel_regularizer = regularizer
    layer.add_loss(regularizer(layer.kernel))

In [None]:
# Write a diagram of the model to logs/. The directory is created first because
# plot_model cannot write the file if it does not exist.
# NOTE(review): there is no rank guard here, so every Horovod process writes the same file.
os.makedirs('logs', exist_ok=True)
plot_model(model, show_shapes=True, to_file=f'logs/plot_{model_name}.png')
# SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

In [None]:
def train(start, end, lr, warmup=False):
    """Compile the global `model` and train it from epoch `start` up to epoch `end`.

    Args:
        start: Value passed to fit_generator as ``initial_epoch``.
        end: Value passed to fit_generator as ``epochs`` (the final epoch index).
        lr: Learning rate. It is not used directly here; the optimizer expression
            in ``args.optimizer`` (default ``'Adam(lr)'``) reads it by name.
        warmup: When true, add Horovod's learning-rate warm-up callback (5 epochs).

    Side effects:
        On rank 0, writes weight checkpoints under ``models/`` and a CSV log under
        ``logs/``; both directories are created if missing.
    """
    # NOTE(review): eval() executes whatever expression --optimizer contains, so the
    # option must only receive trusted input. The expression sees the local `lr`.
    opt = eval(args.optimizer)
    opt = hvd.DistributedOptimizer(opt)

    callbacks = [
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        hvd.callbacks.MetricAverageCallback(),
    ]

    if warmup:
        callbacks.append(hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1))

    # Validation scoring, checkpointing and CSV logging happen on rank 0 only.
    if hvd.rank() == 0:
        # Without these directories the CSV logger and the checkpoint writer cannot
        # create their output files.
        for out_dir in ('models', 'logs'):
            os.makedirs(out_dir, exist_ok=True)

        # RocAucMetricCallback goes first: it adds the `lr` and `val_auc` entries
        # that the checkpoint file name is formatted with.
        callbacks.append(RocAucMetricCallback())
        callbacks.append(ModelCheckpoint('models/%s-{lr:.6f}-{epoch}-{val_auc:.6f}.h5' % model_name,
                                         save_weights_only=True))
        callbacks.append(CSVLogger('logs/%s.csv' % model_name, append=True))

    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Each process runs its share of the batches; never let the share round down to zero.
    steps = max(1, len(gen_train) // hvd.size())

    model.fit_generator(gen_train, steps_per_epoch=steps,
                        workers=multiprocessing.cpu_count(), class_weight=class_weights,
                        initial_epoch=start, epochs=end, callbacks=callbacks, verbose=int(hvd.rank() == 0))

In [None]:
# Train in consecutive stages, one per entry of args.epochs. Stage i uses a learning
# rate of lr * 0.1**i, and only the first stage uses learning-rate warm-up.
# The loop variable is deliberately not called `epochs`, so the configured `epochs`
# list is not overwritten by an int.
start = 0
for i, n_epochs in enumerate(args.epochs):
    train(start, start + n_epochs, lr * 0.1 ** i, warmup=(i == 0))
    start += n_epochs

In [None]:
# Final validation pass with the trained model.
y_pred = model.predict_generator(
    gen_valid,
    verbose=1,
    workers=multiprocessing.cpu_count(),
)
# Score for "not class 0" and the matching hard 0/1 decision.
y_pred2 = 1 - y_pred[:,0]
y_pred3 = 1 - (np.argmax(y_pred, axis=-1) == 0)

# Report AUC, accuracy and the confusion matrix, in that order.
for metric, truth, guess in (
    (roc_auc_score, y_valid, y_pred2),
    (accuracy_score, y_valid, y_pred3),
    (confusion_matrix, y_valid > 0.5, y_pred3),
):
    pprint(metric(truth, guess))