In [1]:
from keras.models import *
from keras.layers import *
from keras.callbacks import *
from keras.optimizers import *
from keras.applications import *
from keras.preprocessing.image import *

from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

from glob import glob
from tqdm import tqdm
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from sklearn.metrics import *

import multiprocessing
from multiprocessing.dummy import Pool
from pprint import pprint

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

Using TensorFlow backend.


In [2]:
import horovod.keras as hvd
# Imported explicitly: no other line in this notebook binds `tf` by name, so it
# was only reachable as a side effect of one of the wildcard imports above.
import tensorflow as tf
from keras import backend as K

# Start Horovod, then build a TF session restricted to the GPU whose index
# equals this process's local rank and hand that session to Keras.
hvd.init()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): the first cell sets CUDA_VISIBLE_DEVICES='0'; with that mask a
# local rank greater than 0 presumably has no matching device — confirm before
# launching with more than one process per node.
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

In [3]:
# Training hyper-parameters and the backbone that will be fine-tuned.
epochs = 100
batch_size = 1
lr = 1e-4
model_name = 'nasnet_0723'
preprocess_func = nasnet.preprocess_input
MODEL = NASNetLarge

# Network input size: 2560 x 1920 divided by `f`, truncated to whole pixels.
f = 3.75
width = int(2560 / f)
height = int(1920 / f)
# Order matters: index 0 is the class that maps to label 0 when the
# validation labels are binarised later in the notebook.
classes = ['正常', '吊经', '擦洞', '跳花', '毛洞', '织稀', '扎洞', '缺经', '毛斑', '其他']
n_classes = len(classes)

# Only rank 0 reports the configuration, so multi-process runs print it once.
if hvd.rank() == 0:
    # print, not pprint: pprint renders a str as its repr (quotes plus a
    # literal "\n") instead of the message followed by a blank line.
    print('开始训练 %s 模型。\n' % model_name)
    print(width, height, n_classes)
    pprint(classes)

'开始训练 nasnet_0723 模型。\n'
682 512 10
['正常', '吊经', '擦洞', '跳花', '毛洞', '织稀', '扎洞', '缺经', '毛斑', '其他']


In [4]:
# Training images are augmented (flip / rotation / zoom); validation images
# go through a generator with no augmentation configured.
idg = ImageDataGenerator(
    rotation_range=15,
    zoom_range=0.2,
    horizontal_flip=True,
)
idg2 = ImageDataGenerator()

gen_train = idg.flow_from_directory(
    'train',
    classes=classes,
    target_size=(height, width),
    batch_size=batch_size,
)
# Validation batches are not shuffled; y_valid below is derived from
# gen_valid.classes and is compared against predictions elsewhere.
gen_valid = idg2.flow_from_directory(
    'valid',
    classes=classes,
    target_size=(height, width),
    batch_size=batch_size,
    shuffle=False,
)
# Binary ground truth: 0 for class index 0, 1 for every other class.
y_valid = np.not_equal(gen_valid.classes, 0).astype(int)

Found 1819 images belonging to 10 classes.
Found 203 images belonging to 10 classes.


In [5]:
class RocAucMetricCallback(Callback):
    """Evaluate the validation generator after each epoch and publish the scores.

    The multi-class predictions are collapsed to a binary problem ("class
    index 0" versus "anything else") and scored against the notebook-level
    ``y_valid`` labels. Depends on the notebook globals ``gen_valid`` and
    ``y_valid``.

    Keys written into ``logs`` (available to callbacks that run afterwards):
        val_auc: ROC AUC of ``1 - P(class 0)``.
        val_acc: accuracy of the arg-max decision reduced to binary.
        lr:      current learning rate of the model's optimizer.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Compute the validation metrics and store them in ``logs``.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metrics dict for this epoch; updated in place when given.
        """
        # A `{}` default would be a single dict shared by every call and
        # mutated below; start from a fresh one when nothing is passed.
        if logs is None:
            logs = {}

        # Use the model attached to this callback (as the `lr` lookup below
        # already does) rather than reaching for the notebook-global `model`.
        y_pred = self.model.predict_generator(gen_valid, verbose=1, workers=multiprocessing.cpu_count())
        # Continuous score: probability mass assigned to anything but class 0.
        y_pred2 = 1 - y_pred[:,0]
        # Hard decision: 1 when the arg-max class is not class 0.
        y_pred3 = 1 - (np.argmax(y_pred, axis=-1) == 0)

        auc = roc_auc_score(y_valid, y_pred2)
        acc = accuracy_score(y_valid, y_pred3)
        logs['val_auc'] = auc
        logs['val_acc'] = acc
        logs['lr'] = float(K.get_value(self.model.optimizer.lr))
        print(f'ACC: {acc:.4f} AUC: {auc:.4f}')

In [6]:
# Raw images enter here; preprocessing is part of the graph so callers can
# feed unprocessed pixel values.
input_tensor = Input(shape=(height, width, 3))

x = Lambda(preprocess_func)(input_tensor)
# ImageNet-initialised backbone without its classification head.
base_model = MODEL(
    weights='imagenet',
    include_top=False,
    input_tensor=x,
)

# New head: global average pooling -> dropout -> softmax over n_classes.
x = GlobalAvgPool2D()(base_model.output)
x = Dropout(rate=0.5)(x)
x = Dense(units=n_classes, activation='softmax')(x)
model = Model(inputs=input_tensor, outputs=x)

In [7]:
# SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

In [8]:
def train(epochs, lr, warmup=False, steps_divisor=100):
    """Compile the global ``model`` with a fresh optimizer and run one training stage.

    Args:
        epochs: maximum number of epochs for this stage.
        lr: learning rate handed to Adam.
        warmup: when True, add Horovod's learning-rate warmup callback
            (5 warmup epochs).
        steps_divisor: each epoch runs the per-worker share of ``gen_train``
            divided by this value. The default of 100 reproduces the
            previously hard-coded ``/ 100``, i.e. roughly 1% of the data per
            epoch. NOTE(review): that looks like a quick-iteration setting —
            confirm, and pass ``steps_divisor=1`` for full passes.

    Side effects:
        On rank 0, creates ``models/`` and ``logs/`` if missing, writes a
        weights file per epoch and appends to ``logs/<model_name>.csv``.
    """
    opt = hvd.DistributedOptimizer(Adam(lr))

    callbacks = [
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        hvd.callbacks.MetricAverageCallback(),
    ]

    if warmup:
        callbacks.append(hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1))

    if hvd.rank() == 0:
        # The checkpoint and CSV callbacks write into these directories and
        # do not create them, so make sure they exist first.
        for out_dir in ('models', 'logs'):
            os.makedirs(out_dir, exist_ok=True)

        # Order matters: RocAucMetricCallback fills in `lr` and `val_auc`,
        # which the checkpoint filename and EarlyStopping read afterwards.
        callbacks.append(RocAucMetricCallback())
        callbacks.append(ModelCheckpoint('models/%s-{lr:.6f}-{epoch}-{val_auc:.6f}.h5' % model_name,
                                         save_weights_only=True))
        callbacks.append(CSVLogger('logs/%s.csv' % model_name, append=True))
        # NOTE(review): early stopping is registered on rank 0 only; with more
        # than one worker the other ranks presumably keep training and wait on
        # rank 0 — confirm before running multi-process.
        callbacks.append(EarlyStopping(monitor='val_auc', patience=5, mode='max'))

    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # steps_per_epoch must be a whole number. Rounding up keeps the same number
    # of batches that the former float value (e.g. 18.19 -> 19) resulted in,
    # and max(1, ...) guards against a zero-step epoch on tiny datasets.
    steps_per_epoch = max(1, int(np.ceil(len(gen_train) // hvd.size() / steps_divisor)))

    model.fit_generator(gen_train, steps_per_epoch=steps_per_epoch,
                        workers=multiprocessing.cpu_count(),
                        epochs=epochs, callbacks=callbacks, verbose=int(hvd.rank() == 0))

In [9]:
# Three training stages: the base learning rate with warmup, then 10x and
# 100x smaller rates. The epoch budget comes from the `epochs` setting in the
# configuration cell rather than a repeated literal.
train(epochs, lr, warmup=True)
train(epochs, lr * 0.1)
train(epochs, lr * 0.01)

Epoch 1/100
ACC: 0.5567 AUC: 0.4147
Epoch 2/100
ACC: 0.4975 AUC: 0.4570
Epoch 3/100
ACC: 0.5369 AUC: 0.4759
Epoch 4/100
ACC: 0.5714 AUC: 0.5126
Epoch 5/100

Epoch 5: finished gradual learning rate warmup to 0.0001.
ACC: 0.3941 AUC: 0.5135
Epoch 6/100
ACC: 0.6010 AUC: 0.5523
Epoch 7/100

KeyboardInterrupt: 

In [None]:
# Score the validation set with the model's current weights, using the same
# binary reduction ("class index 0" vs. "anything else") as the epoch callback.
y_pred = model.predict_generator(
    gen_valid,
    workers=multiprocessing.cpu_count(),
    verbose=1,
)
# Continuous score: softmax mass on every class except index 0.
y_pred2 = 1 - y_pred[:, 0]
# Hard decision: 1 whenever the arg-max class is not index 0.
y_pred3 = (np.argmax(y_pred, axis=-1) != 0).astype(int)

pprint(roc_auc_score(y_valid, y_pred2))
pprint(accuracy_score(y_valid, y_pred3))
# y_valid is thresholded to booleans here, so the matrix is built from
# boolean truth values against the 0/1 integer decisions.
pprint(confusion_matrix(y_valid > 0.5, y_pred3))