In [1]:
# 自动加载已修改的python文件
%reload_ext autoreload
%autoreload 2

import os
import tensorflow as tf  
import numpy as np 
import utils
from sklearn.metrics import f1_score 
from sklearn.metrics import accuracy_score   
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score  
from sklearn.metrics import confusion_matrix

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ['CUDA_VISIBLE_DEVICES'] = '2'       # 使用第二块GPU（从0开始）

# Root directory of the dataset; the "Train_20W/" and "Valid/" sub-directories
# are appended to it when the samples are loaded further down.
pwd = '/notebooks/17_LJS/TbsData/TBS5_加入淡染/'  # dataset path
# Class names, passed to the utils helpers that load samples and generate labels.
labels = ['Junk','Negative','Positive']

Using TensorFlow backend.


In [2]:
"""
配置哪些GPU对Keras模型可见
参考至：https://blog.csdn.net/sinat_26917383/article/details/75633754
"""
import keras.backend.tensorflow_backend as ktf

# Automatic GPU memory allocation: with allow_growth the process grows its GPU
# memory usage as needed. `config` is reused later by trainFun.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
#config.gpu_options.per_process_gpu_memory_fraction = 0.3
session = tf.Session(config=config)
# Register this session with the Keras TensorFlow backend.
ktf.set_session(session)

### 读取样本并生成标签

In [3]:
# Helper object from the project-local `utils` module; used by every cell below.
utils_ = utils.utils()

X_train = utils_.getSamplesDir(pwd+"Train_20W/",labels)  # load the training sample set
X_val = utils_.getSamplesDir(pwd+"Valid/",labels)  # load the validation sample set

# Randomly shuffle the samples
# NOTE(review): no random seed is set anywhere in this notebook, so the order
# presumably differs between runs — confirm in utils.myShuffle.
X_train = utils_.myShuffle(X_train)
X_val = utils_.myShuffle(X_val)

# Labels are generated from the already-shuffled sample lists, so y_* follows
# the same order as X_*.
y_train = utils_.geneLabel(X_train,labels)
y_val = utils_.geneLabel(X_val,labels)

# Report the training / validation set sizes.
print("训练集大小："+str(len(y_train)))
print("验证集大小："+str(len(y_val)))

训练集大小：570131
验证集大小：39792


### 统计各类样本数量分布

In [4]:
# Print the per-class sample counts of the training set, then of the validation
# set; the return value is discarded here.
_ = utils_.countSample(X_train,labels)
_ = utils_.countSample(X_val,labels)

类别Junk样本数为：	197332
类别Negative样本数为：	180027
类别Positive样本数为：	192772


类别Junk样本数为：	22743
类别Negative样本数为：	5001
类别Positive样本数为：	12048




### 模型评价函数

In [5]:
# Analyse the confusion matrix
def analysisConfusionMatrix(ConfusionMatrix):
    """Print and return three error rates read from a 3-class confusion matrix.

    The matrix is indexed as ConfusionMatrix[row][column]. Rows/columns 0, 1, 2
    are treated as Junk, Negative and Positive (the order of the notebook's
    `labels` list — presumably the same order the label encoding uses; verify
    against utils.geneLabel).

    Args:
        ConfusionMatrix: 3x3 nested sequence or array of counts.

    Returns:
        Tuple (one, two, three):
            one   - share of row 2 that falls in columns 0-1 (positives lost),
            two   - share of row 0 that falls in column 2 (junk polluting positive),
            three - share of row 1 that falls in column 2 (negative polluting positive).
        A row whose first three entries sum to 0 yields 0.0.
    """
    def _share(row, lo, hi):
        # Fraction of the row's first three entries lying in columns [lo, hi).
        # The 1e-9 term keeps the division defined for an all-zero row.
        return float(sum(row[lo:hi]))/float(sum(row[:3])+1e-9)

    result = ConfusionMatrix
    # Junk polluting positive
    two = _share(result[0], 2, 3)
    # Negative polluting positive
    three = _share(result[1], 2, 3)
    # Positives lost
    one = _share(result[2], 0, 2)
    # Single parenthesised argument: prints identically on Python 2 and Python 3.
    print("阳性丢失："+str(round(one,5))+"  垃圾污染阳性："+str(round(two,5))+"  阴性污染阳性："+str(round(three,5)))
    return one,two,three

### 定义F_beta_score

In [6]:
# Rationale: for the "abnormal" class recall matters more than precision (high
#       recall is what keeps the false-negative rate low), so beta should be > 1.
#       Precision still matters, though, so the F-beta score of the "abnormal"
#       class is used as the model evaluation metric.
def F_beta_score(confusionMatrix,beta):  # confusionMatrix: 2x2 binary confusion matrix; beta: F-beta coefficient of the abnormal class.
    """Return the F-beta score of the "abnormal" class of a 2x2 confusion matrix.

    Index 0 is the "normal" class and index 1 the "abnormal" class, with the
    matrix read as confusionMatrix[true_class][predicted_class].

    Args:
        confusionMatrix: 2x2 nested sequence or array of counts.
        beta: weight of recall relative to precision.

    Returns:
        (1 + beta^2) * P * R / (beta^2 * P + R), where P and R are the precision
        and recall of the abnormal class. A small epsilon is added to every
        denominator, so an all-zero matrix yields 0.0 instead of raising.
    """
    eps = 0.00000001
    normal_as_abnormal = confusionMatrix[0][1]
    abnormal_as_normal = confusionMatrix[1][0]
    abnormal_hits = confusionMatrix[1][1]

    # Precision / recall of the "abnormal" class
    precision = abnormal_hits/(abnormal_hits+normal_as_abnormal+eps)
    recall = abnormal_hits/(abnormal_hits+abnormal_as_normal+eps)

    beta_sq = beta*beta
    numerator = (1+beta_sq)*precision*recall
    denominator = (beta_sq*precision)+recall+eps
    return numerator/denominator

# Merge the first two classes of a 3-class confusion matrix into one "normal"
# class, then score the remaining class with F_beta_score.
def F_beta_score_MergeNormalAndJunk(confusionMatrix,beta):
    """Collapse a 3x3 confusion matrix to 2x2 and return F_beta_score of it.

    Rows/columns 0 and 1 are merged into a single class (index 0 of the 2x2
    matrix); row/column 2 becomes index 1, the class F_beta_score evaluates.

    Args:
        confusionMatrix: 3x3 nested sequence or array of counts.
        beta: forwarded unchanged to F_beta_score.

    Returns:
        The value returned by F_beta_score for the merged 2x2 matrix.
    """
    first_row, second_row, third_row = confusionMatrix[0], confusionMatrix[1], confusionMatrix[2]

    # Top-left 2x2 corner: merged class predicted as merged class.
    merged_to_merged = first_row[0]+first_row[1]+second_row[0]+second_row[1]
    # Last column of the first two rows: merged class predicted as class 2.
    merged_to_last = first_row[2]+second_row[2]
    # First two columns of the last row: class 2 predicted as merged class.
    last_to_merged = third_row[0]+third_row[1]
    # Bottom-right cell: class 2 predicted as class 2.
    last_to_last = third_row[2]

    collapsed = [
        [merged_to_merged, merged_to_last],
        [last_to_merged, last_to_last],
    ]
    return F_beta_score(collapsed,beta)

### 定义模型训练函数

#### 生成训练集子集，用于分析模型

In [7]:
# Take a subset of the training set with as many samples as the validation set
# (presumably drawn at random, judging by the helper's name — confirm in
# utils.randomCropSample). trainFun scores this subset instead of all of X_train.
X_train_evalua,y_train_evalua=utils_.randomCropSample(X_train,y_train,len(X_val))
# Print the class distribution of that subset.
_ = utils_.countSample(X_train_evalua,labels)

类别Junk样本数为：	13685
类别Negative样本数为：	12677
类别Positive样本数为：	13430




#### 定义模型网络结构

In [8]:
import Build
import tensorflow as tf  
from IPython.display import display_html
from keras import backend as K
import Config as cfg0

def trainFun(model,utils,dataenhance,reDir='info/info.txt'):
    """Train `model` one mini-batch at a time, evaluating and checkpointing periodically.

    Besides its arguments the function reads these notebook globals: X_train,
    y_train, X_val, y_val, X_train_evalua, y_train_evalua, labels, config and
    cfg0 (the Config module).

    Every int(cfg0.saveStepEpochRate * batches_per_epoch) steps (at least every
    step) the model is scored on X_train_evalua and X_val; metrics and confusion
    matrices are logged through utils.printRd, the curves are redrawn, and the
    model is saved to cfg0.saveModelPath whenever the validation F-beta score of
    the last class (first two classes merged) exceeds its best value so far.

    Args:
        model: Keras model trained with train_on_batch. Replaced by the model
            loaded from cfg0.loadmodel_path + cfg0.loadmodel_name when
            cfg0.loadmodel_path is not None.
        utils: helper OBJECT (it shadows the `utils` module inside this
            function) providing printRd, predict_keras, onehot2realLabel,
            countSample and showPerformOrCostCurve.
        dataenhance: object providing getMiniBatch4Train, used to turn a slice
            of X_train into the model input batch.
        reDir: path of the log file written by utils.printRd; perform.jpg and
            cost.jpg are written to the same directory.

    Returns:
        None. After the last step a script is emitted that restarts the Jupyter
        kernel.
    """
    def _mean_cross_entropy(y_true, y_prob):
        # Negative mean of y_true*log(y_prob) over ALL elements (samples and
        # classes), with probabilities clipped to [1e-10, 1.0] to avoid log(0).
        # Done in NumPy: building TF ops here would add new nodes (with the
        # arrays embedded as constants) to the graph on every evaluation.
        clipped = np.clip(np.asarray(y_prob), 1e-10, 1.0)
        return float(-np.mean(np.asarray(y_true)*np.log(clipped)))

    utils.printRd("",reDir,"w+")  # truncate the redirect (log) file
    # tf.reset_default_graph()    # clear the graph
    batch_size = cfg0.batch_size

    # Number of batches that make up one epoch
    epoch = int(len(X_train)/batch_size+1e-9)
    # Total number of training steps (batches)
    STEPS = int(epoch*cfg0.epochs)
    print("the number of total batch:"+str(STEPS))

    # Steps between two evaluations. Clamped to >= 1 so that a small
    # saveStepEpochRate (or fewer samples than one batch) cannot make the
    # modulo below divide by zero.
    eval_interval = max(1, int(cfg0.saveStepEpochRate*epoch))

    # NOTE(review): the Keras model presumably runs in the session registered via
    # ktf.set_session in an earlier cell; this separate session is only used for
    # the initializer call below — confirm it is still needed.
    with tf.Session(config = config) as sess:
        sess.run(tf.global_variables_initializer())  # initialise variables

        # Optionally continue from a previously saved (pre-trained) model
        if cfg0.loadmodel_path is not None:
            # Local import: load_model is not imported anywhere else in the notebook.
            from keras.models import load_model
            model = load_model(cfg0.loadmodel_path+cfg0.loadmodel_name)

        val_accuracy_list = []  # validation accuracy curve
        val_f1_list = []  # validation macro-F1 curve
        train_accuracy_list = []  # training-subset accuracy curve
        cost_list_train = []
        cost_list_val = []

        val_f_beta_score_max = -1 # best validation F-beta score so far (decides checkpointing)
        val_f1_max = -1 # best validation macro-F1 so far (only logged)
        for i in range(0,STEPS):
            utils.printRd("............................第"+str(i)+"个batch............................",reDir)
            # Learning-rate decay is currently disabled; it used to be applied here via
            # utils_.lr_exponential_decay(model,i,cfg0.lr_decay_step,cfg0.lr_decay_rate).

            # Evaluate before training on this batch; step 0 is evaluated only when cfg0.firstPred is set.
            if i%eval_interval==0 and (i!=0 or cfg0.firstPred==True):
                # Predict on the training subset and on the validation set
                train_predicts,_ = utils.predict_keras(model,X_train_evalua,batch_size,dataenhance,reDir)
                val_predicts,_ = utils.predict_keras(model,X_val,batch_size,dataenhance,reDir)

                # Convert one-hot labels / predictions to class labels
                # NOTE(review): countSample(X_train,labels) is loop-invariant but is re-run twice
                # per evaluation; hoist it if countSample/onehot2realLabel have no side effects
                # that matter — confirm in utils.
                train_evalua_realLabel = utils.onehot2realLabel(y_train_evalua)
                train_predicts_realLabel = utils.onehot2realLabel(train_predicts,utils.countSample(X_train,labels))

                val_realLabel = utils.onehot2realLabel(y_val)
                val_predicts_realLabel = utils.onehot2realLabel(val_predicts,utils.countSample(X_train,labels))

                # Accuracy, macro-F1 and confusion matrices
                train_accuracy = accuracy_score(train_evalua_realLabel,train_predicts_realLabel, normalize=True)
                val_accuracy = accuracy_score(val_realLabel,val_predicts_realLabel, normalize=True)
                val_f1 = f1_score(val_realLabel,val_predicts_realLabel,average="macro")
                train_confusion_matrix = confusion_matrix(train_evalua_realLabel,train_predicts_realLabel)
                val_confusion_matrix = confusion_matrix(val_realLabel,val_predicts_realLabel)

                # Model-selection metric: F-beta score of the "abnormal" class.
                # NOTE(review): beta=1 is what is actually used (plain F1); the previous comment
                # described beta=2 (recall weighted 4x precision) — confirm which is intended.
                val_f_beta_score = F_beta_score_MergeNormalAndJunk(val_confusion_matrix,beta=1)

                # Cross-entropy on the training subset and on the validation set
                cross_entropy_train = _mean_cross_entropy(y_train_evalua,train_predicts)
                cross_entropy_val = _mean_cross_entropy(y_val,val_predicts)

                # Keep five decimals of every metric
                train_accuracy = round(train_accuracy,5)
                val_accuracy = round(val_accuracy,5)
                cross_entropy_train = round(cross_entropy_train,5)
                cross_entropy_val = round(cross_entropy_val,5)
                val_f1 = round(val_f1,5)
                val_f_beta_score = round(val_f_beta_score,5)

                utils.printRd("训练集混淆矩阵为：",reDir)
                utils.printRd(np.array(train_confusion_matrix),reDir)
                utils.printRd(analysisConfusionMatrix(train_confusion_matrix),reDir)
                utils.printRd("验证集混淆矩阵为：",reDir)
                utils.printRd(np.array(val_confusion_matrix),reDir)
                utils.printRd(analysisConfusionMatrix(val_confusion_matrix),reDir)

                utils.printRd("第"+str(i)+"次迭代："+"train_accuracy="+str(train_accuracy)+" val_accuracy="+str(val_accuracy)+" cost="+str(cross_entropy_train)+" val_f1:"+str(val_f1)+ " val_f_beta_score:"+str(val_f_beta_score),reDir)

                train_accuracy_list.append(train_accuracy)
                val_accuracy_list.append(val_accuracy)
                val_f1_list.append(val_f1)
                cost_list_train.append(cross_entropy_train)
                cost_list_val.append(cross_entropy_val)
                lists = [val_accuracy_list,val_f1_list,train_accuracy_list]
                names = ["val_accuracy","val_f1","train_accuracy"]
                utils.showPerformOrCostCurve(lists,names,os.path.dirname(reDir)+'/perform.jpg') # accuracy / F1 curves
                lists = [cost_list_train,cost_list_val]
                names = ["train_cost","val_cost"]
                utils.showPerformOrCostCurve(lists,names,os.path.dirname(reDir)+'/cost.jpg') # loss curves

                if val_f1>val_f1_max:
                    val_f1_max = val_f1
                    utils.printRd("val_f1最大值更新："+str(val_f1_max),reDir)

                # Save the model only when the validation F-beta score improves on its best
                # value so far, so the file on disk is always the best checkpoint seen.
                if val_f_beta_score>val_f_beta_score_max:
                    model.save(cfg0.saveModelPath)
                    val_f_beta_score_max = val_f_beta_score
                    utils.printRd("model saved!",reDir)
                else:
                    utils.printRd("model not saved!",reDir)

            # Train on the next batch_size samples; the window wraps around X_train,
            # and the last window of a pass may be shorter than batch_size.
            start = (i*batch_size)%len(X_train)
            end = min(start+batch_size,len(X_train))
            xxx = dataenhance.getMiniBatch4Train(X_train[start:end])
            yyy = np.array(y_train[start:end])
            model.train_on_batch(xxx,yyy,class_weight=None, sample_weight=None)
        # Restart the notebook kernel once training is done (presumably to release
        # GPU memory — confirm). The `with` block closes `sess` on exit.
        display_html("<script>Jupyter.notebook.kernel.restart()</script>",raw=True)

### 训练模型

In [9]:
# Preview the learning-rate schedule: start at cfg0.lr and multiply by
# cfg0.lr_decay_rate 24 times, printing the value before each multiplication.
x  = cfg0.lr
for i in range(24):
    # Single parenthesised argument: prints identically on Python 2 and Python 3.
    print("x:"+str(x))
    x = x*cfg0.lr_decay_rate

x:0.01
x:0.0094
x:0.008836
x:0.00830584
x:0.0078074896
x:0.007339040224
x:0.00689869781056
x:0.00648477594193
x:0.00609568938541
x:0.00572994802229
x:0.00538615114095
x:0.00506298207249
x:0.00475920314814
x:0.00447365095925
x:0.0042052319017
x:0.0039529179876
x:0.00371574290834
x:0.00349279833384
x:0.00328323043381
x:0.00308623660778
x:0.00290106241131
x:0.00272699866664
x:0.00256337874664
x:0.00240957602184


In [10]:
# Log (redirect) file for this training run, taken from the Config module.
reDir = cfg0.train_reDir
# Helper that trainFun uses to build input mini-batches (getMiniBatch4Train),
# configured here for 224x224 images.
dataenhance = utils.DataEnhance(image_h = 224,image_w = 224)
# Build the MobileNet_V2 model defined in the project-local Build module.
model = Build.MobileNet_V2().build()
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'Model' object has no attribute 'prob'". No `.prob` access
# appears in this notebook, so it presumably comes from a utils helper such as
# predict_keras — confirm and fix there.
trainFun(model,utils_,dataenhance,reDir)

the number of total batch:356320


AttributeError: 'Model' object has no attribute 'prob'