# CNN迁移学习

## 环境导入

In [2]:
import os
## 导入 Inceptionv3 模型
from keras.applications.inception_v3 import InceptionV3, preprocess_input

## 导入建立神经网络的基本模块
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

from keras.optimizers import *
from keras.losses import categorical_crossentropy

## 导入数据增强模块
import cv2
from keras_preprocessing.image import ImageDataGenerator

# 超参数调节
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

# 可视化
# from keras.utils import plot_model
# from keras_visualizer import visualizer
# from IPython.display import Image, SVG, display
from keras.callbacks import TensorBoard
import datetime


2023-01-06 21:26:53.328618: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


## 参数区

In [1]:
# Root directory of the dataset; the train/validation/test splits are read from below it.
DATASET_PATH_ROOT = '/data/DataSets/TWITTER_IMG_SENT_2015/dataset/'

# Root directory for everything this notebook writes; logs go into its `logs/` subfolder.
OUT_PATH_ROOT = '/data/Models/TWITTER_SENT_2015/'
OUT_LOG_PATH = f'{OUT_PATH_ROOT}logs/'

# Training schedule.
BATCH_SIZE = 512
TOTAL_EPOCH = 100

## 超参数

In [None]:
# Search space for the unit count of the Dense layer added on top of the base model.
HP_NUM_UNITS = hp.HParam(name='num_units', domain=hp.Discrete([512, 1024]))
# Dropout search space is currently disabled:
# HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
# Search space for the optimizer identifier handed to `model.compile`.
HP_OPTIMIZER = hp.HParam(name='optimizer', domain=hp.Discrete(['adam', 'sgd']))

# Tag under which the accuracy metric is registered for the HParams dashboard.
METRIC_ACCURACY = 'accuracy'



## 数据准备

In [3]:

# Spatial size every image is resized to before being fed to the network.
IMAGE_SIZE = (299, 299)

# Training set: InceptionV3 preprocessing plus random augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    # rescale=1. / 255,  # not needed: preprocess_input is applied instead
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation set: preprocessing only. Random augmentation is deliberately NOT
# applied here, so validation metrics are measured on unmodified images and
# stay comparable between epochs and between hyperparameter trials.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Test set: preprocessing only.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Directory-backed batch iterators, one per split.
train_generator = train_datagen.flow_from_directory(
    directory=f'{DATASET_PATH_ROOT}train',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)
val_generator = val_datagen.flow_from_directory(
    directory=f'{DATASET_PATH_ROOT}validation',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)
test_generator = test_datagen.flow_from_directory(
    directory=f'{DATASET_PATH_ROOT}test',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

Found 617 images belonging to 2 classes.
Found 176 images belonging to 2 classes.


## 迁移学习

In [5]:
# 输出日志
LOG_DIR = OUT_LOG_PATH + 'hparam_tuning/'
os.environ['TENSORBOARD_BINARY'] = '/usr/local/miniconda3/envs/TensorFlow/bin/tensorboard'

# @formatter:off
%load_ext tensorboard
%tensorboard --logdir {LOG_DIR} --port 6006 --bind_all
# @formatter:on

Launching TensorBoard...

In [None]:
def start_transfer_learning(run_dir, hparams):
    """Train and evaluate one transfer-learning trial on top of InceptionV3.

    Builds an ImageNet-pretrained InceptionV3 without its top, adds a pooling
    layer, a Dense layer and a 2-way softmax output, freezes all base layers,
    trains on `train_generator` / `val_generator` and evaluates on
    `test_generator`.

    Args:
        run_dir: Log directory of this trial. Both the TensorBoard callback
            and the HParams callback write into it.
        hparams: Mapping indexed by `HP_NUM_UNITS` (Dense layer width) and
            `HP_OPTIMIZER` (optimizer identifier passed to `compile`).

    Returns:
        The result of `model.evaluate(test_generator)`. With the loss and the
        single `accuracy` metric compiled below, that is `[loss, accuracy]`.
    """
    # Base model: pretrained InceptionV3 with the final classification layers removed.
    base_model = InceptionV3(weights='imagenet', include_top=False)

    # New head stacked on the base model's output.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)  # collapse the spatial dimensions, keep one value per channel
    x = Dense(hparams[HP_NUM_UNITS], activation='relu')(x)  # tunable fully connected layer
    predictions = Dense(2, activation='softmax')(x)  # 2-class classifier
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the pretrained layers so only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=hparams[HP_OPTIMIZER], loss='categorical_crossentropy', metrics=['accuracy'])

    # Step counts come from the generators themselves (number of batches per
    # pass). Hard-coding `samples // BATCH_SIZE` silently drops the final
    # partial batch and evaluates to 0 whenever a split is smaller than one
    # batch, as the validation split was with 176 images and BATCH_SIZE=512.
    model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=TOTAL_EPOCH,
        validation_data=val_generator,
        validation_steps=len(val_generator),
        callbacks=[
            # Log into this trial's own directory so that trials do not mix
            # their curves in one shared folder.
            # NOTE(review): run_dir is assumed to live below the directory
            # TensorBoard is watching - confirm at the call site.
            TensorBoard(log_dir=run_dir, profile_batch=5, histogram_freq=1),
            hp.KerasCallback(run_dir, hparams),
        ],
    )

    # Evaluate on the test split.
    scores = model.evaluate(test_generator)
    return scores


In [None]:
# 超参数运行
def run(run_dir, hparams):
    """Execute one hyperparameter trial and record it for the HParams dashboard.

    Args:
        run_dir: Directory the trial's summaries are written to.
        hparams: Mapping of hyperparameter objects to the values of this trial.
    """
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        hp.hparams_config(
            hparams=[HP_NUM_UNITS, HP_OPTIMIZER],
            metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
        )
        # start_transfer_learning requires run_dir as its first argument;
        # omitting it raised a TypeError before any training started.
        scores = start_transfer_learning(run_dir, hparams)
        # scores is [loss, accuracy]; write the accuracy under the tag declared
        # in hparams_config so the dashboard has a metric for this trial.
        tf.summary.scalar(METRIC_ACCURACY, scores[1], step=1)

# # 拟合模型
# history_tl = model.fit_generator(generator=train_generator,
#                                  steps_per_epoch=(617 // BATCH_SIZE),  #800
#                                  epochs=TOTAL_EPOCH,  #2
#                                  validation_data=val_generator,
#                                  validation_steps=(176 // BATCH_SIZE),  #12
#                                  class_weight=None,  #不要这个,
#                                  callbacks=[TensorBoard(log_dir=log_dir, profile_batch=5, histogram_freq=1)]
#                                  )
# # 保存
# model.save(f'{OUT_PATH_ROOT}Twitter2015_iv3_tl.h5')