In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import glob


In [3]:
# 获取文件路径
imgs_path = glob.glob('../dataset/birds/*/*.jpg')
imgs_path[:3]

['../dataset/birds/005.Crested_Auklet/Crested_Auklet_0001_794941.jpg',
 '../dataset/birds/005.Crested_Auklet/Crested_Auklet_0029_1824.jpg',
 '../dataset/birds/005.Crested_Auklet/Crested_Auklet_0074_794949.jpg']

In [11]:
# 拿到每张图片的文件夹路径中的类别名称
all_labels_name = [img_p.split('/')[3].split('.')[1] for img_p in imgs_path]

In [13]:
# 对所有的类别名称去重
label_names = np.unique(all_labels_name)

In [14]:
len(label_names)

200

In [15]:
# 将每个类别生成序号
label_to_index = dict((name, i) for i, name in enumerate(label_names))

In [16]:
# 用类别和序号生成字典
index_to_label = dict((v, k) for k, v in label_to_index.items())

In [17]:
# 将所有的图片都映射到类别
all_labels = [label_to_index.get(name) for name in all_labels_name]

In [18]:
# 保证乱序的结果一致
np.random.seed(2021)
# 乱序
random_index = np.random.permutation(len(imgs_path))

In [19]:
imgs_path = np.array(imgs_path)[random_index]
all_labels = np.array(all_labels)[random_index]

In [20]:
i = int(len(imgs_path)*0.8)

In [21]:
# 切分数据
train_path = imgs_path[:i]
train_label = all_labels[:i]
test_path = imgs_path[i:]
test_label = all_labels[i:]

In [22]:
# 转换成数据集
train_ds = tf.data.Dataset.from_tensor_slices((train_path, train_label))
test_ds = tf.data.Dataset.from_tensor_slices((test_path, test_label))

In [24]:
# 读取图片函数
def load_images(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [256, 256])
    image = tf.cast(image, tf.float32)
    image = image / 255
    return image, label

In [25]:
# 获取多线程的并行线程数
AUTOTUNE = tf.data.experimental.AUTOTUNE
# 并行读取图片
train_ds = train_ds.map(load_images, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.map(load_images, num_parallel_calls=AUTOTUNE)

In [26]:
BATCH_SIZE = 32

In [27]:
train_ds = train_ds.repeat().shuffle(300).batch(BATCH_SIZE)
test_ds = test_ds.batch(BATCH_SIZE)

In [28]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(64, (3,3), input_shape=(256,256,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(256, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(512, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(512, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(512, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(512, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(512, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(200)  # 不激活的输出
])

In [29]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 64)      1792      
_________________________________________________________________
batch_normalization (BatchNo (None, 254, 254, 64)      256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 252, 252, 64)      36928     
_________________________________________________________________
batch_normalization_1 (Batch (None, 252, 252, 64)      256       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 126, 126, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 124, 124, 128)     73856     
_________________________________________________________________
batch_normalization_2 (Batch (None, 124, 124, 128)     5

In [30]:
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['acc']
)

In [31]:
train_count = len(train_path)
test_count = len(test_path)
steps_per_epoch = train_count / BATCH_SIZE
validation_steps = test_count / BATCH_SIZE

In [33]:
history = model.fit(train_ds, epochs=10,
                    steps_per_epoch = steps_per_epoch,
                    validation_data = test_ds,
                    validation_steps = validation_steps)

Epoch 1/10
  5/294 [..............................] - ETA: 1:42:51 - loss: 5.9862 - acc: 0.0049    

KeyboardInterrupt: 

In [None]:
history.history.keys()

In [None]:
plt.plot(history.epoch, history.get('acc'), label='acc')
plt.plot(history.epoch, history.get('val_acc'), label='val_acc')
plt.legend()

In [None]:
plt.plot(history.epoch, history.get('loss'), label='loss')
plt.plot(history.epoch, history.get('val_loss'), label='val_loss')
plt.legend()

使用模型进行预测

In [None]:
def load_images(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [256, 256])
    image = tf.cast(image, tf.float32)
    image = image / 255
    return image

In [None]:
test_image = ''
test_tensor = load_preprocess_image(test_image)
test_tensor = tf.expand_dims(test_tensor, axis=0)
pred = model.predict(test_tensor)

In [None]:
# 返回值是张量，需要看哪个值最大
index_to_label.get(np.argmax(pred))