In [54]:
import skimage
from skimage import io, segmentation
from collections import defaultdict
import numpy as np
import tensorflow as tf
import cv2

使用超像素分割将图像分成若干个被编号的区域。

构建一个数据结构来记录每个编号区域的像素点的坐标、最大最小横纵坐标等信息。

根据每个区域的最大最小横纵坐标，在训练集图片上获取对应的矩形区域。

对矩形区域进行下采样，将其拉成一个向量。

将这个向量喂给神经网络（即分类器）。

神经网络根据这个向量和本身的标签，进行反向传播调整前面几层的权重和偏置。

In [55]:
import cv2

# 读取图像
image = cv2.imread('./test.jpg')

# 进行超像素分割
segmented_image = cv2.ximgproc.createSuperpixelSEEDS(image.shape[0], image.shape[1], image.shape[2], 400, 5, 2)
segmented_image.iterate(image)

# 获取编号区域信息
num_regions = segmented_image.getNumberOfSuperpixels()
labels = segmented_image.getLabels()

# 构建区域信息数据结构
region_info = {}
for region_id in range(num_regions):
    # 获取区域的像素点坐标
    points = segmented_image.getLabelContourMask(region_id)
    # 计算区域的最大最小横纵坐标
    min_x = points[:,:,0].min()
    max_x = points[:,:,0].max()
    min_y = points[:,:,1].min()
    max_y = points[:,:,1].max()
    # 将区域信息添加到数据结构中
    region_info[region_id] = {
        'points': points,
        'min_x': min_x,
        'max_x': max_x,
        'min_y': min_y,
        'max_y': max_y,
    }

AttributeError: module 'cv2' has no attribute 'ximgproc'

In [21]:
# 读入训练集图片
train_image = skimage.io.imread('./train.jpg')

# 遍历每个区域
for region_id, region in region_info.items():
    # 获取该区域的最大最小横纵坐标
    min_x, max_x, min_y, max_y = region['min_x'], region['max_x'], region['min_y'], region['max_y']
    # 在训练集图片上获取该区域对应的矩形区域
    region_image = train_image[min_y:max_y+1, min_x:max_x+1, :]

print(region_image)

[[[ 13  24  20]
  [ 15  25  24]
  [ 18  30  30]
  [ 29  45  45]
  [ 45  65  72]]

 [[249 255 253]
  [249 255 255]
  [248 255 255]
  [249 255 255]
  [248 255 255]]

 [[252 255 255]
  [251 255 255]
  [249 255 255]
  [249 255 255]
  [253 255 254]]]


In [39]:
def down_sample(image):
    # 将图像缩放到 32x32 的尺寸
    image = cv2.resize(image, (32, 32))
    # 将图像转换为向量
    vector = image.flatten()
    return vector
# 遍历每个区域
for region_id, region in region_info.items():
    # 获取区域的最大最小横纵坐标
    min_row, min_col, max_row, max_col = region['min_x'], region['min_y'], region['max_x'], region['max_y']
    # 在图像中截取矩形区域
    region_image = image[min_row:max_row, min_col:max_col]
    # 进行下采样
    vector = down_sample(region_image)
    # 更新区域信息
    region_info[region_id]['vector'] = vector



137


In [29]:
# 定义网络
class Classifier(tf.keras.Model):
    def __init__(self, num_classes):
        super(Classifier, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same')
        self.conv2 = tf.keras.layers.Conv2D(32, 3, activation='relu', padding='same')
        self.conv3 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(128, activation='relu')
        self.fc2 = tf.keras.layers.Dense(num_classes)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

# 初始化分类器
classifier = Classifier(num_classes=10)

# 定义损失函数和优化器
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

# 遍历每个区域
for region_id, region in region_info.items():
    # 获取该区域的向量
    vector = region['vector']
    # 将向量转换为 TensorFlow 张量
    vector = tf.convert_to_tensor(vector, dtype=tf.float32)
    # 将向量的维度扩展到 4 维，以便输入到网络中
    vector = tf.expand_dims(vector, axis=0)
    vector = tf.expand_dims(vector, axis=-1)
    # 获取该区域的标签
    label = region['label']
    # 将标签转换为 TensorFlow 张量
    label = tf.convert_to_tensor(label, dtype=tf.int64)

    # 将向量喂给分类器进行分类
    with tf.GradientTape() as tape:
        logits = classifier(vector)
        loss_value = loss_fn(label, logits)
    # 计算梯度
    grads = tape.gradient(loss_value, classifier.trainable_variables)
    # 使用优化器更新模型参数
    optimizer.apply_gradients(zip(grads, classifier.trainable_variables))


KeyError: 'label'