In [15]:
import cv2
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [16]:
# Accumulators filled by the dataset-building loop:
#   images  - one preprocessed crop per annotated object
#   numbers - the label of each crop, in the same order
images, numbers = [], []

In [17]:
# Load the label table and pull out the two columns used by the notebook:
# the image file name and the number recorded for that image.
data_names_labels = pd.read_csv('images/labels.csv')
filenames, labels = (data_names_labels[column] for column in ('filename', 'number'))

In [18]:
# Build the training set: for every image listed in `filenames`, read its XML
# annotation, cut out each annotated bounding box from a binarised version of
# the image, and append the normalised crop to `images` and its label to
# `numbers`.
filepath = 'images/'
locpath = 'train/'
for filename in filenames:
    imagepath = filepath + filename
    blocktail = filename.replace('.jpg','')
    location_path = locpath + blocktail + '.xml'

    # Extract the label and bounding box of every <object> in the annotation.
    root = ET.parse(location_path).getroot()
    data = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        data.append((name, xmin, ymin, xmax, ymax))

    # Nothing annotated for this file: the image is never needed.
    if not data:
        continue

    # Load and preprocess the image ONCE per file; the result does not depend
    # on the bounding box, so it must not be recomputed for every object.
    image = cv2.imread(imagepath, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {imagepath}')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Median filter to remove noise.
    gray = cv2.medianBlur(gray, 5)
    # Adaptive threshold to cancel out uneven illumination.
    adaptive_thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY, 11, 2)

    for name, xmin, ymin, xmax, ymax in data:
        # Perspective transform that maps the box corners onto the corners
        # of a (width x height) output image.
        points = np.array([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]], dtype='float32')
        width = xmax - xmin
        height = ymax - ymin
        dst_points = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype='float32')

        matrix = cv2.getPerspectiveTransform(points, dst_points)

        # Apply the transform, then bring every crop to a common size.
        # cv2.resize takes (width, height), so the array is 32 rows x 20 cols.
        warped = cv2.warpPerspective(adaptive_thresh, matrix, (width, height))
        resized = cv2.resize(warped,(20,32))

        # Normalise pixel values to [0, 1].
        images.append(resized / 255.0)
        numbers.append(name)

In [19]:
# Turn the Python lists into NumPy arrays; labels are parsed as 32-bit ints.
numbers = np.array(numbers, dtype=np.int32)
images = np.array(images)

In [20]:
# Add the trailing channel axis expected by Conv2D: (N, H, W) -> (N, H, W, 1).
# Guarded on ndim so that re-running this cell does not keep appending axes.
if images.ndim == 3:
    images = np.expand_dims(images, axis=-1)
images.shape

(5039, 32, 20, 1)

In [24]:
# Small CNN classifier over the 10 digit classes.
# The input shape is (rows, cols, channels) = (32, 20, 1), matching
# images.shape[1:]; the crops are produced by cv2.resize(..., (20, 32)),
# whose size argument is (width, height). The previous (20, 32, 1) was
# transposed relative to the data.
model = models.Sequential()
model.add(layers.Input(shape=(32, 20, 1)))
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# One softmax output per digit class.
model.add(layers.Dense(10,activation='softmax'))

In [25]:
# Hold out 20% of the samples (fixed seed), then one-hot encode both label sets.
num_classes = 10
X_train, X_test, y_train, y_test = train_test_split(
    images,
    numbers,
    test_size=0.2,
    random_state=50,
)
y_train, y_test = (
    to_categorical(split_labels, num_classes=num_classes)
    for split_labels in (y_train, y_test)
)

In [26]:
# Configure training: Adam optimiser, cross-entropy on one-hot labels,
# accuracy reported each epoch.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 100 epochs, evaluating on the held-out split after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=100,
)

Epoch 1/100
126/126 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.5244 - loss: 1.4286 - val_accuracy: 0.9236 - val_loss: 0.2566
Epoch 2/100
126/126 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9261 - loss: 0.2755 - val_accuracy: 0.9573 - val_loss: 0.1381
Epoch 3/100
126/126 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9540 - loss: 0.1672 - val_accuracy: 0.9643 - val_loss: 0.1025
Epoch 4/100
126/126 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9625 - loss: 0.1185 - val_accuracy: 0.9603 - val_loss: 0.1132
Epoch 5/100
126/126 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9779 - loss: 0.0787 - val_accuracy: 0.9663 - val_loss: 0.1012
Epoch 6/100
126/126 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9800 - loss: 0.0728 - val_accuracy: 0.9683 - val_loss: 0.0870
Epoch 7/100
[1m126/12

In [27]:
# Class-probability predictions for the held-out samples.
pred = model.predict(x=X_test)

32/32 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step


In [28]:
# Collapse each probability row to the index of its most likely class.
predicted_classes = pred.argmax(axis=1)

# Undo the one-hot encoding of the ground-truth labels the same way.
true_classes = y_test.argmax(axis=1)

# Show the first ten predictions next to the matching true labels.
first_ten = slice(None, 10)
print("Predicted classes:", predicted_classes[first_ten])
print("True classes:", true_classes[first_ten])

Predicted classes: [8 1 2 0 8 0 7 7 7 6]
True classes: [8 1 2 0 8 0 7 7 7 6]
