Load the .mat annotation file from the dataset

In [None]:
import h5py

def load_mat_file(filename):
    """Read the ``digitStruct`` annotations from an HDF5-based ``.mat`` file.

    Args:
        filename: Path of the ``.mat`` file; it is opened read-only with h5py.

    Returns:
        A list with one dict per entry of ``digitStruct/name``, in file order:
        ``{'name': str, 'bbox': dict}``. ``bbox`` maps every key of the
        entry's bbox group to a list of ints, one value per annotated box.
    """
    with h5py.File(filename, 'r') as f:
        digit_struct = f['digitStruct']
        name_refs = digit_struct['name']
        bbox_refs = digit_struct['bbox']

        def get_name(index):
            # The referenced dataset holds one character code per row.
            name_ref = f[name_refs[index][0]][()]
            return ''.join(chr(c[0]) for c in name_ref)

        def get_bbox(index):
            box = f[bbox_refs[index][0]]
            bbox = {}
            for key, attr in box.items():
                if len(attr) > 1:
                    # Several boxes: each row is a reference to a dataset
                    # holding the value. `.item()` extracts the scalar
                    # explicitly; calling int() on a 1-element array is
                    # deprecated since NumPy 1.25.
                    values = [int(f[row[0]][()][0].item()) for row in attr]
                else:
                    # Single box: the value is stored inline.
                    values = [int(attr[0][0])]
                bbox[key] = values
            return bbox

        # Everything is materialized here because the references are only
        # resolvable while the file is open.
        return [
            {'name': get_name(i), 'bbox': get_bbox(i)}
            for i in range(len(name_refs))
        ]

Image and label preprocessing

In [None]:
import cv2
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt

def image_preprocess(img, bbox):
    """Crop the region enclosing all annotated digits and normalize it.

    The union of all digit boxes is expanded by 20% of its width/height on
    every side, clamped to the image, converted to grayscale, resized to
    32x32 and scaled to the range [0, 1].

    Args:
        img: Image array as returned by ``cv2.imread`` (BGR channel order is
            assumed by the grayscale conversion).
        bbox: Mapping with equally long ``'label'``, ``'left'``, ``'top'``,
            ``'width'`` and ``'height'`` sequences, one entry per digit.

    Returns:
        A ``float32`` array of shape ``(32, 32)``.

    Raises:
        ValueError: If ``bbox`` has no digits or the clamped crop is empty.
    """
    img_h, img_w = img.shape[:2]
    count = len(bbox['label'])
    if count == 0:
        raise ValueError('bbox contains no digits')

    lefts = bbox['left'][:count]
    tops = bbox['top'][:count]
    rights = [left + width for left, width in zip(lefts, bbox['width'])]
    bottoms = [top + height for top, height in zip(tops, bbox['height'])]

    # Union of all digit boxes.
    x1, y1, x2, y2 = min(lefts), min(tops), max(rights), max(bottoms)
    box_w, box_h = x2 - x1, y2 - y1

    # Expand by 20% per side. The upper bounds are slice ends (exclusive), so
    # they are clamped to the image size itself; clamping to size - 1 would
    # always drop the last column/row.
    crop_x1 = int(max(x1 - 0.2 * box_w, 0))
    crop_y1 = int(max(y1 - 0.2 * box_h, 0))
    crop_x2 = int(min(x1 + 1.2 * box_w, img_w))
    crop_y2 = int(min(y1 + 1.2 * box_h, img_h))
    if crop_x2 <= crop_x1 or crop_y2 <= crop_y1:
        raise ValueError(
            f'empty crop region ({crop_x1}, {crop_y1}, {crop_x2}, {crop_y2}) '
            f'for image of size {img_w}x{img_h}'
        )

    img = img[crop_y1:crop_y2, crop_x1:crop_x2]
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    img = cv2.resize(img, (32, 32))  # fixed model input size
    img = img.astype('float32') / 255.0  # normalize to [0, 1]
    return img

def label_preprocess(bbox, num_digits=2):
    """One-hot encode the labels of a sample that has exactly ``num_digits`` digits.

    Args:
        bbox: Mapping with a ``'label'`` sequence of integer class labels.
        num_digits: Number of digits a sample must have to be kept
            (default 2, the original behavior).

    Returns:
        An integer array of shape ``(num_digits, 10)`` with a single 1 per
        row, or ``None`` when the sample has a different number of labels.
    """
    label_raw = bbox['label']
    if len(label_raw) != num_digits:
        return None

    # Labels are reduced modulo 10, so a label of 10 maps to class 0.
    digits = [num % 10 for num in label_raw]
    onehot_label = np.zeros((num_digits, 10), dtype=int)
    for row, digit in enumerate(digits):
        onehot_label[row, digit] = 1
    return onehot_label



Load dataset

In [None]:
def load_dataset(data, image_dir):
    """Build image and label arrays from parsed annotations.

    Samples for which ``label_preprocess`` returns ``None`` are skipped.

    Args:
        data: Iterable of dicts with ``'name'`` (image file name) and
            ``'bbox'`` (annotation mapping) entries.
        image_dir: Directory that contains the image files.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: the stacked outputs of
        ``image_preprocess`` with a trailing channel axis of size 1, and the
        stacked one-hot labels.

    Raises:
        OSError: If an image file is missing or cannot be decoded.
    """
    images = []
    labels = []

    for item in data:
        bbox = item['bbox']
        onehot_label = label_preprocess(bbox)
        if onehot_label is None:
            continue

        img_path = os.path.join(image_dir, item['name'])
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise OSError(f'Could not read image: {img_path}')

        images.append(image_preprocess(img, bbox))
        labels.append(onehot_label)

    # Add the trailing single-channel axis.
    images = np.expand_dims(np.array(images), axis=-1)
    labels = np.array(labels)

    return images, labels

Run the preprocessing and save the processed datasets to HDF5 files

In [None]:
# h5py does not create missing parent directories, so make sure the output
# directory exists before opening the files for writing.
os.makedirs('svhn_dataset', exist_ok=True)

# Training split: parse annotations, preprocess, and store as HDF5.
digit_struct_data_train = load_mat_file('data/train/digitStruct.mat')
train_images, train_labels = load_dataset(digit_struct_data_train, 'data/train')
with h5py.File('svhn_dataset/svhn_2digits_train.h5', 'w') as f:
    # load_dataset already returns NumPy arrays, so no extra copy is needed.
    f.create_dataset('images', data=train_images)
    f.create_dataset('labels', data=train_labels)

# Test split: same pipeline.
digit_struct_data_test = load_mat_file('data/test/digitStruct.mat')
test_images, test_labels = load_dataset(digit_struct_data_test, 'data/test')
with h5py.File('svhn_dataset/svhn_2digits_test.h5', 'w') as f:
    f.create_dataset('images', data=test_images)
    f.create_dataset('labels', data=test_labels)