In [24]:
import torch
import torchvision
import numpy as np
import cv2
import os
import json

In [25]:
# Open both CIFAR-10 splits from the local 'datasets' folder; download=False
# means the files are expected to be there already.
cifar10 = torchvision.datasets.CIFAR10(root='datasets', train=True, download=False)
cifar10_test = torchvision.datasets.CIFAR10(root='datasets', train=False, download=False)

# Print a summary of each dataset (train first, then test).
print(cifar10, cifar10_test, sep='\n')

# Accumulators filled by cifar10_img(): image file names and their labels,
# kept in matching order for each split.
train_filenames, train_annotations = [], []
test_filenames, test_annotations = [], []

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: datasets
    Split: Train
Dataset CIFAR10
    Number of datapoints: 10000
    Root location: datasets
    Split: Test


In [26]:
# Loader for the pickled CIFAR-10 batch files (taken from the official CIFAR-10 instructions).
def unpickle(file):
    """Load a pickled file and return the object stored in it.

    The file is opened in binary mode and unpickled with ``encoding='bytes'``,
    which is why callers index the result with bytes keys such as ``b'data'``.

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on trusted files.
    """
    import pickle

    with open(file, 'rb') as batch_file:
        return pickle.load(batch_file, encoding='bytes')

# Output locations: annotation files, exported training images, exported test images.
anno_loc = './datasets/annotations/'
loc_1 = './datasets/train_cifar10/'
loc_2 = './datasets/test_cifar10/'

# Make sure every output folder exists. os.makedirs(..., exist_ok=True) also
# creates a missing parent folder and is a no-op when the folder is already
# there, so this cell can be re-run safely.
for _out_dir in (loc_1, loc_2, anno_loc):
    os.makedirs(_out_dir, exist_ok=True)

In [27]:
def _save_batch_as_png(batch, out_dir, first_index, filenames, annotations):
    """Write every image of one unpickled batch into ``out_dir`` as a PNG file.

    Args:
        batch: unpickled batch dict; read through its ``b'data'`` and ``b'labels'`` entries.
        out_dir: path prefix the file name is appended to (expected to end with '/').
        first_index: number used in the file name of the first image of the batch.
        filenames: list that receives each written file name (without directory).
        annotations: list that receives the label of each written image.

    Raises:
        OSError: if ``cv2.imwrite`` reports that an image could not be written.
    """
    for offset, (row, label) in enumerate(zip(batch[b'data'], batch[b'labels'])):
        # Flat row -> (3, 32, 32) planes -> (32, 32, 3) image.
        img = np.transpose(np.reshape(row, (3, 32, 32)), (1, 2, 0))
        # The planes are in RGB order while cv2.imwrite expects BGR. Swapping the
        # R and B channels is its own inverse, so COLOR_BGR2RGB does the job.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # To export another image format, only the file extension has to change.
        annot_img_name = str(first_index + offset) + '.png'
        img_name = out_dir + annot_img_name
        if not cv2.imwrite(img_name, img):
            raise OSError('could not write image ' + img_name)
        # Record the annotation only after the file really exists on disk.
        filenames.append(annot_img_name)
        annotations.append(label)


def cifar10_img(file_dir):
    """Export the CIFAR-10 python batches found in ``file_dir`` as PNG images.

    Reads ``data_batch_1`` .. ``data_batch_5`` and ``test_batch`` with ``unpickle``,
    writes the training images to ``loc_1`` and the test images to ``loc_2``, and
    fills the module-level lists ``train_filenames`` / ``train_annotations`` and
    ``test_filenames`` / ``test_annotations`` with file names and labels.

    The four lists are emptied first, so running this function (or its notebook
    cell) more than once does not duplicate entries.

    Args:
        file_dir: folder containing the unpickle-able CIFAR-10 batch files.

    Raises:
        OSError: if an image could not be written.
    """
    # Reset the accumulators in place so existing references stay valid.
    for records in (train_filenames, train_annotations,
                    test_filenames, test_annotations):
        records.clear()

    for i in range(1, 6):
        data_name = file_dir + '/' + 'data_batch_' + str(i)
        data_dict = unpickle(data_name)
        print(data_name + ' is processing')

        # NOTE(review): the first image of batch i is numbered i*10000 + 1, so the
        # training files run from 10001.png to 60000.png rather than 1..50000.
        # Kept as in the original; confirm this offset is intended.
        _save_batch_as_png(data_dict, loc_1, i * 10000 + 1,
                           train_filenames, train_annotations)

        print(data_name + ' is done')

    test_data_name = file_dir + '/test_batch'
    print(test_data_name + ' is processing')
    test_dict = unpickle(test_data_name)

    # Test images are numbered from 1.
    _save_batch_as_png(test_dict, loc_2, 1, test_filenames, test_annotations)

    print(test_data_name + ' is done')
    print('Finish transforming to image')

In [28]:
if __name__ == '__main__':
    # Export all images; this also fills the filename / annotation lists.
    file_dir = './datasets/cifar-10-batches-py'
    cifar10_img(file_dir)

    # 'images' and 'categories' are parallel lists: entry k of one belongs to
    # entry k of the other.
    train_annot_dict = {
        'images': train_filenames,
        'categories': train_annotations
    }
    test_annot_dict = {
        'images': test_filenames,
        'categories': test_annotations
    }

    # Write each annotation dict as JSON. The with-blocks close the files even
    # if serialising or writing fails.
    train_json = json.dumps(train_annot_dict)
    with open('./datasets/annotations/cifar10_train.json', 'w') as train_file:
        train_file.write(train_json)

    test_json = json.dumps(test_annot_dict)
    with open('./datasets/annotations/cifar10_test.json', 'w') as test_file:
        test_file.write(test_json)

    print('annotations have writen to json file')

./datasets/cifar-10-batches-py/data_batch_1 is processing
./datasets/cifar-10-batches-py/data_batch_1 is done
./datasets/cifar-10-batches-py/data_batch_2 is processing
./datasets/cifar-10-batches-py/data_batch_2 is done
./datasets/cifar-10-batches-py/data_batch_3 is processing
./datasets/cifar-10-batches-py/data_batch_3 is done
./datasets/cifar-10-batches-py/data_batch_4 is processing
./datasets/cifar-10-batches-py/data_batch_4 is done
./datasets/cifar-10-batches-py/data_batch_5 is processing
./datasets/cifar-10-batches-py/data_batch_5 is done
./datasets/cifar-10-batches-py/test_batch is processing
./datasets/cifar-10-batches-py/test_batch is done
Finish transforming to image
annotations have writen to json file


In [13]:
import json
import csv

# Convert the JSON annotation files to CSV files

def map_category_to_label(category):
    """Translate a numeric CIFAR-10 category (0-9) into its class name.

    Any value that is not one of the ten known categories is returned unchanged.
    """
    # Position in this tuple is the official numeric category.
    class_names = (
        'airplane', 'automobile', 'bird', 'cat', 'deer',
        'dog', 'frog', 'horse', 'ship', 'truck',
    )
    names_by_category = dict(enumerate(class_names))
    return names_by_category.get(category, category)

def json_to_csv(json_file, csv_file):
    """Convert an annotation JSON file into a two-column CSV file.

    The JSON document is read through its 'images' and 'categories' entries,
    which are paired up element by element. The CSV gets an 'Image', 'Category'
    header followed by one row per pair: the image name without its file
    extension, and the category passed through ``map_category_to_label``.

    Args:
        json_file: path of the JSON file to read (UTF-8).
        csv_file: path of the CSV file to create or overwrite (UTF-8).
    """
    with open(json_file, 'r', encoding='utf-8') as source:
        annotations = json.load(source)

    with open(csv_file, 'w', newline='', encoding='utf-8') as target:
        writer = csv.writer(target)

        # Header row first, then the data rows.
        writer.writerow(['Image', 'Category'])
        writer.writerows(
            # Strip the extension from the file name and map the category to its label.
            [os.path.splitext(image)[0], map_category_to_label(category)]
            for image, category in zip(annotations['images'], annotations['categories'])
        )


# Annotation JSON inputs and the CSV files generated from them.
train_json_file_path = './datasets/annotations/cifar10_train.json'
train_csv_file_path = './datasets/annotations/trainLables.csv'
test_json_file_path = './datasets/annotations/cifar10_test.json'
test_csv_file_path = './datasets/annotations/testLables.csv'

# Convert the test split first, then the training split.
json_to_csv(json_file=test_json_file_path, csv_file=test_csv_file_path)
json_to_csv(json_file=train_json_file_path, csv_file=train_csv_file_path)