In [1]:
!pwd

/root/autodl-tmp/cifar-100


In [6]:
import os
import pickle
import numpy as np
from PIL import Image
from tqdm import tqdm

def unpickle(file):
    """Deserialize and return the object stored in the pickle file ``file``.

    The file is opened in binary mode and decoded with
    ``encoding='latin1'``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file, 'rb') as stream:
        payload = pickle.load(stream, encoding='latin1')
    return payload

def convert_cifar100_to_caltech_style(cifar100_root, output_dir):
    """Export the CIFAR-100 train and test splits as per-class image folders.

    Reads the ``meta``, ``train`` and ``test`` pickle files located under
    ``cifar100_root`` and hands each split to ``save_cifar100_images``.
    Both splits are written into the same ``output_dir``.

    Args:
        cifar100_root: Directory holding the ``meta``, ``train`` and
            ``test`` files.
        output_dir: Destination directory; created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The meta file supplies the fine-grained class names.
    class_names = unpickle(os.path.join(cifar100_root, 'meta'))['fine_label_names']

    # Convert the training split first, then the test split.
    for split_name in ('train', 'test'):
        split_dict = unpickle(os.path.join(cifar100_root, split_name))
        save_cifar100_images(split_dict, class_names, output_dir, split=split_name)

def save_cifar100_images(data_dict, label_names, output_dir, split):
    """Write every image of one split to ``output_dir/<class name>/<filename>``.

    Args:
        data_dict: Mapping with ``'data'``, ``'fine_labels'`` and
            ``'filenames'`` entries that are indexed in parallel.
        label_names: Sequence mapping a fine label index to its class name.
        output_dir: Root directory under which the class folders are created.
        split: Name of the split; only used in the progress-bar description.
    """
    pixels = data_dict['data']  # presumably one flat 3072-value row per image -- TODO confirm
    fine_labels = data_dict['fine_labels']
    names = data_dict['filenames']

    progress = tqdm(range(len(pixels)), desc=f"Processing {split}")
    for idx in progress:
        # Reshape the flat row to (3, 32, 32), then move the first axis last
        # so the array handed to PIL is (32, 32, 3).
        channels_first = pixels[idx].reshape(3, 32, 32)
        rgb = channels_first.transpose(1, 2, 0)

        # One sub-folder per class name.
        target_dir = os.path.join(output_dir, label_names[fine_labels[idx]])
        os.makedirs(target_dir, exist_ok=True)

        Image.fromarray(rgb).save(os.path.join(target_dir, names[idx]))

# Example invocation
cifar100_root = "./cifar-100-python"  # directory of the extracted CIFAR-100 archive (holds meta/train/test)
output_dir = "./cifar100"
convert_cifar100_to_caltech_style(cifar100_root, output_dir)


Processing train: 100%|██████████| 50000/50000 [00:17<00:00, 2886.32it/s]
Processing test: 100%|██████████| 10000/10000 [00:03<00:00, 3094.00it/s]


In [10]:
import os
import csv
from pathlib import Path

def listdir_nohidden(path):
    """Return the names of the entries in ``path`` that do not start with a dot."""
    return [f for f in os.listdir(path) if not f.startswith('.')]

def generate_csv(image_dir, save_path, ignored_categories=None, new_cnames=None):
    """Write a CSV annotation file for a folder-per-class image dataset.

    Every non-hidden sub-directory of ``image_dir`` is treated as one
    category, and every non-hidden entry inside it as one image. The CSV has
    the columns ``id`` (running row index), ``image_path`` (``image_dir``
    joined with category and file name) and ``label`` (category name, after
    the optional renaming). Categories and the images within each category
    are processed in sorted order so the output is reproducible.

    Args:
        image_dir (str): Top-level dataset directory with one sub-directory
            per category.
        save_path (str): Path of the CSV file to write. Missing parent
            directories are created; a bare file name is written to the
            current working directory.
        ignored_categories (list, optional): Category directory names to
            skip. Defaults to None (nothing is skipped).
        new_cnames (dict, optional): Mapping from directory name to the label
            that should be written instead. Defaults to None (no renaming).

    Raises:
        OSError: If ``image_dir`` cannot be listed or ``save_path`` cannot be
            written.
    """
    if ignored_categories is None:
        ignored_categories = []

    # Only sub-directories are categories; a stray top-level file (README,
    # archive, ...) would otherwise make the per-category listing fail.
    categories = sorted(
        name for name in listdir_nohidden(image_dir)
        if name not in ignored_categories
        and os.path.isdir(os.path.join(image_dir, name))
    )

    rows = []
    for category in categories:
        category_dir = os.path.join(image_dir, category)

        # Apply the optional display-name mapping to the written label only;
        # the on-disk directory name is still used for the path.
        label = category
        if new_cnames is not None and category in new_cnames:
            label = new_cnames[category]

        # os.listdir returns entries in arbitrary order; sort so that row ids
        # are identical across runs and machines.
        for image_name in sorted(listdir_nohidden(category_dir)):
            rows.append({
                'id': len(rows),
                'image_path': os.path.join(category_dir, image_name),
                'label': label
            })

    # dirname() is '' for a bare file name, and os.makedirs('') raises, so
    # only create the parent directory when there is one.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # newline='' is required by the csv module so that no extra blank rows
    # are produced on platforms that translate line endings.
    with open(save_path, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=['id', 'image_path', 'label'])
        writer.writeheader()
        writer.writerows(rows)

# Example usage
if __name__ == "__main__":
    # Dataset location and output location
    image_dir = '/root/autodl-tmp/cifar-100/cifar100'  # top-level image directory (one sub-folder per class)
    save_path = '/root/autodl-tmp/cifar-100/cifar100.csv'  # where the CSV is written
    
    # Categories to ignore and optional category renaming
    ignored_categories = []  # adjust as needed
    new_cnames = None  # define a {directory name: label} mapping here if renaming is wanted
    
    # Generate the CSV file
    generate_csv(image_dir, save_path, ignored_categories, new_cnames)
    print(f"标注文件已生成并保存到: {save_path}")

标注文件已生成并保存到: /root/autodl-tmp/cifar-100/cifar100.csv


In [4]:
!pwd

/root/autodl-tmp/cifar-10
