In [1]:
# IPython shell escape: print the kernel's current working directory.
!pwd

/root/autodl-tmp/cifar-100


In [None]:
import os
import pickle
import numpy as np
from PIL import Image
from tqdm import tqdm

def unpickle(file):
    """Read one pickled object from the file at path ``file`` and return it.

    The file is opened in binary mode and ``encoding='latin1'`` is handed to
    ``pickle.load``, which controls how 8-bit strings stored in the pickle
    are decoded.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(file, mode='rb') as stream:
        contents = pickle.load(stream, encoding='latin1')
    return contents

def convert_cifar100_to_caltech_style(cifar100_root, output_dir):
    """Export the CIFAR-100 'train' and 'test' batches as image files under ``output_dir``.

    Args:
        cifar100_root (str): Directory containing the pickled 'meta', 'train'
            and 'test' files.
        output_dir (str): Destination root; created if it does not exist.
            Both splits are written into this same directory tree.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The 'meta' file provides the fine-grained class names.
    class_names = unpickle(os.path.join(cifar100_root, 'meta'))['fine_label_names']

    # Training set first, then test set; each split is loaded right before it is written.
    for split_name in ('train', 'test'):
        batch = unpickle(os.path.join(cifar100_root, split_name))
        save_cifar100_images(batch, class_names, output_dir, split=split_name)

def save_cifar100_images(data_dict, label_names, output_dir, split):
    """Write every image of one batch to ``output_dir/<class name>/<filename>``.

    Args:
        data_dict (dict): Unpickled batch; the keys 'data', 'fine_labels' and
            'filenames' are read. Each row of 'data' must hold 3072 values
            (it is reshaped to 3 x 32 x 32).
        label_names (list): Class names, indexed by the fine label of an image.
        output_dir (str): Root directory that receives one folder per class.
        split (str): Name of the split; only used in the progress-bar caption.
    """
    data = data_dict['data']  # shape: (number of images, 3072)
    labels = data_dict['fine_labels']
    filenames = data_dict['filenames']

    # Many images share the same class folder, so remember which folders were
    # already created instead of calling os.makedirs once per image.
    prepared_dirs = set()

    for i in tqdm(range(len(data)), desc=f"Processing {split}"):
        # Rows are stored channel-first; move the channel axis last so the
        # array is height x width x channel before handing it to PIL.
        img = data[i].reshape(3, 32, 32).transpose(1, 2, 0)

        class_dir = os.path.join(output_dir, label_names[labels[i]])
        if class_dir not in prepared_dirs:
            os.makedirs(class_dir, exist_ok=True)
            prepared_dirs.add(class_dir)

        Image.fromarray(img).save(os.path.join(class_dir, filenames[i]))

# Example usage: convert the extracted CIFAR-100 pickles into per-class image folders.
cifar100_root = "./cifar-100-python"  # Directory holding the 'meta', 'train' and 'test' files
output_dir = "./cifar100"  # Root directory that receives the class folders
convert_cifar100_to_caltech_style(cifar100_root=cifar100_root, output_dir=output_dir)


Processing train: 100%|██████████| 50000/50000 [00:17<00:00, 2886.32it/s]
Processing test: 100%|██████████| 10000/10000 [00:03<00:00, 3094.00it/s]


In [None]:
import os
import csv
from pathlib import Path

def listdir_nohidden(path):
    """Return the entries of ``path`` whose names do not begin with a dot.

    Entries are returned in the order ``os.listdir`` yields them.
    """
    visible = []
    for entry in os.listdir(path):
        if entry.startswith('.'):
            continue  # dot-prefixed entries are treated as hidden
        visible.append(entry)
    return visible

def generate_csv(image_dir, save_path, ignored_categories=None, new_cnames=None):
    """
    Generate a CSV annotation file for a dataset stored as one sub-directory per category.

    The CSV has the columns ``id`` (running row index starting at 0),
    ``image_path`` (``image_dir``/category/file) and ``label`` (the category
    directory name, or its replacement from ``new_cnames``). Categories and
    the images inside each category are processed in sorted order so that the
    assigned ids are reproducible across runs and file systems.

    Args:
        image_dir (str): Path to the top-level directory of the dataset.
        save_path (str): Path to save the generated CSV file.
        ignored_categories (list, optional): List of categories to ignore. Defaults to None.
        new_cnames (dict, optional): Dictionary for category name mapping. Defaults to None.
    """
    if ignored_categories is None:
        ignored_categories = []

    # Ensure the save directory exists. dirname() is '' when save_path is a
    # bare filename, and os.makedirs('') would raise, so only create a
    # directory when there is one to create.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Categories are the non-hidden sub-directories of image_dir; plain files
    # lying next to the class folders are skipped instead of crashing the
    # per-category listing below.
    categories = sorted(
        c for c in listdir_nohidden(image_dir)
        if c not in ignored_categories and os.path.isdir(os.path.join(image_dir, c))
    )

    # Prepare data
    data = []
    for category in categories:
        category_dir = os.path.join(image_dir, category)
        # os.listdir order is arbitrary, so sort to keep ids deterministic.
        image_paths = [
            os.path.join(category_dir, im)
            for im in sorted(listdir_nohidden(category_dir))
        ]

        # Use the mapped category name as the label when a mapping exists.
        label = category
        if new_cnames is not None and category in new_cnames:
            label = new_cnames[category]

        for image_path in image_paths:
            data.append({
                'id': len(data),
                'image_path': image_path,
                'label': label
            })

    # The csv module requires newline='' so it controls line endings itself;
    # without it, extra blank rows appear on platforms that translate '\n'.
    with open(save_path, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=['id', 'image_path', 'label'])
        writer.writeheader()
        writer.writerows(data)

# Example usage: build the annotation CSV for the image folders on disk.
if __name__ == "__main__":
    # Root directory containing one sub-directory per class, and the CSV destination
    image_dir = '/root/autodl-tmp/cifar-100/cifar100'
    save_path = '/root/autodl-tmp/cifar-100/cifar100.csv'

    # Optional filtering / renaming of categories (nothing is ignored or renamed here)
    ignored_categories = []
    new_cnames = None

    # Write the CSV, then report where it was saved
    generate_csv(
        image_dir=image_dir,
        save_path=save_path,
        ignored_categories=ignored_categories,
        new_cnames=new_cnames,
    )
    print(f"Annotation file has been generated and saved to: {save_path}")

Annotation file has been generated and saved to: /root/autodl-tmp/cifar-100/cifar100.csv


In [4]:
# IPython shell escape: print the kernel's current working directory.
!pwd

/root/autodl-tmp/cifar-10
