In [2]:
import argparse
import time
import yaml
import os
import logging
from collections import OrderedDict
from contextlib import suppress
from datetime import datetime

import torch
import torch.nn as nn
import torchvision.utils
from torch.nn.parallel import DistributedDataParallel as NativeDDP

from timm.data import create_dataset, create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset
from timm.models import create_model, safe_model_name, resume_checkpoint, load_checkpoint, model_parameters
from timm.models.layers import convert_splitbn_model
from timm.utils import *
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy, JsdCrossEntropy
from timm.optim import create_optimizer_v2, optimizer_kwargs
from timm.scheduler import create_scheduler
from timm.utils import ApexScaler, NativeScaler

from src import *

# Probe for the optional `apex` package; `has_apex` records whether its
# AMP and distributed helpers could be imported.
try:
    from apex import amp
    from apex.parallel import DistributedDataParallel as ApexDDP
    from apex.parallel import convert_syncbn_model
except ImportError:
    has_apex = False
else:
    has_apex = True
print("hello")

# SAVE_PATH = "data/cifar10"

hello


# Processing CIFAR-10

In [12]:
import os
import pandas as pd
import shutil

# Path to your existing 'train' folder and the CSV file
base_path = './data/cifar10/'  # Adjust this if your path is different
train_folder_path = os.path.join(base_path, 'train')
train_labels_path = os.path.join(base_path, 'trainLabels.csv')

# Read the CSV file; the loops below use its 'id' and 'label' columns.
labels_df = pd.read_csv(train_labels_path)

# Create new main training directory
new_train_path = os.path.join(base_path, 'new_train')
os.makedirs(new_train_path, exist_ok=True)

# Create every label folder once up front instead of once per image.
for label in labels_df['label'].unique():
    os.makedirs(os.path.join(new_train_path, label), exist_ok=True)

# Move each image into the folder named after its label.
for image_id, label in zip(labels_df['id'], labels_df['label']):
    image_name = f"{image_id}.png"

    # Source and destination paths
    src_path = os.path.join(train_folder_path, image_name)
    dst_path = os.path.join(new_train_path, label, image_name)

    # Moving is destructive, so a second run of this cell finds the source
    # gone. Skip images that are already in place so the cell can be re-run
    # (or resumed after an interruption). An image missing from BOTH places
    # still reaches shutil.move and raises there.
    if not os.path.exists(src_path) and os.path.exists(dst_path):
        continue

    # Move the image to its label directory
    shutil.move(src_path, dst_path)

print("Organized images based on their labels!")

Organized images based on their labels!


In [13]:
# Download CIFAR-10 twice into separate roots: once with train=True under
# data3/train and once with train=False under data3/validation.
# NOTE(review): the log below shows both calls fetching the same
# cifar-10-python.tar.gz, and the extraction cell reads test_batch from
# data3/train, so the second download looks redundant — confirm before removing.
torchvision.datasets.CIFAR10(root="data3/train", train=True, download=True)
# Last expression of the cell, so its dataset repr is what the cell displays.
torchvision.datasets.CIFAR10(root="data3/validation", train=False, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data3/train/cifar-10-python.tar.gz


100.0%


Extracting data3/train/cifar-10-python.tar.gz to data3/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data3/validation/cifar-10-python.tar.gz


100.0%


Extracting data3/validation/cifar-10-python.tar.gz to data3/validation


Dataset CIFAR10
    Number of datapoints: 10000
    Root location: data3/validation
    Split: Test

In [3]:
import os
import pickle
import numpy as np
from PIL import Image

def unpickle(file):
    """Deserialize and return the pickled object stored at ``file``.

    The file is opened in binary mode and legacy byte strings are decoded
    as latin-1.

    Caution: unpickling can execute arbitrary code, so only pass files that
    come from a source you trust.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='latin-1')

def save_images_from_batch(data, labels, label_names, root_dir, start_index=0):
    """Save every image of a CIFAR batch as ``root_dir/<label name>/<number>.png``.

    Args:
        data: Sequence of image rows. Each row must hold exactly 3072 values
            laid out as 1024 red, 1024 green, then 1024 blue values
            (assumes uint8 NumPy rows, as PIL needs — TODO confirm for new
            data sources).
        labels: Sequence parallel to ``data``; ``labels[i]`` is the index into
            ``label_names`` for ``data[i]``.
        label_names: Sequence mapping a label index to the folder name.
        root_dir: Directory under which one sub-folder per label is created.
        start_index: Number given to the first image; image ``i`` is written
            as ``<start_index + i>.png``. The default of 0 keeps the original
            numbering. Pass a running offset when saving several batches into
            the same ``root_dir``; otherwise images that share an index and a
            label overwrite each other.

    Raises:
        IndexError: If ``labels`` is shorter than ``data`` or a label is out of
            range for ``label_names``.
        ValueError: If a row does not contain exactly 3072 values.
    """
    ready_dirs = set()  # label folders already created during this call
    for idx, img_data in enumerate(data):
        label_name = label_names[labels[idx]]
        folder_path = os.path.join(root_dir, label_name)
        if folder_path not in ready_dirs:
            # Touch the filesystem once per label rather than once per image.
            os.makedirs(folder_path, exist_ok=True)
            ready_dirs.add(folder_path)

        # Split the flat row into its red, green and blue 32x32 planes, then
        # stack them along the last axis to get a (32, 32, 3) RGB image.
        planes = img_data.reshape(3, 32, 32)
        img = np.dstack((planes[0], planes[1], planes[2]))

        img_path = os.path.join(folder_path, f"{start_index + idx}.png")
        Image.fromarray(img).save(img_path)


In [16]:

# Load label names from batches.meta
metadata = unpickle('data3/train/cifar-10-batches-py/batches.meta')
label_names = metadata['label_names']

# Load training batches data_batch_1 to data_batch_5 and merge them before
# saving. save_images_from_batch names each file after the image's position in
# the array it receives, so saving the five batches one at a time into 'train'
# lets a later batch overwrite an earlier image that has the same position and
# label. In the merged array every training image has a unique position.
train_batches = [
    unpickle(f'data3/train/cifar-10-batches-py/data_batch_{i}')
    for i in range(1, 6)
]
train_images = np.concatenate([batch['data'] for batch in train_batches])
train_labels = [label for batch in train_batches for label in batch['labels']]
assert len(train_images) == len(train_labels), "images and labels are out of sync"
save_images_from_batch(train_images, train_labels, label_names, 'train')

# Process test batch
test_batch_data = unpickle('data3/train/cifar-10-batches-py/test_batch')
save_images_from_batch(test_batch_data['data'], test_batch_data['labels'], label_names, 'val')

# Processing CIFAR-100

In [34]:
import os
import pickle
import numpy as np
from PIL import Image

def unpickle(file):
    """Deserialize and return the pickled object stored at ``file``.

    The file is opened in binary mode and legacy byte strings are decoded
    as latin-1.

    Caution: unpickling can execute arbitrary code, so only pass files that
    come from a source you trust.
    """
    # NOTE(review): same definition as the `unpickle` in the CIFAR-10 section.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='latin-1')

def save_images_from_batch(data, labels, label_names, root_dir, start_index=0):
    """Save every image of a CIFAR batch as ``root_dir/<label name>/<number>.png``.

    Works for any batch whose rows use the 3072-value layout below; in this
    notebook it is called with CIFAR-100 fine labels as well as CIFAR-10 labels.

    Args:
        data: Sequence of image rows. Each row must hold exactly 3072 values
            laid out as 1024 red, 1024 green, then 1024 blue values
            (assumes uint8 NumPy rows, as PIL needs — TODO confirm for new
            data sources).
        labels: Sequence parallel to ``data``; ``labels[i]`` is the index into
            ``label_names`` for ``data[i]``.
        label_names: Sequence mapping a label index to the folder name.
        root_dir: Directory under which one sub-folder per label is created.
        start_index: Number given to the first image; image ``i`` is written
            as ``<start_index + i>.png``. The default of 0 keeps the original
            numbering. Pass a running offset when saving several batches into
            the same ``root_dir``; otherwise images that share an index and a
            label overwrite each other.

    Raises:
        IndexError: If ``labels`` is shorter than ``data`` or a label is out of
            range for ``label_names``.
        ValueError: If a row does not contain exactly 3072 values.
    """
    ready_dirs = set()  # label folders already created during this call
    for idx, img_data in enumerate(data):
        label_name = label_names[labels[idx]]
        folder_path = os.path.join(root_dir, label_name)
        if folder_path not in ready_dirs:
            # Touch the filesystem once per label rather than once per image.
            os.makedirs(folder_path, exist_ok=True)
            ready_dirs.add(folder_path)

        # Split the flat row into its red, green and blue 32x32 planes, then
        # stack them along the last axis to get a (32, 32, 3) RGB image.
        planes = img_data.reshape(3, 32, 32)
        img = np.dstack((planes[0], planes[1], planes[2]))

        img_path = os.path.join(folder_path, f"{start_index + idx}.png")
        Image.fromarray(img).save(img_path)


In [36]:
# Class names come from the `meta` file; only the fine-grained names are used
# as folder names below.
metadata = unpickle('data/cifar-100-python.tar/cifar-100-python/meta')
fine_label_names = metadata['fine_label_names']

# Training split -> ./train/<fine label name>/<index>.png
train_data = unpickle('data/cifar-100-python.tar/cifar-100-python/train')
save_images_from_batch(
    data=train_data['data'],
    labels=train_data['fine_labels'],
    label_names=fine_label_names,
    root_dir='train',
)

# Test split -> ./val/<fine label name>/<index>.png
test_data = unpickle('data/cifar-100-python.tar/cifar-100-python/test')
save_images_from_batch(
    data=test_data['data'],
    labels=test_data['fine_labels'],
    label_names=fine_label_names,
    root_dir='val',
)

In [7]:
# Show which top-level keys the loaded `meta` dictionary provides.
print(metadata.keys())

dict_keys(['fine_label_names', 'coarse_label_names'])


In [9]:
# Count the coarse label names listed in the metadata.
len(metadata["coarse_label_names"])

20

In [10]:
# Count the fine label names listed in the metadata (these become the class folders).
len(metadata["fine_label_names"])

100

In [11]:
# Load the CIFAR-100 training file under the name `data` for ad-hoc inspection.
# NOTE(review): the extraction cell loads this same file as `train_data`, and no
# visible cell reads `data` afterwards — confirm whether this cell is still needed.
data = unpickle('data/cifar-100-python.tar/cifar-100-python/train')

In [30]:
# Reload the CIFAR-100 test file and list the fields it contains.
test_data = unpickle('data/cifar-100-python.tar/cifar-100-python/test')
test_data.keys()

dict_keys(['filenames', 'batch_label', 'fine_labels', 'coarse_labels', 'data'])

In [31]:
# Number of entries in the test file's 'data' field.
len(test_data["data"])

10000