In [30]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

# Set random seed for reproducibility
torch.manual_seed(0)

# Define transformations
transform_test = transforms.Compose([
    transforms.ToTensor()
])

# Load CIFAR-100 dataset
train_data = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_test)
test_data = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_data, batch_size=100, shuffle=False)
mean_train = torch.zeros(3)
std_train = torch.zeros(3)
total_samples = 50000
batch_size = 100

for imgs, _ in train_loader:
    batch_mean = torch.mean(imgs, dim=(0, 2, 3))
    batch_std = torch.std(imgs, dim=(0, 2, 3))
    mean_train += batch_mean
    std_train += batch_std

mean_train = mean_train*batch_size/total_samples
std_train = std_train*batch_size/total_samples
print("Mean:", mean_train)
print("Std:", std_train)

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean_train, std_train)  # Normalizing with previously calculated mean and std
])

train_data = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)

# Split the dataset into training and validation sets
new_train_data, val_data = random_split(train_data, [40000,10000])

# Calculate class proportions in the new training set
class_counts = torch.zeros(100)
for img, target in new_train_data:
    class_counts[target] += 1

class_proportions = 100 * class_counts / class_counts.sum()

print("Proportion of each class in the new training set:")
with open("new_train_set.csv", "w") as file:
    for i in range(100):
        # 将print的输出写入到文件中
        file.write(f"{train_data.classes[i]}, {class_proportions[i]:.2f}\n")



Files already downloaded and verified
Files already downloaded and verified
Mean: tensor([0.5071, 0.4865, 0.4409])
Std: tensor([0.2667, 0.2558, 0.2754])
Files already downloaded and verified
Proportion of each class in the new training set:
