In [9]:
import torch
import torchvision
from torchvision import transforms as T
import numpy as np

In [4]:
# Raw CIFAR-10 training split with no transform attached; the files are
# fetched into ./data if they are not already there.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
)

Files already downloaded and verified


In [27]:
# Which classes do we have
classes = list(train_dataset.classes)
print(classes)

# Count how many samples each class has. The labels are read directly from
# `train_dataset.targets` rather than by iterating over the dataset, because
# iterating builds every image only to throw it away. The counts are kept as
# floats.
class_counter = np.bincount(train_dataset.targets, minlength=len(classes)).astype(float)

print(class_counter)

# Candidate keep-fraction per class (position = class index); in this cell it
# is only used for the preview printed below.
percentage = [1, 1, 1, 0.8, 0.5, 0.2, 0.2, 0.2, 0.1, 0.1]

# Per-class sample counts that would remain after applying those fractions
print(class_counter * percentage)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
[5000. 5000. 5000. 5000. 5000. 5000. 5000. 5000. 5000. 5000.]
[5000. 5000. 5000. 4000. 2500. 1000. 1000. 1000.  500.  500.]


In [21]:
# Number of images to keep for each class (position = class index)
num_images = [500, 500, 500, 500, 500, 1000, 1000, 1000, 1000, 2000]
assert len(num_images) == len(classes), "num_images needs exactly one entry per class"

# Positions of all samples in the dataset. Not needed by the selection below;
# retained in case other cells use it.
indices = list(range(len(train_dataset)))

# Labels of all samples, in dataset order. Reading them from `targets` avoids
# indexing train_dataset[j], which also builds the image on every access and
# would have to be repeated for every class.
all_labels = np.asarray(train_dataset.targets)

# For every class, find the dataset positions carrying that label (ascending
# order) and keep only the first num_images[class] of them.
class_indices = []
for class_id, keep in enumerate(num_images):
    positions_of_class = np.flatnonzero(all_labels == class_id)
    class_indices.append(positions_of_class[:keep].tolist())

# Flatten the per-class lists into one list of dataset positions
class_indices = [item for sublist in class_indices for item in sublist]

# Create a new dataset that exposes only the selected positions
imbalanced_dataset = torch.utils.data.Subset(train_dataset, class_indices)


In [23]:

# Training pipeline: resize, random augmentations, then tensor conversion and
# per-channel normalisation.
train_transform = T.Compose([
    # Resize the image to 224x224
    T.Resize((224, 224)),
    # Randomly flip the image horizontally
    T.RandomHorizontalFlip(),
    # Randomly rotate the image by up to 10 degrees
    T.RandomRotation(10),
    # Random shear (up to 10 degrees) and zoom (scale factor 0.8-1.2), no extra rotation
    T.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    # Randomly perturb brightness, contrast and saturation
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    # Convert the image to a tensor so that it can be used with torch
    T.ToTensor(),
    # Normalise every channel with mean 0.5 and std 0.5
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation pipeline: no augmentation, same target size and normalisation.
# NOTE(review): here the resize runs after ToTensor (on the tensor), whereas the
# training pipeline resizes before ToTensor — confirm the different order is intended.
transform = T.Compose([
    T.ToTensor(),
    T.Resize((224, 224)),
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training split with the augmenting transform attached
trainset2 = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=train_transform,
)

# CIFAR-10 test split with the plain evaluation transform attached
testset2 = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Number of target classes
num_classes = 10




Files already downloaded and verified
Files already downloaded and verified


In [24]:
# Number of samples in trainset2 (the complete training split, not the
# imbalanced subset).
train_size = len(trainset2)
print(train_size)

50000


In [22]:
# Which classes do we have
classes = list(train_dataset.classes)
print(classes)

# Count how many samples of each class ended up in the imbalanced subset.
# The labels are looked up through the subset's index list in the wrapped
# dataset's `targets`, so no image has to be built just to read its label.
# The counts are kept as floats.
subset_labels = np.asarray(imbalanced_dataset.dataset.targets)[imbalanced_dataset.indices]
class_counter = np.bincount(subset_labels, minlength=len(classes)).astype(float)

print(class_counter)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
[ 500.  500.  500.  500.  500. 1000. 1000. 1000. 1000. 2000.]


In [47]:
# Draw a single random integer; `high` is exclusive, so the value lies in 1..8.
torch.randint(low=1, high=9, size=(1,))

tensor([7])

In [41]:
# Show element 1 of sample 50 (presumably its class label).
# NOTE(review): `trainset` is not defined in any visible cell — presumably left
# over from earlier kernel state; confirm, or switch to `train_dataset` /
# `trainset2` so the cell survives Restart & Run All.
trainset[50][1]

9

In [28]:
#accessing CIFAR10 dataset
dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)

# Inject label noise: each sample's label is replaced, with a probability of
# 10%, by a different class.
#
# The labels are rewritten through `dataset.targets`. Indexing `dataset[i]`
# returns a fresh (image, label) tuple, so assigning to `dataset[i][1]` raises
# TypeError and could never reach the stored label.
noise_rate = 0.1
label_count = len(dataset.classes)
clean_labels = torch.tensor(dataset.targets)

# One independent coin flip per sample decides whether its label is corrupted.
flip_mask = torch.rand(len(clean_labels)) < noise_rate

# Offsets 1..label_count-1 (randint's upper bound is exclusive): adding one
# modulo label_count always yields a different class, and every other class
# can be reached.
offsets = torch.randint(1, label_count, (len(clean_labels),))
noisy_labels = torch.where(flip_mask, (clean_labels + offsets) % label_count, clean_labels)

# Store the labels back as a plain list of Python ints.
dataset.targets = noisy_labels.tolist()

tensor([4])