In [8]:
from torchvision import datasets, transforms

# Image preprocessing pipeline.
# EMNIST images are stored transposed relative to their upright orientation, so a
# -90 degree rotation on its own leaves every glyph mirrored left-to-right.
# Rotating and then flipping horizontally is equivalent to transposing the image,
# which restores the natural, readable orientation.
transform = transforms.Compose([
    # degenerate range (-90, -90): the "random" rotation is always exactly -90 degrees
    transforms.RandomRotation((-90, -90)),
    # p=1.0: the flip is always applied, undoing the mirroring left by the rotation
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.ToTensor(),
    # normalize data to [-1, 1]
    transforms.Normalize((0.5,), (0.5,)),
])

# Arguments shared by both splits, so train and test are guaranteed to use the
# same EMNIST variant and the same preprocessing.
emnist_kwargs = dict(root='./data', split='balanced', download=True, transform=transform)

# get training and test dataset
train = datasets.EMNIST(train=True, **emnist_kwargs)
test = datasets.EMNIST(train=False, **emnist_kwargs)

print(f"number of training data and test data: {len(train)}, {len(test)}")
print('classes in training data: ')
print(train.classes)
print(f"length of classes: {len(train.classes)}")

number of training data and test data: 112800, 18800
classes in training data: 
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'd', 'e', 'f', 'g', 'h', 'n', 'q', 'r', 't']
length of classes: 47


In [9]:
from torch.utils.data import DataLoader
# Wrap the datasets in batched loaders.
# `train` / `test` are rebound from datasets to DataLoaders here (the following cell
# uses these names as loaders). A plain `DataLoader(train, ...)` would therefore wrap
# a loader inside a loader if this cell were executed a second time; unwrapping
# `.dataset` first keeps the cell safe to re-run.
BATCH_SIZE = 64


def _unwrap_dataset(source):
    """Return the dataset behind `source` if it is already a DataLoader, else `source` itself."""
    return source.dataset if isinstance(source, DataLoader) else source


# training batches are reshuffled on every pass; test order stays fixed
train = DataLoader(_unwrap_dataset(train), batch_size=BATCH_SIZE, shuffle=True)
test = DataLoader(_unwrap_dataset(test), batch_size=BATCH_SIZE, shuffle=False)

In [10]:
# Report the size of each loader. Note: len() of a DataLoader is the number of
# batches it yields, not the number of individual samples.
for split_label, loader in (("training", train), ("test", test)):
    print(f"length of {split_label} dataset after loading: {len(loader)}")

length of training dataset after loading: 1763
length of test dataset after loading: 294
