In [13]:
import os
import numpy as np
import torchvision
from PIL import Image

import settings

## Load Dataset

In [14]:
# Seed NumPy's legacy global RNG before anything else in the notebook runs.
np.random.seed(0)

# Directory holding the CIFAR-10 files, as configured in the local `settings` module.
root = settings.DATA_HOME['cifar10']

# Build the train split first, then the test split; both download on demand.
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(root=root, train=is_train_split, download=True)
    for is_train_split in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


## Merge Dataset: keep classes 0, 1, 8, 9 and relabel them 0–3

In [15]:
# Original CIFAR-10 class ids to keep. A kept class is relabelled with its
# position in this array (0 -> 0, 1 -> 1, 8 -> 2, 9 -> 3).
label_list = np.array([0, 1, 8, 9])


def _select_classes(dataset, kept_labels):
    """Keep only the samples whose label is in ``kept_labels`` and relabel them.

    Parameters
    ----------
    dataset : iterable of (image, label) pairs
        Iterated once, in order.
    kept_labels : sequence of int
        Original class ids to keep; the new label is the index in this sequence.

    Returns
    -------
    images : np.ndarray, dtype=object, shape (n_kept,)
        The kept image objects, untouched.
    labels : np.ndarray, dtype=np.int64, shape (n_kept,)
        Remapped labels in ``range(len(kept_labels))``.
    """
    # One dict lookup per sample instead of an `in` scan plus `np.where`.
    remap = {int(original): new for new, original in enumerate(kept_labels)}

    images, labels = [], []
    for pil_image, label in dataset:
        new_label = remap.get(label)
        if new_label is not None:
            images.append(pil_image)
            labels.append(new_label)

    # Fill the object array element by element: handing a list of array-like
    # images straight to np.array() may expand them into extra dimensions
    # instead of giving a 1-D array of image objects.
    image_array = np.empty(len(images), dtype=object)
    for position, pil_image in enumerate(images):
        image_array[position] = pil_image

    # np.int64 replaces `np.long`, which no longer exists in current NumPy.
    return image_array, np.array(labels, dtype=np.int64)


all_train_data, all_train_labels = _select_classes(train_dataset, label_list)
all_test_data, all_test_labels = _select_classes(test_dataset, label_list)

# Sanity summary: shapes of both splits and the label set found in each.
(
    all_train_data.shape, all_test_data.shape,
    all_train_labels.shape, all_test_labels.shape,
    np.unique(all_train_labels), np.unique(all_test_labels),
)

((20000,),
 (4000,),
 (20000,),
 (4000,),
 array([0, 1, 2, 3]),
 array([0, 1, 2, 3]))

## Divide data by class and across clients

In [16]:
# Number of distinct (remapped) labels present in the training split.
n_classes = len(np.unique(all_train_labels))


def _bucket_by_class(images, labels, class_count):
    """Stack, for each class id in range(class_count), the images carrying that label."""
    per_class = []
    for class_id in range(class_count):
        per_class.append(images[labels == class_id])
    return np.array(per_class, dtype=Image.Image)


classified_train_data = _bucket_by_class(all_train_data, all_train_labels, n_classes)
classified_test_data = _bucket_by_class(all_test_data, all_test_labels, n_classes)

# Client names, plus three label sequences; position i of each sequence gives
# one of the three classes assigned to client i.
n_clients = 4
clients = list(map('Client-{}'.format, range(n_clients)))
seq_0, seq_1, seq_2 = [0, 1, 2, 3], [1, 2, 3, 0], [2, 3, 0, 1]

classified_train_data.shape, classified_test_data.shape

((4, 5000), (4, 1000))

In [17]:
# Build one dataset per client. Each client receives three classes; for the
# class in slot k (0, 1 or 2) it takes the k-th chunk of that class's images,
# so the slices handed out for any one class do not overlap.
client_data = []
client_labels = []

n_train = 1500  # training images per class per client
n_test = 300    # test images per class per client


def _take_chunks(classified, class_ids, chunk_size):
    """Return, for slot k / class c, ``classified[c][k*chunk_size:(k+1)*chunk_size]``."""
    chunks = []
    for slot, class_id in enumerate(class_ids):
        chunk = classified[class_id][slot * chunk_size:(slot + 1) * chunk_size]
        # A short slice would silently misalign data and labels below.
        assert len(chunk) == chunk_size, (
            'class {} has too few images for slot {} ({} < {})'.format(
                class_id, slot, len(chunk), chunk_size))
        chunks.append(chunk)
    return chunks


for client_name, label_0, label_1, label_2 in zip(clients, seq_0, seq_1, seq_2):
    class_ids = (label_0, label_1, label_2)

    train_data = _take_chunks(classified_train_data, class_ids, n_train)
    train_labels = [c for c in class_ids for _ in range(n_train)]

    test_data = _take_chunks(classified_test_data, class_ids, n_test)
    test_labels = [c for c in class_ids for _ in range(n_test)]

    # Layout per client: all training samples first, then all test samples.
    data = np.concatenate(train_data + test_data)
    labels = np.array(train_labels + test_labels, dtype=np.int64)
    assert data.shape[0] == labels.shape[0]

    print(client_name, data.shape, labels.shape)

    # Per-client archive; `data` holds the image objects exactly as collected.
    np.savez_compressed(os.path.join(root, '{}_dataset'.format(client_name)), data=data, labels=labels)

    # Reorder every image's axes to (2, 0, 1) for the combined array
    # (channel-first, per the recorded (4, 5400, 3, 32, 32) output).
    client_data.append([np.transpose(image, (2, 0, 1)) for image in data])
    client_labels.append(labels)

client_data = np.array(client_data, np.uint8)
# np.int64 replaces `np.long`, which no longer exists in current NumPy.
client_labels = np.array(client_labels, np.int64)
print(client_data.shape)

# Combined archive with one leading axis entry per client.
np.savez_compressed(os.path.join(root, 'cifar10_dataset'),
                    client_names=clients, data=client_data, labels=client_labels)

Client-0 (5400,) (5400,)
Client-1 (5400,) (5400,)
Client-2 (5400,) (5400,)
Client-3 (5400,) (5400,)
(4, 5400, 3, 32, 32)


In [18]:
# Re-open the combined archive written above. np.savez_compressed appends
# '.npz' to 'cifar10_dataset', so the name must match in case as well:
# 'CIFAR10_dataset.npz' only resolves on case-insensitive filesystems.
f = np.load(os.path.join(root, 'cifar10_dataset.npz'), allow_pickle=True)
f['data'].shape

(4, 5400, 3, 32, 32)

In [20]:
# Distinct labels stored for each client (one entry per row of f['labels']).
list(map(np.unique, f['labels']))

[array([0, 1, 2]), array([1, 2, 3]), array([0, 2, 3]), array([0, 1, 3])]