# Federate a dataset from torchtext, torchvision, tensorflow or huggingface

The main deep learning frameworks ship a large number of ready-to-use datasets, each designed to be convenient within its own framework. It is important to let users work with these datasets, so we make it possible to take your favorite datasets from any of these frameworks and federate them.

We show multiple examples on how to load and federate the datasets.

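For every framework we support, there are two possible ways to load and federate a dataset: in two steps (first wrap the dataset in a `FlexDataObject`, then distribute it with a `FlexDatasetConfig`), or directly with a shortcut method that takes the original dataset and the config. First, we import the classes we need: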

In [None]:
from flex.data import FlexDataObject
from flex.data import FlexDataDistribution
from flex.data import FlexDatasetConfig

## HuggingFace

In [None]:
# Loader from the HuggingFace `datasets` package
from datasets import load_dataset

# Fetch the 'train' split of the 'ag_news' dataset. The result is a
# HuggingFace dataset; it is wrapped/federated in the following cells.
dataset_hf = load_dataset(
    'ag_news',
    split='train',
)

One way:

In [None]:
# Step 1: wrap the HuggingFace dataset, naming the feature column ("text")
# and the label column ("label").
fcd_hf = FlexDataObject.from_huggingface_dataset(
    dataset_hf,
    X_columns="text",
    label_column="label",
)

# Step 2: describe the federation: fixed seed, two clients, replacement off.
config_hf = FlexDatasetConfig(seed=0, n_clients=2, replacement=False)

# Step 3: distribute the wrapped dataset according to the config.
flex_dataset_two_step_hf = FlexDataDistribution.from_config(
    cdata=fcd_hf,
    config=config_hf,
)

# Show the first sample held by client_0.
print(f"Flex dataset two steps a data sample from client_0: {flex_dataset_two_step_hf['client_0'].X_data[0]}")

Or another (shortcut):

In [None]:
# Shortcut: a single call takes the raw HuggingFace dataset and the config.
# The two trailing positional arguments are the same "text" / "label" names
# used in the two-step version (presumably feature and label columns).
flex_dataset_hf = FlexDataDistribution.from_config_with_huggingface_dataset(
    dataset_hf,
    config_hf,
    "text",
    "label",
)

# Show the first sample held by client_0.
print(f"Flex dataset a data sample from client_0: {flex_dataset_hf['client_0'].X_data[0]}")

## TensorFlow dataset

In [None]:
import tensorflow_datasets as tfds

# Load the MNIST 'train' split as supervised (input, label) data.
# NOTE(review): `split` is a one-element list, so `tfds.load` returns a list
# rather than a single dataset -- confirm this is what the flex loaders expect.
mnist = tfds.load(
    'mnist',
    batch_size=-1,  # this is required
    as_supervised=True,
    shuffle_files=True,
    split=['train'],
)

One way:

In [None]:
import matplotlib.pyplot as plt

# Step 1: wrap the TensorFlow Datasets object loaded above.
fcd_tf = FlexDataObject.from_tfds_dataset(mnist)

# Step 2: describe the federation: fixed seed, two clients, replacement off.
config_tf = FlexDatasetConfig(seed=0, n_clients=2, replacement=False)

# Step 3: federate the FlexDataObject according to the config.
flex_dataset_two_step_tf = FlexDataDistribution.from_config(cdata=fcd_tf, config=config_tf)

# Display the first sample held by client_0 with a gray colormap.
sample = flex_dataset_two_step_tf['client_0'].X_data[0]
plt.imshow(sample, cmap=plt.get_cmap('gray'))

Or another (shortcut):

In [None]:
import matplotlib.pyplot as plt

# Shortcut: federate the tfds dataset in a single call, reusing `config_tf`.
flex_dataset_tf = FlexDataDistribution.from_config_with_tfds_dataset(mnist, config_tf)

# Display the first sample held by client_0 with a gray colormap.
sample = flex_dataset_tf['client_0'].X_data[0]
plt.imshow(sample, cmap=plt.get_cmap('gray'))

## PyTorch torchvision dataset

In [None]:
from torchvision import datasets, transforms

# CIFAR10 training set, downloaded into the current directory, with the
# `ToTensor` transform attached.
cifar10 = datasets.CIFAR10(
    root=".",
    download=True,
    train=True,
    transform=transforms.ToTensor(),
)

One way:

In [None]:
import matplotlib.pyplot as plt

# Step 1: wrap the torchvision dataset loaded above.
fcd_torch = FlexDataObject.from_torchvision_dataset(cifar10)

# Step 2: describe the federation: fixed seed, two clients, replacement off.
config_torch = FlexDatasetConfig(seed=0, n_clients=2, replacement=False)

# Step 3: federate the FlexDataObject according to the config.
flex_dataset_two_step_torch = FlexDataDistribution.from_config(cdata=fcd_torch, config=config_torch)

sample = flex_dataset_two_step_torch['client_0'].X_data[0]
# The two swaps move axis 0 to the last position, (a, b, c) -> (b, c, a);
# presumably channels-first -> channels-last for imshow (confirm).
sample = sample.swapaxes(0, 1).swapaxes(1, 2)
plt.imshow(sample, cmap=plt.get_cmap('gray'))

Or another (shortcut):

In [None]:
# Federate the torchvision dataset directly in a single call.
# Use `config_torch` (defined in the two-step torchvision cell) rather than
# the TensorFlow section's `config_tf`, so this section does not depend on
# the TensorFlow cells having been executed.
flex_dataset_torch = FlexDataDistribution.from_config_with_torchvision_dataset(
    cifar10,
    config_torch
)

# Take the first sample held by client_0.
sample = flex_dataset_torch['client_0'].X_data[0]
import matplotlib.pyplot as plt
# The two swaps move axis 0 to the last position, (a, b, c) -> (b, c, a);
# presumably channels-first -> channels-last for imshow (confirm).
sample = sample.swapaxes(0,1)
sample = sample.swapaxes(1,2)
plt.imshow(sample, cmap=plt.get_cmap('gray'))

## PyTorch torchtext dataset

In [None]:
# AG_NEWS dataset constructor from torchtext
from torchtext.datasets import AG_NEWS

# Build the 'train' split; it is wrapped/federated in the following cells.
torch_dataset = AG_NEWS(split='train')

One way:

In [None]:
# Step 1: wrap the torchtext dataset loaded above.
# Note: this rebinds `fcd_torch`, `config_torch` and
# `flex_dataset_two_step_torch` from the torchvision section.
fcd_torch = FlexDataObject.from_torchtext_dataset(torch_dataset)

# Step 2: we use the same configuration as in the HuggingFace example.
config_torch = FlexDatasetConfig(seed=0, n_clients=2, replacement=False)

# Step 3: federate the FlexDataObject according to the config.
flex_dataset_two_step_torch = FlexDataDistribution.from_config(cdata=fcd_torch, config=config_torch)

# Show the first sample held by client_0.
print(f"Flex dataset two steps: {flex_dataset_two_step_torch['client_0'].X_data[0]}")

Or another (shortcut):

In [None]:
# Shortcut: federate the torchtext dataset in a single call, reusing
# `config_torch`. This rebinds `flex_dataset_torch` from the torchvision section.
flex_dataset_torch = FlexDataDistribution.from_config_with_torchtext_dataset(torch_dataset, config_torch)

# Show the first sample held by client_0.
print(f"Flex dataset direct: {flex_dataset_torch['client_0'].X_data[0]}")

### END
Congratulations, now you know how to federate a dataset using the *FlexDataDistribution* and the *FlexDatasetConfig* classes, so you can set up multiple experimental settings that best fit your hypotheses.