# Using DALI in Catalyst 


In [1]:
from nvidia.dali.pipeline import Pipeline
from nvidia.dali import pipeline_def
import nvidia.dali as dali
import nvidia.dali.fn as fn
import nvidia.dali.types as types
from torch.utils.data import DataLoader
from nvidia.dali.plugin.pytorch import DALIGenericIterator, LastBatchPolicy

In [2]:
# Display the installed DALI version (the output recorded below is '1.7.0').
dali.__version__

'1.7.0'

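### To get the test data, clone the [DALI extra](https://github.com/NVIDIA/DALI_extra) repository. The paths below expect it in a `DALI_extra/` directory under the current working directory.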

In [3]:
# Pipeline definition.
# Dataset directory for each split, relative to the working directory;
# the pipeline below looks a directory up by its split name.
data_paths = dict(
    train='DALI_extra/db/MNIST/training/',
    valid='DALI_extra/db/MNIST/testing/',
)

@pipeline_def
def MNISTPipeline(device, shard_id, num_shards, mode='train'):
    """Define the DALI graph that reads, decodes and normalizes MNIST samples.

    Args:
        device: ``'gpu'`` runs decoding (``'mixed'`` backend), normalization
            and the label cast on the GPU; any other value keeps the whole
            graph on the CPU.
        shard_id: Index of the dataset shard this pipeline instance reads.
        num_shards: Total number of shards the dataset is split into.
        mode: Key into ``data_paths`` selecting the split to read
            (``'train'`` or ``'valid'``).

    Returns:
        A ``(images, labels)`` pair of DALI data nodes: grayscale images as
        normalized floats with the ``types.NCHW`` output layout, and labels
        cast to INT64.

    Raises:
        KeyError: If ``mode`` is not a key of ``data_paths``.
    """
    on_gpu = device == 'gpu'

    # `mode` must be a real parameter: the loader passes it as a keyword and
    # there is no module-level `mode` to fall back on.
    images, labels = fn.readers.caffe2(
        path=data_paths[mode],
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=True,
        name='Reader',  # the iterator refers to the reader by this name
    )
    images = fn.decoders.image(
        images,
        device='mixed' if on_gpu else 'cpu',
        output_type=types.GRAY,
    )
    # Normalize on the same device as the decoder instead of always forcing
    # the GPU, so a CPU pipeline stays entirely on the CPU.
    images = fn.crop_mirror_normalize(
        images,
        device='gpu' if on_gpu else 'cpu',
        dtype=types.FLOAT,
        # Mean/std are scaled by 255 because decoded pixels are in [0, 255].
        std=[0.3081 * 255],
        mean=[0.1307 * 255],
        output_layout=types.NCHW,
    )

    if on_gpu:
        labels = labels.gpu()
    labels = fn.cast(labels, dtype=types.INT64)
    return images, labels

In [4]:
# Customizing the DALI loader for use in Catalyst.
class DALILoader(DataLoader):
    """Loader that serves MNIST batches produced by a DALI pipeline.

    The class derives from ``torch.utils.data.DataLoader`` only so that it
    passes ``isinstance`` checks; all data loading is done by DALI. Batches
    are yielded as dictionaries with ``'features'`` and ``'targets'`` keys.

    Args:
        mode: Split to read, a key of ``data_paths`` (``'train'``/``'valid'``).
        batch_size: Number of samples per batch produced by the pipeline.
        num_workers: Number of CPU threads given to the DALI pipeline.
    """

    def __init__(
            self,
            mode='train',
            batch_size=32,
            num_workers=4,
        ):
        # DataLoader.__init__ is deliberately NOT called: it requires a
        # `dataset` argument that does not exist here, and it would overwrite
        # `sampler`/`batch_sampler` with torch sampler objects.
        self.mode = mode
        self.batch_size = batch_size
        self.num_workers = num_workers
        # Built lazily by setup() so that constructing the loader is cheap
        # and does not touch the GPU.
        self.pipeline = None
        self.loader = None

    def setup(self, stage=None):
        """Build the DALI pipeline and wrap it in a PyTorch iterator.

        Called automatically on first use; may also be called explicitly.
        ``stage`` is accepted for interface compatibility and is unused.
        """
        # Local imports: this cell only imports DataLoader from torch.
        import torch
        import torch.distributed as dist

        # Shard by process rank when running distributed, otherwise read the
        # whole dataset in a single shard.
        if dist.is_available() and dist.is_initialized():
            shard_id = dist.get_rank()
            num_shards = dist.get_world_size()
        else:
            shard_id, num_shards = 0, 1
        # Run DALI on the GPU that PyTorch currently uses in this process.
        device_id = torch.cuda.current_device() if torch.cuda.is_available() else 0

        self.pipeline = MNISTPipeline(
            mode=self.mode,
            batch_size=self.batch_size,
            device='gpu',
            device_id=device_id,
            shard_id=shard_id,
            num_shards=num_shards,
            num_threads=self.num_workers,
        )
        self.pipeline.build()
        self.loader = DALIGenericIterator(
            pipelines=self.pipeline,
            output_map=['features', 'targets'],
            reader_name="Reader",
            auto_reset=True,
            last_batch_policy=LastBatchPolicy.PARTIAL,
        )

    def _ensure_loader(self):
        """Return the DALI iterator, building it on first use."""
        if self.loader is None:
            self.setup()
        return self.loader

    def __len__(self):
        """Return the number of batches reported by the DALI iterator."""
        return len(self._ensure_loader())

    def __iter__(self):
        """Iterate over batches as ``{'features': ..., 'targets': ...}`` dicts."""
        dali_iterator = self._ensure_loader()
        # The iterator yields one dict per pipeline; there is a single
        # pipeline, hence batch[0]. squeeze(-1) drops only the trailing label
        # axis, so a one-sample batch keeps its batch dimension (a bare
        # squeeze() would collapse it to a 0-d tensor).
        return (
            {
                'features': batch[0]["features"],
                'targets': batch[0]["targets"].squeeze(-1).long(),
            }
            for batch in dali_iterator
        )

    def sampler(self):
        """Return ``None``: sampling is handled inside the DALI reader."""
        return None

    def batch_sampler(self):
        """Return ``None``: batching is handled inside the DALI pipeline."""
        return None

In [5]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl

In [6]:
# Feed Catalyst from the DALI-backed MNIST loaders defined above. Random
# (N, 10) tensors would match neither the 28 * 28 input of the model nor the
# integer class targets that CrossEntropyLoss expects.
loaders = {
    "train": DALILoader(mode="train", batch_size=32, num_workers=4),
    "valid": DALILoader(mode="valid", batch_size=32, num_workers=4),
}

# Single linear layer over the flattened 28x28 image, one logit per digit.
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.02)



In [None]:
# Accuracy over the 10 digit classes, computed from the "logits" and
# "targets" batch entries.
accuracy_callback = dl.AccuracyCallback(
    input_key="logits",
    target_key="targets",
    num_classes=10,
)

# Train for a single epoch, validating on the "valid" loader and tracking
# its "loss" metric (lower is better); logs are written to ./logs.
runner = dl.SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=[accuracy_callback],
    valid_loader="valid",
    valid_metric="loss",
    minimize_valid_metric=True,
    num_epochs=1,
    logdir="./logs",
    verbose=True,
)