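# Data loading example

This notebook shows how to load the Clotho dataset with `aac_datasets` and how to batch it with a PyTorch `DataLoader`.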

In [1]:
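# %pip install aac_datasets  # Uncomment if not installed!
# If loading audio fails with "Couldn't find appropriate backend", an audio I/O
# backend (e.g. the `soundfile` package) is probably missing from the environment.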

## Dataset

In [2]:
import logging
import sys

# Send log records of level WARNING and above to stdout so they show up in the
# cell output, tagged with timestamp, logger name and level.
LOG_FORMAT = "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"
logging.basicConfig(stream=sys.stdout, level=logging.WARNING, format=LOG_FORMAT)

In [3]:
import aac_datasets
import yaml

from aac_datasets import Clotho

In [4]:
# Report the installed package version so the recorded outputs can be tied to it.
print("aac-datasets version:", aac_datasets.__version__)

aac-datasets version: 0.6.1


In [5]:
# Open the "dev" subset of Clotho rooted at the current directory, without downloading.
clotho_dev = Clotho(".", subset="dev", download=False)

# NOTE(review): fetching an item reads a .wav file, which presumably requires an
# audio I/O backend to be installed — confirm the environment provides one.
example_0 = clotho_dev[0]
audio_example, captions_example = example_0["audio"], example_0["captions"]

# Show the waveform shape and the captions of the first item (captions as YAML).
print("Audio waveform shape:", audio_example.shape)
print("Captions:", yaml.dump(captions_example, sort_keys=False), sep="\n")

RuntimeError: Couldn't find appropriate backend to handle uri ./CLOTHO_v2.1/clotho_audio_files/development/Distorted AM Radio noise.wav and format None.

## DataLoader

In [None]:
from typing import List

import yaml
from torch import Tensor
from torch.utils.data.dataloader import DataLoader

from aac_datasets import Clotho
from aac_datasets.utils.collate import AdvancedCollate

In [None]:
clotho_dev = Clotho(".", subset="dev", download=False)

# note: AdvancedCollate will pad audio files to max length to form a single tensor
# (the {"audio": 0.0} argument is presumably the fill value used for the "audio" field).
pad_collate = AdvancedCollate({"audio": 0.0})
dataloader = DataLoader(clotho_dev, batch_size=4, collate_fn=pad_collate)

# Only the first batch is needed for this demonstration.
batch_0 = next(iter(dataloader))
batch_0_audio: Tensor = batch_0["audio"]
batch_0_captions: List[List[str]] = batch_0["captions"]

# Show the batched audio shape and the captions of the batch (captions as YAML).
print("Batch 0 audio shape:", batch_0_audio.shape)
print("Batch 0 captions:", yaml.dump(batch_0_captions, sort_keys=False), sep="\n")