In [None]:
# @title
%%capture
!pip install -qU albumentations datasets lightly"[video]"
!git clone https://github.com/alexeygrigorev/clothing-dataset.git
!wget https://s3.amazonaws.com/fast-ai-imageclas/imagewoof2-320.tgz
!wget -q https://github.com/sayakpaul/Action-Recognition-in-TensorFlow/releases/download/v1.0.0/ucf101_top5.tar.gz

import tarfile

from lightly.data import LightlyDataset
from lightly.transforms import SimCLRTransform

# SimCLR augmentation pipeline built with its default arguments; it is reused
# as the `transform=` argument by the dataset examples further down.
transform = SimCLRTransform()

# The archives were downloaded from the network, so extract them with the
# "data" filter when this Python version provides it: it refuses members that
# would land outside the destination directory and avoids the
# DeprecationWarning raised by filter-less extraction on Python 3.12/3.13.
extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

# (archive path, destination directory) for every downloaded tarball.
for archive_path, target_dir in (
    ("/content/imagewoof2-320.tgz", "/content/imagewoof2-320"),
    ("/content/ucf101_top5.tar.gz", "/content/ucf101_top5"),
):
    with tarfile.open(archive_path) as archive:
        archive.extractall(target_dir, **extract_kwargs)

## 🔥 PyTorch Datasets

In [None]:
import torchvision

# Torchvision CIFAR-10 dataset stored under data/torchvision/ (download enabled).
base = torchvision.datasets.CIFAR10(
    root="data/torchvision/",
    download=True,
)

# Wrap the torchvision dataset in a LightlyDataset that uses the shared
# SimCLR `transform`.
torch_dataset = LightlyDataset.from_torch_dataset(
    base,
    transform=transform,
)

In [None]:
import copy

import albumentations as A
import numpy as np
import torchvision.transforms as T
from albumentations.pytorch import ToTensorV2

## Torchvision Transforms
# Steps are applied in order: random horizontal flip, then conversion to a tensor.
torchvision_steps = [T.RandomHorizontalFlip(), T.ToTensor()]
torchvision_transforms = T.Compose(torchvision_steps)

## Albumentation Transforms
albumentation_transforms = A.Compose(
    [
        # Pad images that are smaller than the crop (CIFAR-10 samples are
        # 32x32) so the 128x128 center crop below is always valid; images that
        # are already large enough pass through this step unchanged.
        A.PadIfNeeded(min_height=128, min_width=128),
        A.CenterCrop(height=128, width=128),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)


def apply_albumentations(image):
    """Run ``albumentation_transforms`` on a single image.

    Albumentations pipelines only accept named NumPy inputs
    (``pipeline(image=array)``) and return a dict, while dataset transforms are
    called as ``transform(image)`` with a PIL image. This adapter bridges the
    two calling conventions.

    Args:
        image: Image to augment; anything ``np.array`` can convert to a
            NumPy array (e.g. a PIL image).

    Returns:
        The value stored under ``"image"`` in the pipeline's output.
    """
    return albumentation_transforms(image=np.array(image))["image"]


## Lightly Transforms
lightly_transform = SimCLRTransform()


# Every wrapper gets its own shallow copy of `base`, so a transform attached
# to one wrapper's underlying dataset cannot leak into the others. The image
# data itself is shared between the copies, not duplicated.
# NOTE(review): this guards against `from_torch_dataset` storing the transform
# on the dataset object it receives - confirm against the installed lightly.
torchvision_aug_dataset = LightlyDataset.from_torch_dataset(
    copy.copy(base), transform=torchvision_transforms
)
albumentation_aug_dataset = LightlyDataset.from_torch_dataset(
    copy.copy(base), transform=apply_albumentations
)
lightly_aug_dataset = LightlyDataset.from_torch_dataset(
    copy.copy(base), transform=lightly_transform
)

## 🤗 HuggingFace Datasets

![Hugging Face logo](https://huggingface.co/front/assets/homepage/hugs.svg)

In [None]:
import torch
from datasets import load_dataset

# Load CIFAR-10 from the Hugging Face Hub.
# NOTE: this rebinds `base`, which the earlier cells use for the torchvision
# CIFAR10 dataset - re-run those cells from the top before reusing them.
base = load_dataset("uoft-cs/cifar10")


def apply_transform(example_batch, transform=transform, image_key=None):
    """
    Apply the given transform across a batch. To be used in a 'map' like manner

    Args:
      example_batch (Dict): a batch of data, should contain an image column
        (looked up under 'image', then 'img', unless `image_key` is given)
      transform (Callable): image transformations to be performed
      image_key (str, optional): name of the column holding the images. When
        omitted, the first of 'image' / 'img' present in the batch is used.

    Returns:
      updated batch with transformations applied to the image column

    Raises:
      AssertionError: if the image column is missing from the batch
    """

    if image_key is None:
        # Image datasets do not agree on the column name (the CIFAR-10 dataset
        # loaded in this notebook uses 'img'), so probe the common ones and
        # fall back to 'image' to keep the original error message.
        image_key = next(
            (key for key in ("image", "img") if key in example_batch.keys()),
            "image",
        )

    assert (
        image_key in example_batch.keys()
    ), f"batch should be of type Dict[str, Any] with a key '{image_key}'"

    # Convert to RGB first so the transform always receives 3-channel images.
    example_batch[image_key] = [
        transform(image.convert("RGB")) for image in example_batch[image_key]
    ]
    return example_batch


# Register `apply_transform` as the transform of the loaded dataset.
base.set_transform(apply_transform)

# Wrap the "train" split in a DataLoader, leaving every option at its default.
hf_train_split = base["train"]
hf_dataloader = torch.utils.data.DataLoader(dataset=hf_train_split)

## LightlyDataset format

### Unlabelled Image Datasets

In [None]:
# Image folder from the cloned clothing-dataset repository.
clothing_images_dir = "/content/clothing-dataset/images"

unlabelled_image_dataset = LightlyDataset(
    input_dir=clothing_images_dir,
    transform=transform,
)

### Labelled Image Datasets

In [None]:
# Training folder of the extracted Imagewoof archive.
imagewoof_train_dir = "/content/imagewoof2-320/imagewoof2-320/train"

labelled_image_dataset = LightlyDataset(
    input_dir=imagewoof_train_dir,
    transform=transform,
)

### Video Datasets

In [None]:
# Training folder of the extracted UCF101 (top-5) archive.
ucf101_train_dir = "/content/ucf101_top5/train"

video_dataset = LightlyDataset(
    input_dir=ucf101_train_dir,
    transform=transform,
)