# Apply Transforms to Video datasets

A good demo of the torchvision video API (using Kinetics):
https://pytorch.org/vision/stable/auto_examples/plot_video_api.html#sphx-glr-auto-examples-plot-video-api-py

In [2]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
# Seed PyTorch's random number generator so random ops are repeatable across runs.
torch.manual_seed(69)

# Ignore warnings: install a blanket "ignore" filter that silences every warning.
import warnings
warnings.filterwarnings("ignore")

# Define Dataset class

In [None]:
class VPT_Dataset(Dataset):
    """Map-style dataset of images with landmark annotations read from a CSV.

    TODO: Adapt this data class to video data (``video_file`` and
    ``img_embeddings`` are placeholders for that work).

    Each CSV row holds an image file name in column 0; the remaining columns
    hold numeric landmark values that are reshaped into rows of two.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        # __len__ and __getitem__ both read these two attributes, so they
        # have to be populated here.
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.video_file = []
        self.img_embeddings = []
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows (one sample per row)."""
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        """Load the sample at ``idx``.

        Args:
            idx (int or Tensor): Row position in the annotation table.

        Returns:
            dict: ``{'image': ..., 'landmarks': ...}`` where ``image`` is
            whatever ``io.imread`` returns and ``landmarks`` is a float
            array of shape ``(-1, 2)``; if a transform was given, its
            output for that dict is returned instead.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # todo: adapt this to our data
        img_name = os.path.join(self.root_dir,
                                self.landmarks_frame.iloc[idx, 0])
        image = io.imread(img_name)
        # Every column after the file name is a landmark value.
        landmarks = np.asarray(self.landmarks_frame.iloc[idx, 1:],
                               dtype='float').reshape(-1, 2)
        sample = {'image': image, 'landmarks': landmarks}

        # Compare against None: some callables (e.g. an empty nn.Sequential)
        # are falsy and would otherwise be skipped.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

# Define Dataset transformations (using Compose)

### Transformations used in X-CLIP

We could probably reuse their implementation, too: https://github.com/microsoft/VideoX/blob/master/X-CLIP/datasets/rand_augment.py

```python
_RAND_CHOICE_WEIGHTS_0 = {
    "Rotate": 0.3,
    "ShearX": 0.2,
    "ShearY": 0.2,
    "TranslateXRel": 0.1,
    "TranslateYRel": 0.1,
    "Color": 0.025,
    "Sharpness": 0.025,
    "AutoContrast": 0.025,
    "Solarize": 0.005,
    "SolarizeAdd": 0.005,
    "Contrast": 0.005,
    "Brightness": 0.005,
    "Equalize": 0.005,
    "Posterize": 0,
    "Invert": 0,
}
```

In [None]:
# TorchScript (torch.jit.script) is an optimizing JIT compiler for PyTorch;
# scripted modules run in the C++ runtime, which may be faster.
# I've read data augmentation is CPU-intensive, so this might help.
#
# The pipeline gets its own name on purpose: binding it to `transforms` would
# shadow the torchvision `transforms` module, so any later `transforms.<Op>`
# lookup (including re-running this cell) would fail.
tensor_transforms = torch.nn.Sequential(
    transforms.CenterCrop(10),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
)
scripted_transforms = torch.jit.script(tensor_transforms)

In [2]:
from torchvision import transforms

# Preprocessing steps, listed in the order Compose applies them.
frame_steps = [
    transforms.PILToTensor(),
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.Resize((224, 224)),
    transforms.ConvertImageDtype(torch.float),
]
video_transforms = transforms.Compose(frame_steps)

Compose(
    CenterCrop(size=(10, 10))
    PILToTensor()
    ConvertImageDtype()
)

# Instantiate Dataset

In [None]:
from torch.utils.data import DataLoader

# NOTE(review): VPT_Dataset.__init__ is declared as
# (csv_file, root_dir, transform=None); this call passes no root_dir and an
# `epoch_size` keyword that signature does not accept — confirm the intended
# signature. The batch keys read below ('path', 'start', 'end', 'video') also
# differ from the 'image'/'landmarks' sample built by VPT_Dataset.__getitem__.
dataset = VPT_Dataset("./dataset", epoch_size=None, transform=video_transforms)

loader = DataLoader(dataset, batch_size=12)

# One entry per sample, accumulated across all batches.
data = {"video": [], 'start': [], 'end': [], 'tensorsize': []}
for batch in loader:
    for i, path in enumerate(batch['path']):
        data['video'].append(path)
        data['start'].append(batch['start'][i].item())
        data['end'].append(batch['end'][i].item())
        data['tensorsize'].append(batch['video'][i].size())
print(data)