# 3.- Implement a custom data loading pipeline and evaluate the performance of a DL model

In [None]:
# Install the FiftyOne package, which the cells below use to download the Open Images samples
!pip install fiftyone

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz

Download a train and validation set of images

In [None]:
#@title Download FiftyOne training and validation dataset
def _load_cat_split(split, max_samples):
    """Load up to `max_samples` "Cat" classification samples of one Open Images V7 split."""
    return foz.load_zoo_dataset(
        "open-images-v7",
        split=split,
        label_types=["classifications"],
        classes=["Cat"],
        max_samples=max_samples,
        dataset_dir="sample_data",
        download_if_necessary=True,
    )


# Training split: at most 1000 samples
trn_dataset = _load_cat_split("train", 1000)

# Validation split: at most 50 samples
val_dataset = _load_cat_split("validation", 50)

In [None]:
#@title FiftyOne PyTorch Dataset
import matplotlib.pyplot as plt
import torch
from PIL import Image


class FiftyOneTorchDataset(torch.utils.data.Dataset):
    """A binary-classification PyTorch dataset built from a FiftyOne dataset.

    Each item is an ``(image, target)`` pair. ``target`` is a float32 scalar
    tensor equal to 1.0 when at least one of the sample's positive
    classification labels is listed in ``classes`` and 0.0 otherwise.

    Args:
        fiftyone_dataset: a FiftyOne dataset or view that will be used for
            training or testing. Labels are read from each sample's
            ``positive_labels`` field.
        transforms (None): a callable applied to the loaded RGB PIL image
            (e.g. a ``torchvision.transforms.Compose``). If None, the PIL
            image is returned unchanged.
        classes (None): a list of class strings that count as a positive
            target. If None, all distinct positive labels present in the
            given fiftyone_dataset are used.
    """

    def __init__(
        self,
        fiftyone_dataset,
        transforms=None,
        classes=None,
    ):
        self.samples = fiftyone_dataset
        self.transforms = transforms
        # File paths double as the keys used to look samples up in __getitem__
        self.img_paths = self.samples.values("filepath")

        if classes is None:
            # Fall back to every positive label that occurs in the dataset
            classes = self.samples.distinct(
                "positive_labels.classifications.label"
            )
        self.classes = classes

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair stored at position ``idx``.

        Raises:
            IndexError: if ``idx`` is outside the range of stored file paths
                (this is also what ends plain ``for`` iteration).
        """
        img_path = self.img_paths[idx]
        sample = self.samples[img_path]

        # Close the file handle as soon as the pixels have been decoded
        with Image.open(img_path) as img_file:
            img = img_file.convert("RGB")

        # A sample without positive labels is treated as a negative example
        positives = sample["positive_labels"]
        classifications = []
        if positives is not None:
            classifications = positives["classifications"] or []

        label = any(lab["label"] in self.classes for lab in classifications)
        target = torch.as_tensor(label, dtype=torch.float32)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of samples (one per stored file path)."""
        return len(self.img_paths)

    def get_classes(self):
        """Return the list of class names that count as a positive target."""
        return self.classes

## Data augmentation as a pre-processing pipeline

Deep learning models require large amounts of data to train. One reason is that DL models are complex and have lots of parameters.

*   LeNet5 has 60,000 parameters
*   InceptionV3 has 25 million parameters
*   Vision Transformer (ViT-Base) has about 86 million parameters

Another reason is that we want our model to learn from as many examples as possible, to make it robust to variations in the input data.
That way, the model will output the same response for an image that differs from the ones in the training set.

The most common approach to make a model robust to these variations is through *data augmentation*.

In [None]:
from torchvision import transforms

In [None]:
# Training-time pipeline built with transforms.Compose: random augmentations
# first, then the deterministic resize / tensor conversion / normalization.
augment_pipeline = transforms.Compose([
    # Random augmentations, re-sampled every time an image is loaded.
    # Add here other image augmentation functions to apply.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),

    # 299x299 inputs, converted to a tensor and normalized per channel
    # (the standard ImageNet mean/std values)
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [None]:
# Wrap the FiftyOne training split as a PyTorch dataset; every loaded image is
# passed through `augment_pipeline`, and "Cat" is the positive class.
pt_trn_dataset = FiftyOneTorchDataset(trn_dataset,
                                      classes=["Cat"],
                                      transforms=augment_pipeline
                                      )

In [None]:
# Collect the dataset positions whose target is positive (i.e. cat images).
# Note that this loads and transforms every image once.
cats = []
for position, (_image, is_cat) in enumerate(pt_trn_dataset):
    if is_cat > 0.5:
        cats.append(position)

In [None]:
# Fetch the (transformed image, label) pair of one of the cat samples
x, l = pt_trn_dataset[cats[5]]

print("Augmented image shape", x.shape)

# Rescale the values to [0, 1] for display and move the first axis last, since
# imshow expects the channel axis at the end. The clamp avoids a division by
# zero when the image is constant.
value_range = (x.max() - x.min()).clamp_min(1e-8)
im = (x.permute(1, 2, 0) - x.min()) / value_range

plt.imshow(im)
# bool(...) so the title reads "True"/"False" instead of "tensor(True)"
plt.title(f"Is a cat? {bool(l > 0.5)}")
plt.show()

## Train the InceptionV3 model using data augmentation

In [None]:
from torchvision.models import inception_v3
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Classifier: InceptionV3 built without pre-trained weights and with a single
# output (one logit for the "is a cat" decision)
model = inception_v3(
    weights=None,
    progress=True,
    num_classes=1,
)

# Loss: binary cross-entropy applied directly to the raw logits
criterion = nn.BCEWithLogitsLoss()

# Optimizer: Adam over all of the model parameters
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Loader that serves the training dataset in shuffled batches of 16
trn_queue = DataLoader(
    dataset=pt_trn_dataset,
    batch_size=16,
    shuffle=True,
    pin_memory=True,
)

In [None]:
# Put the model in training mode
model.train()

# Run on the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for e in range(5):
    for i, (x, y) in enumerate(trn_queue):
        # Empty the accumulated gradients from any previous iteration
        optimizer.zero_grad()

        # Move the input images and their respective classes to the device
        x = x.to(device)
        y = y.to(device)

        y_hat = model(x)

        # The model emits one logit per image, so give the targets the same
        # (batch, 1) layout before comparing
        targets = y.view(-1, 1)

        # Compute the error/loss function
        loss = criterion(y_hat.logits, targets)

        # Perform the backward pass to generate the gradients of the loss
        # function with respect to the model parameters
        loss.backward()

        # Update the model parameters
        optimizer.step()

        # Log the progress of the model
        if i % 10 == 0:
            # A logit above 0 means a predicted probability above 0.5.
            # (argmax over a single-logit output is always 0, so it cannot be
            # used to obtain the predicted class.)
            preds = (y_hat.logits.detach() > 0).float()
            acc = (preds == targets).float().mean().item()

            print(f"Epoch {e}, step {i}: loss={loss.item()}, acc={acc}")

In [None]:
# Write the model's state dict (its learned parameters) to disk
checkpoint_path = "my_model_checkpoint.pth"
torch.save(model.state_dict(), checkpoint_path)

## Evaluate the performance of the model

To evaluate the performance of the model we'll use a set of images that have not been *seen* by our model during training.
This ensures that the performance metrics are an approximation of how our model will behave in production.

Because we are not training our model anymore, we can pass the validation images as they are without any augmentation.

In [None]:
# Deterministic pre-processing for evaluation: resize, convert to a tensor and
# normalize each channel; no random transforms are involved
val_pipeline = transforms.Compose(
    [
        transforms.Resize(size=(299, 299)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Wrap the FiftyOne validation split, using the augmentation-free pipeline
pt_val_dataset = FiftyOneTorchDataset(val_dataset, classes=["Cat"], transforms=val_pipeline)

# Serve the *validation* dataset in fixed order; evaluating on the training
# dataset would measure performance on images the model has already seen.
val_queue = DataLoader(pt_val_dataset, batch_size=16, shuffle=False)

In [None]:
# Put the model in evaluation mode
model.eval()

# Running counts: true positives, true negatives, positives and negatives
tp = 0
tn = 0
p = 0
n = 0

# No gradients are needed while evaluating
with torch.no_grad():
    for i, (x, y) in enumerate(val_queue):

        # Move the input images and their respective classes to the GPU
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()

        y_hat = model(x)

        # The model emits one logit per image; a logit above 0 means a
        # predicted probability above 0.5. (argmax over a single-logit output
        # is always 0, so it would never predict the positive class.)
        preds = (y_hat.detach().view(-1) > 0).float()

        p += y.sum().item()
        n += x.shape[0] - y.sum().item()
        tp += torch.sum(y * preds).item()
        tn += torch.sum((1 - y) * (1 - preds)).item()

        # Log the progress of the model
        if i % 10 == 0:
            acc = (tp + tn) / (p + n)
            print(f"Step {i}/{len(val_queue)}: acc={acc}")

# Report the accuracy over the whole evaluation set (skipped if it was empty)
if p + n > 0:
    acc = (tp + tn) / (p + n)
    print(f"Final: acc={acc}")

Save our model to use in production and share