In [1]:
import os
import urllib.request
import zipfile

url = "https://download.pytorch.org/tutorial/hymenoptera_data.zip"
local_file = "./data/hymenoptera_data.zip"

# urlretrieve does not create missing parent directories, so make sure the
# target folder exists before downloading (a fresh checkout has no ./data).
os.makedirs(os.path.dirname(local_file), exist_ok=True)

# Only download when the archive is not already present, so that re-running
# the notebook does not fetch the same file again. The download goes to a
# temporary name first and is renamed once complete, so an interrupted
# download never leaves a truncated archive under the final name.
if not os.path.exists(local_file):
    partial_file = local_file + ".part"
    urllib.request.urlretrieve(url, partial_file)
    os.replace(partial_file, local_file)

# Unpack the archive next to it; the later cells read from
# ./data/hymenoptera_data.
with zipfile.ZipFile(local_file, "r") as zip_ref:
    zip_ref.extractall("./data")

In [2]:
import albumentations as A
import numpy as np
import PIL.Image
import torch
import torchvision
from albumentations.pytorch import ToTensorV2
from torch import nn
from torch.utils.data import DataLoader


class AntsBeesDataset(torchvision.datasets.ImageFolder):
    """ImageFolder variant whose transforms are called albumentations-style.

    Transforms are invoked with keyword arguments (``image=...``) and their
    result is read back by key, instead of being called positionally.
    """

    def __getitem__(self, index: int):
        """Return the ``(image, target)`` pair stored at ``index``.

        The file is loaded with ``self.loader`` and converted to a numpy array.
        If ``self.transforms`` is set, it receives both image and target and
        its ``"image"`` / ``"target"`` entries are returned. Otherwise
        ``self.transform`` (image only) and ``self.target_transform`` are
        applied independently when present.
        """
        file_path, label = self.samples[index]
        pixels = self.get_cv2_image(self.loader(file_path))

        joint_transform = self.transforms
        if joint_transform is not None:
            # A joint transform takes precedence over the separate ones.
            output = joint_transform(image=pixels, target=label)
            return output["image"], output["target"]

        if self.transform is not None:
            pixels = self.transform(image=pixels)["image"]
        if self.target_transform is not None:
            label = self.target_transform(label)
        return pixels, label

    def get_cv2_image(self, image):
        """Return ``image`` as a numpy array.

        Arrays are passed through untouched; PIL images are converted to a
        ``uint8`` array. Any other type raises ``RuntimeError``.
        """
        if isinstance(image, np.ndarray):
            return image
        if isinstance(image, PIL.Image.Image):
            return np.array(image).astype("uint8")
        raise RuntimeError("Only PIL.Image and CV2 loaders currently supported!")


data_dir = "./data/hymenoptera_data"

# One deterministic pipeline (no random augmentation) shared by both splits:
# resize, center-crop, per-channel normalisation, then conversion to a tensor.
preprocessing_steps = [
    A.Resize(height=256, width=256),
    A.CenterCrop(height=224, width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
]
data_transforms = A.Compose(preprocessing_steps)

# Each split lives in its own sub-folder of data_dir.
train_dataset, test_dataset = (
    AntsBeesDataset(root=os.path.join(data_dir, split)) for split in ("train", "val")
)

# The pipeline is attached after construction through the `transforms`
# attribute, which AntsBeesDataset.__getitem__ calls with image and target.
train_dataset.transforms = data_transforms
test_dataset.transforms = data_transforms

In [3]:
# Dataset sizes for both splits.
print(f"Number of training images: {len(train_dataset)}")
print(f"Number of validation images: {len(test_dataset)}")

# Inspect the first training sample to show what one item looks like.
first_image, first_label = train_dataset[0]
print(f"Example output of an image shape: {first_image.shape}")
print(f"Example output of a label: {first_label}")

Number of training images: 244
Number of validation images: 153
Example output of an image shape: torch.Size([3, 224, 224])
Example output of a label: 0


In [4]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

model = torchvision.models.resnet18(pretrained=True)

# Replace the final fully-connected layer so the network outputs 2 classes.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)

# Move to the chosen device and switch to evaluation mode. Both calls return
# the module itself, so chaining keeps `model` bound to the same object and
# avoids echoing the model repr as cell output.
model = model.to(device).eval()



In [5]:
from deepchecks.vision.vision_data import BatchOutputFormat


def deepchecks_collate_fn(batch) -> BatchOutputFormat:
    """Collate a batch of dataset items into the deepchecks batch format.

    Args:
        batch: iterable of ``(image, label)`` tuples as produced by the
            dataset, where ``image`` is a normalised channels-first tensor.

    Returns:
        A ``BatchOutputFormat`` with:
        - ``images``: de-normalised channels-last array scaled to [0, 255],
        - ``labels``: tuple of the labels in the batch,
        - ``predictions``: softmax class probabilities from the global
          ``model``, computed on the global ``device``.
    """
    # batch received as iterable of tuples of (image, label) and transformed to
    # tuple of iterables of images and labels:
    batch = tuple(zip(*batch))

    # Stack once and reuse the tensor for both the display images and the
    # model input.
    stacked = torch.stack(batch[0])

    # images: move channels last and undo the normalisation applied by the
    # dataset transform (same mean/std), then rescale to the 0-255 range.
    inp = stacked.detach().numpy().transpose((0, 2, 3, 1))
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    inp = std * inp + mean
    images = np.clip(inp, 0, 1) * 255

    # labels:
    labels = batch[1]

    # predictions: this is inference only, so disable autograd. Without it every
    # batch would build a computation graph that stays alive for as long as the
    # returned predictions are referenced.
    with torch.no_grad():
        logits = model.to(device)(stacked.to(device))
        predictions = nn.Softmax(dim=1)(logits)
    return BatchOutputFormat(images=images, labels=labels, predictions=predictions)

In [6]:
# Maps each integer class index to its human-readable class name.
LABEL_MAP = dict(enumerate(("ants", "bees")))

In [7]:
from deepchecks.vision import VisionData

# Both loaders use identical settings; the custom collate function turns each
# batch into the deepchecks format (images, labels, predictions).
loader_options = dict(batch_size=4, shuffle=True, collate_fn=deepchecks_collate_fn)
train_loader = DataLoader(train_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

# Wrap each loader for deepchecks as a classification task with shared labels.
vision_data_options = dict(task_type="classification", label_map=LABEL_MAP)
training_data = VisionData(batch_loader=train_loader, **vision_data_options)
test_data = VisionData(batch_loader=test_loader, **vision_data_options)

In [8]:
# Preview the training VisionData; as the cell's last expression, the result is
# rendered by the notebook (the recorded output is a widget).
training_data.head()

VBox(children=(HTML(value='<div style="display:flex; flex-direction: column; gap: 10px;">\n                <di…

In [9]:
from deepchecks.vision.suites import train_test_validation

# Build the train/test validation suite and run it on both VisionData objects.
# max_samples=5000 presumably caps how many samples are used per dataset — well
# above the dataset sizes printed earlier (244 / 153); TODO confirm semantics.
suite = train_test_validation()
result = suite.run(training_data, test_data, max_samples=5000)

In [10]:
from deepchecks.vision.suites import train_test_validation

# This cell used to be an exact copy of the previous one, which re-ran the
# whole suite (a second full pass over both datasets and the model) only to
# overwrite `result` with an equivalent object. Reuse the result computed by
# the previous cell and display it instead.
result.show()

In [11]:
# Export the suite result as an HTML report. The recorded output shows the call
# returning 'deepchecks-output (1).html' rather than the requested name —
# presumably because a file with that name already existed (TODO confirm).
result.save_as_html("deepchecks-output.html")

'deepchecks-output (1).html'