In [10]:
import numpy as np
import torch
from transformers import (
    AutoImageProcessor,
    ResNetForImageClassification,
    Trainer,
    TrainingArguments,
    ViTFeatureExtractor,
    ViTForImageClassification,
    BeitForImageClassification,
    SwinForImageClassification,
    ConvNextForImageClassification
)

import matplotlib.pyplot as plt

from datasets import load_dataset, load_metric
from cracks_classification_hf import transform


In [11]:
# Selects which fine-tuned architecture the model-loading cell below loads.
# That cell has branches for "vit", "resnet" and "convnext".
model_type = "resnet"

In [12]:
# Load the crack-classification images through the Hugging Face "imagefolder" builder.
dataset = load_dataset("imagefolder", data_dir="datasets/cracks_classification")
# Class names from the train split's "label" feature; the list index of each
# name is used as its class id when the models are loaded further down.
labels = dataset["train"].features["label"].names

Resolving data files: 100%|██████████| 11493/11493 [00:01<00:00, 10575.16it/s]
Resolving data files: 100%|██████████| 2872/2872 [00:00<00:00, 168596.36it/s]
Found cached dataset imagefolder (/home/yoni/.cache/huggingface/datasets/imagefolder/default-bdd5c90937dbbcd5/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f)
100%|██████████| 2/2 [00:00<00:00, 31.89it/s]


In [19]:
# Label <-> id mappings handed to whichever `from_pretrained` call runs below.
# Ids are stored as strings on both sides, as in the original per-branch dicts.
id2label = {str(i): c for i, c in enumerate(labels)}
label2id = {c: str(i) for i, c in enumerate(labels)}

# Load the image processor and the fine-tuned classifier for the selected
# architecture. Each branch also keeps an architecture-specific alias
# (`model_vit`, `model_resnet`, ...) next to the generic `feature_extractor`
# and `model` names used by the rest of the notebook.
if model_type == "vit":
    feature_extractor_vit = ViTFeatureExtractor.from_pretrained("vit-cracks")
    feature_extractor = feature_extractor_vit
    model_vit = ViTForImageClassification.from_pretrained(
        "vit-cracks",
        num_labels=len(labels),
        id2label=id2label,
        label2id=label2id,
    )
    model = model_vit
elif model_type == "resnet":
    feature_extractor_resnet = AutoImageProcessor.from_pretrained("resnet-cracks/checkpoint-11400")
    feature_extractor = feature_extractor_resnet
    # NOTE(review): ignore_mismatched_sizes=True lets loading succeed even if
    # the checkpoint's head does not match `num_labels` - confirm the head is
    # really the fine-tuned one before trusting the saliency maps.
    model_resnet = ResNetForImageClassification.from_pretrained(
        "resnet-cracks",
        num_labels=len(labels),
        id2label=id2label,
        label2id=label2id,
        ignore_mismatched_sizes=True,
    )
    model = model_resnet
elif model_type == "convnext":
    # NOTE(review): this branch loads the *ResNet* processor checkpoint (and
    # stores it under the `feature_extractor_resnet` name). Kept as in the
    # original; confirm whether a ConvNeXt-specific processor should be used.
    feature_extractor_resnet = AutoImageProcessor.from_pretrained("resnet-cracks/checkpoint-11400")
    feature_extractor = feature_extractor_resnet
    model_convnext = ConvNextForImageClassification.from_pretrained(
        "convnext-cracks",
        num_labels=len(labels),
        id2label=id2label,
        label2id=label2id,
        ignore_mismatched_sizes=True,
    )
    model = model_convnext
else:
    # Fail loudly instead of reaching `model.eval()` with `model` undefined
    # (or, worse, silently reusing a model left over from an earlier run).
    raise ValueError(
        f"Unsupported model_type {model_type!r}; expected 'vit', 'resnet' or 'convnext'"
    )

# Put the model in evaluation mode; as the last expression of the cell this
# also displays the module tree.
model.eval()

ResNetForImageClassification(
  (resnet): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64

In [20]:
# Attach the project's `transform` (imported from cracks_classification_hf) to
# the dataset, passing it the processor chosen in the model-loading cell.
# The lambda looks `feature_extractor` up when it is called, so re-running the
# model-loading cell with another `model_type` changes the processor used here.
prepared_ds = dataset.with_transform(lambda x: transform(x, feature_extractor))


In [15]:
# sample_test_nb = 200
# for sample_test_nb in range(1900, 2000):
#     model.zero_grad()
#     print(dataset["train"].features["label"].names[prepared_ds["train"][sample_test_nb]["labels"]])

#     input = prepared_ds["train"][sample_test_nb]["pixel_values"].unsqueeze(0)
#     input.requires_grad_()
#     output = model(input)
#     logits = output.logits
#     print(logits)
#     logits[0][1].backward()
#     saliency = input.grad.data.abs().max(dim=1)[0]
#     saliency = saliency.numpy()
#     saliency -= saliency.min()
#     saliency /= saliency.max()
#     saliency = np.uint8(saliency * 255)
#     plt.imshow(dataset["train"][sample_test_nb]["image"].resize((224, 224)))
#     plt.imshow(saliency[0], cmap=plt.cm.hot, alpha=0.5)
#     plt.show()

In [21]:
# Boilerplate imports.
import numpy as np
import PIL.Image
from matplotlib import pylab as P
import torch
from torchvision import models, transforms

# From our repository.
import saliency.core as saliency

%matplotlib inline

In [22]:
# Boilerplate methods.
def ShowImage(im, title='', ax=None):
    """Display an image without axis decorations.

    Args:
        im: Image accepted by matplotlib's ``imshow``.
        title: Text shown above the image.
        ax: Axes to draw on. When None, a new figure is created and its
            axes are used.
    """
    if ax is None:
        P.figure()
        ax = P.gca()
    # Draw on the axes that was passed in rather than on whatever pyplot
    # currently considers the active axes.
    ax.axis('off')
    ax.imshow(im)
    ax.set_title(title)

def ShowGrayscaleImage(im, orig, title='', ax=None, alpha=0.3):
    """Overlay a grayscale mask on top of an image.

    Args:
        im: Mask drawn with the gray colormap, colour-scaled between 0 and 1.
        orig: Background image drawn underneath the mask.
        title: Text shown above the plot.
        ax: Axes to draw on. When None, a new figure is created and its
            axes are used.
        alpha: Opacity of the mask layer.
    """
    if ax is None:
        P.figure()
        ax = P.gca()
    # Draw on the axes that was passed in rather than on whatever pyplot
    # currently considers the active axes.
    ax.axis('off')
    ax.imshow(orig)
    ax.imshow(im, cmap=P.cm.gray, vmin=0, vmax=1, alpha=alpha)
    ax.set_title(title)

def ShowHeatMap(im, title, orig, ax=None, alpha=0.3):
    """Overlay a heat map (``inferno`` colormap) on top of an image.

    Args:
        im: Mask drawn as the heat-map layer.
        title: Text shown above the plot.
        orig: Background image drawn underneath the heat map.
        ax: Axes to draw on. When None, a new figure is created and its
            axes are used.
        alpha: Opacity of the heat-map layer.
    """
    if ax is None:
        P.figure()
        ax = P.gca()
    # Draw on the axes that was passed in rather than on whatever pyplot
    # currently considers the active axes.
    ax.axis('off')
    ax.imshow(orig)
    ax.imshow(im, cmap='inferno', alpha=alpha)
    ax.set_title(title)

def LoadImage(file_path):
    """Read an image file and return it as an array resized to 299x299.

    Args:
        file_path: Location of the image, in any form ``PIL.Image.open`` accepts.

    Returns:
        The result of ``np.asarray`` on the resized image.
    """
    # The context manager closes the underlying file once the pixels have been
    # read; `resize` returns a new image, so it stays valid after the `with`.
    with PIL.Image.open(file_path) as source_image:
        resized = source_image.resize((299, 299))
    return np.asarray(resized)

# Per-channel mean/std normalisation (the ImageNet statistics used by the
# torchvision model zoo).
transformer = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))


def PreprocessImages(images):
    """Turn a batch of images into a normalised float32 tensor that tracks gradients.

    Args:
        images: 4-D array-like with values in [0, 255]. Axis 3 is moved to
            position 1, i.e. the input is expected channels-last.

    Returns:
        A ``torch.float32`` tensor, scaled to [0, 1] and normalised with
        ``transformer``, with ``requires_grad`` enabled.
    """
    scaled = np.array(images) / 255
    channels_first = np.transpose(scaled, (0, 3, 1, 2))
    batch = torch.tensor(channels_first, dtype=torch.float32)
    # Call the module itself rather than `.forward()` so that any registered
    # hooks run as well.
    batch = transformer(batch)
    return batch.requires_grad_(True)

In [23]:
# Key under which the target class index is stored in `call_model_args`.
class_idx_str = 'class_idx_str'


def call_model_function(images, call_model_args=None, expected_keys=None):
    """Adapter that lets the `saliency` library query the global `model`.

    Args:
        images: 4-D array. Axis 3 is moved to position 1 before the model is
            called, i.e. the batch is expected channels-last.
        call_model_args: Mapping that must contain ``class_idx_str``; its value
            is the index of the class whose probability is differentiated.
        expected_keys: Collection of output keys requested by the caller. Only
            ``saliency.base.INPUT_OUTPUT_GRADIENTS`` is supported.

    Returns:
        ``{saliency.base.INPUT_OUTPUT_GRADIENTS: gradients}`` where
        ``gradients`` is a numpy array laid out like ``images``, holding the
        gradient of the softmax probability of the target class with respect
        to the input. An empty dict if the requested key is not supported.
    """
    target_class_idx = call_model_args[class_idx_str]

    # `expected_keys` defaults to None; treat that like an unsupported request
    # instead of failing on the membership test. Checking before the forward
    # pass also avoids running the model when nothing usable was requested.
    if expected_keys is None or saliency.base.INPUT_OUTPUT_GRADIENTS not in expected_keys:
        print("Method not supported")
        return {}

    batch = torch.tensor(np.transpose(images, (0, 3, 1, 2)), dtype=torch.float32)
    batch.requires_grad_(True)

    probabilities = torch.softmax(model(batch).logits, dim=1)
    class_scores = probabilities[:, target_class_idx]

    # One backward pass for the whole batch: `grad_outputs` of ones yields each
    # sample's gradient of its own class score.
    (input_grads,) = torch.autograd.grad(
        class_scores, batch, grad_outputs=torch.ones_like(class_scores)
    )
    # Move the channel axis back to the end so the result matches `images`.
    gradients = torch.movedim(input_grads, 1, 3).detach().numpy()
    return {saliency.base.INPUT_OUTPUT_GRADIENTS: gradients}

In [None]:
# Render SmoothGrad and SmoothGrad-Integrated-Gradients maps for every 30th
# training image starting at index 10000.
batch_size = 4  # NOTE(review): not used in this cell (the call below passes batch_size=20); kept in case other cells read it.

# Figure layout: one row with three panels.
ROWS = 1
COLS = 3
UPSCALE_FACTOR = 20

# Never index past the end of the train split: the original upper bound of
# 13000 exceeds the split size and raised IndexError part-way through.
last_index = min(13000, len(dataset["train"]))

for sample_test_nb in range(10000, last_index, 30):
    model.zero_grad()
    # Channels-last numpy copy of the preprocessed image, the layout that
    # `call_model_function` transposes back before calling the model.
    pixel_values_hwc = prepared_ds["train"][sample_test_nb]["pixel_values"].permute((1, 2, 0)).numpy()

    # Explain the probability of class index 1.
    call_model_args = {class_idx_str: 1}

    # Construct the saliency object. This alone doesn't do anything.
    gradient_saliency = saliency.GradientSaliency()

    # Compute the smoothed gradient mask and convert the 3D mask to 2D grayscale.
    smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(
        pixel_values_hwc, call_model_function, call_model_args)
    smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_mask_3d)

    # matplotlib's figsize is (width, height): a 1x3 grid must be wide, not tall.
    fig = P.figure(figsize=(COLS * UPSCALE_FACTOR, ROWS * UPSCALE_FACTOR))

    # Original image at the resolution used for the overlays.
    image_orig = dataset["train"][sample_test_nb]["image"].resize((224, 224))
    ShowHeatMap(smoothgrad_mask_grayscale, orig=image_orig, title='SmoothGrad', ax=P.subplot(ROWS, COLS, 1))

    # Construct the saliency object. This alone doesn't do anything.
    integrated_gradients = saliency.IntegratedGradients()

    # All-zero baseline in the model's input space.
    # NOTE(review): this is a black image only if `transform` does not
    # mean/std-normalise the pixels - confirm.
    baseline = np.zeros(pixel_values_hwc.shape)

    # Smoothed mask for integrated gradients will take a while since we are
    # doing nsamples * nsamples computations.
    smoothgrad_integrated_gradients_mask_3d = integrated_gradients.GetSmoothedMask(
        pixel_values_hwc, call_model_function, call_model_args,
        x_steps=10, x_baseline=baseline, batch_size=20)

    # Convert the 3D mask to 2D grayscale.
    smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_integrated_gradients_mask_3d)

    # Render the remaining panels.
    ShowHeatMap(smoothgrad_mask_grayscale, orig=image_orig, title='Smoothgrad Integrated Gradients', ax=P.subplot(ROWS, COLS, 2))
    ShowImage(image_orig, title='Original Image', ax=P.subplot(ROWS, COLS, 3))

    plt.show()
    # Release the figure so a long loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Render SmoothGrad and SmoothGrad-Integrated-Gradients maps for the training
# images with indices 1900..1999.
batch_size = 4  # NOTE(review): not used in this cell (the call below passes batch_size=20); kept in case other cells read it.

# Figure layout: one row with three panels.
ROWS = 1
COLS = 3
UPSCALE_FACTOR = 20

for sample_test_nb in range(1900, 2000):
    model.zero_grad()
    # Channels-last numpy copy of the preprocessed image, the layout that
    # `call_model_function` transposes back before calling the model.
    pixel_values_hwc = prepared_ds["train"][sample_test_nb]["pixel_values"].permute((1, 2, 0)).numpy()

    # Explain the probability of class index 1.
    call_model_args = {class_idx_str: 1}

    # Construct the saliency object. This alone doesn't do anything.
    gradient_saliency = saliency.GradientSaliency()

    # Compute the smoothed gradient mask and convert the 3D mask to 2D grayscale.
    smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(
        pixel_values_hwc, call_model_function, call_model_args)
    smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_mask_3d)

    # matplotlib's figsize is (width, height): a 1x3 grid must be wide, not tall.
    fig = P.figure(figsize=(COLS * UPSCALE_FACTOR, ROWS * UPSCALE_FACTOR))

    # Original image at the resolution used for the overlays.
    image_orig = dataset["train"][sample_test_nb]["image"].resize((224, 224))
    ShowHeatMap(smoothgrad_mask_grayscale, orig=image_orig, title='SmoothGrad', ax=P.subplot(ROWS, COLS, 1))

    # Construct the saliency object. This alone doesn't do anything.
    integrated_gradients = saliency.IntegratedGradients()

    # All-zero baseline in the model's input space.
    # NOTE(review): this is a black image only if `transform` does not
    # mean/std-normalise the pixels - confirm.
    baseline = np.zeros(pixel_values_hwc.shape)

    # Smoothed mask for integrated gradients will take a while since we are
    # doing nsamples * nsamples computations.
    smoothgrad_integrated_gradients_mask_3d = integrated_gradients.GetSmoothedMask(
        pixel_values_hwc, call_model_function, call_model_args,
        x_steps=10, x_baseline=baseline, batch_size=20)

    # Convert the 3D mask to 2D grayscale.
    smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_integrated_gradients_mask_3d)

    # Render the remaining panels.
    ShowHeatMap(smoothgrad_mask_grayscale, orig=image_orig, title='Smoothgrad Integrated Gradients', ax=P.subplot(ROWS, COLS, 2))
    ShowImage(image_orig, title='Original Image', ax=P.subplot(ROWS, COLS, 3))

    plt.show()
    # Release the figure so a long loop does not accumulate open figures.
    plt.close(fig)