In [1]:
from deepview import DeepView
import numpy as np

from torchvision import datasets, transforms
from models.torch_model import TorchModel
import models.resnet as resnet
import torch
import torch.nn.functional as F
import torch.nn as nn

import time

%load_ext autoreload
%autoreload 2
%matplotlib qt

In [2]:
# Batch size passed to the CIFAR10 DataLoader created in the next cell.
batch_size = 32

## Load CIFAR10 and a torch model

In [3]:
# Pick the first GPU when CUDA is available, otherwise fall back to the CPU.
# Defined once here; the model below and the prediction wrapper in a later
# cell both use this same device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Convert images to tensors and normalise each channel with the given mean/std.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
trainset = datasets.CIFAR10(root='./data', train=True,
                            download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

model = resnet.resnet20()
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints obtained from a trusted source.
weights = torch.load("models/pytorch_resnet_cifar10-master/pretrained_models/resnet20-12fca82f.th",
                     map_location='cpu')

# The state dict is loaded through a DataParallel wrapper and the bare module
# is extracted again afterwards (presumably because the checkpoint keys carry
# DataParallel's "module." prefix -- TODO confirm against the checkpoint).
model = nn.DataParallel(model)
model.load_state_dict(weights['state_dict'])
model = model.module
model.eval()  # inference mode: the model is only used for prediction here

model.to(device)
print('Loaded model:\t', model._get_name())
print("Best Test prec:\t", weights['best_prec1'])

Files already downloaded and verified
Loaded model:	 ResNet
Best Test prec:	 91.78000183105469


## Usage Instructions

 1. Create a wrapper function like ```pred_wrapper``` which receives a numpy array of samples and returns the corresponding class probabilities from the classifier as a numpy array
 2. Initialize DeepView-object and pass the created method to the constructor
 3. Run your code and call ```add_samples(samples, labels)``` at any time to add samples to the visualization together with the ground truth labels.
    * The ground truth labels will be visualized along with the predicted labels
    * The object will keep track of a maximum number of samples specified by ```max_samples``` and it will throw away the oldest samples first
 4. Call the ```show``` method to render the plot

The following parameters must be specified on initialization:


| <p align="left">Variable    | <p align="left">Meaning             |
|----------------------|-------------------|
| <p align="left">```batch_size```    | <p align="left">Batch size to use when calling the classifier |
| <p align="left">```pred_wrapper```    | <p align="left">To enable DeepView to call the classifier |
| <p align="left">```max_samples```      | <p align="left">The maximum amount of samples that DeepView will keep track of |
| <p align="left">```img_size```         | <p align="left">Currently only images are supported as inputs; ```img_size``` specifies the width and height of the input samples |
| <p align="left">```img_channels```     | <p align="left">Number of image channels |
| <p align="left">```n```     | <p align="left">Number of interpolations for distance calculation of two images. |
| <p align="left">```lam```     | <p align="left">Weighting factor for the Euclidean component of the distance calculation. |
| <p align="left">```resolution```       | <p align="left">x- and y- Resolution of the decision boundary plot |
| <p align="left">```cmap```             | <p align="left">Name of the colormap that should be used in the plots. |

In [4]:
def pred_wrapper(x):
    """Return the classifier's class probabilities for a batch of samples.

    Parameters
    ----------
    x : array-like
        Batch of input samples. It is converted to a float32 numpy array
        before being handed to the model (presumably shaped
        (batch, channels, height, width) -- TODO confirm against DeepView).

    Returns
    -------
    numpy.ndarray
        Softmax of the model output along the last axis, so every row is a
        probability distribution over the classes.
    """
    with torch.no_grad():
        batch = torch.from_numpy(np.array(x, dtype=np.float32)).to(device)
        # DeepView expects class probabilities from this wrapper, so the
        # model output is normalised with a softmax. This assumes the model
        # returns unnormalised logits (resnet.resnet20 is not shown here --
        # TODO confirm it does not already apply a softmax).
        probabilities = F.softmax(model(batch), dim=-1)
    return probabilities.cpu().numpy()

# Human-readable class names handed to DeepView
# (presumably ordered to match the CIFAR10 label indices -- TODO confirm).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# --- Deep View Parameters ----
# Batch size to use when DeepView calls the classifier.
batch_size = 32
# Maximum number of samples DeepView keeps track of (oldest are dropped first).
max_samples = 500
# Width and height of the input images.
img_size = (32, 32)
# Number of image channels.
img_channels = 3
# Number of interpolations for the distance calculation of two images.
n = 10
# Weighting factor for the Euclidean component of the distance calculation.
lam = 0.0001
# x- and y-resolution of the decision boundary plot.
resolution = 100
# Name of the colormap used in the plots.
cmap = 'tab10'


# Build the DeepView object around the prediction wrapper; samples are added later.
deepview = DeepView(pred_wrapper, classes, max_samples, batch_size, 
                    img_size, img_channels, n, lam, resolution, cmap)

In [5]:
n_samples = 100
# Random training-set indices (np.random.choice draws with replacement by
# default, so an index may occur more than once).
sample_ids = np.random.choice(len(trainset), n_samples)
# Fetch every selected item exactly once: each trainset[i] access re-runs the
# transform pipeline, so indexing separately for image and label doubles the work.
samples = [trainset[i] for i in sample_ids]
X = np.array([image.cpu().numpy() for image, _ in samples])
Y = np.array([label for _, label in samples])

# Time the full pipeline: adding the samples and rendering the plot.
t0 = time.time()
deepview.add_samples(X, Y)
deepview.show()

print('Time to calculate visualization for %d samples: %.2f sec' % (n_samples, time.time() - t0))

Distance calculation 20.00 %
Distance calculation 40.00 %
Distance calculation 60.00 %
Distance calculation 80.00 %
Distance calculation 100.00 %
Embedding samples ...


  warn("Using precomputed metric; transform will be unavailable for new data")


Computing decision regions ...
Time to calculate visualization for 100 samples: 31.20 sec


## Add new samples to the visualization

In [6]:
n_new = 100

# Random training-set indices for the additional samples (drawn with
# replacement, np.random.choice's default).
sample_ids = np.random.choice(len(trainset), n_new)
# Fetch every selected item exactly once: each trainset[i] access re-runs the
# transform pipeline, so indexing separately for image and label doubles the work.
samples = [trainset[i] for i in sample_ids]
X = np.array([image.cpu().numpy() for image, _ in samples])
Y = np.array([label for _, label in samples])

# Time adding the new samples to the existing DeepView object and re-rendering.
t0 = time.time()
deepview.add_samples(X, Y)
deepview.show()

# Report the number of samples actually added in this cell (n_new), not the
# n_samples left over from the previous cell.
print('Time to add %d samples to visualization: %.2f sec' % (n_new, time.time() - t0))

Distance calculation 20.00 %
Distance calculation 40.00 %
Distance calculation 60.00 %
Distance calculation 80.00 %
Distance calculation 100.00 %
Embedding samples ...


  warn("Using precomputed metric; transform will be unavailable for new data")


Computing decision regions ...
Time to add  100 samples to visualization: 66.89 sec


## Compare performance

Adding samples incrementally may be somewhat more time-consuming than running DeepView once on the full set of samples to be visualized. This is because the decision boundaries must be calculated twice, each time with a similar time complexity. However, the step of adding 100 samples to 100 existing samples takes less time than computing the visualization from scratch for 200 samples, because the distances that were already computed for the first half of the samples can be reused.

| <p align="left">Scenario | Time |
| -------- | ---- |
| <p align="left">From scratch for 100 samples | 31.20 sec |
| <p align="left">Adding 100 samples (100 already added) | 66.89 sec |
| <p align="left">From scratch for 200 samples | 71.16 sec |
| <p align="left">200 samples when adding 100 samples in two steps | 98.19 sec |

> Choose the usage according to your needs.

In [7]:
# Start from a fresh DeepView object so the timing below covers a full
# from-scratch computation rather than an incremental update.
deepview = DeepView(pred_wrapper, classes, max_samples, batch_size,
                    img_size, img_channels, n, lam, resolution, cmap)

n_samples = 200
# Random training-set indices (drawn with replacement, np.random.choice's default).
sample_ids = np.random.choice(len(trainset), n_samples)
# Fetch every selected item exactly once: each trainset[i] access re-runs the
# transform pipeline, so indexing separately for image and label doubles the work.
samples = [trainset[i] for i in sample_ids]
X = np.array([image.cpu().numpy() for image, _ in samples])
Y = np.array([label for _, label in samples])

# Time the full pipeline: adding the samples and rendering the plot.
t0 = time.time()
deepview.add_samples(X, Y)
deepview.show()

print('Time to calculate visualization for %d samples: %.2f sec' % (n_samples, time.time() - t0))

Distance calculation 20.00 %
Distance calculation 40.00 %
Distance calculation 60.00 %
Distance calculation 80.00 %
Distance calculation 100.00 %
Embedding samples ...


  warn("Using precomputed metric; transform will be unavailable for new data")


Computing decision regions ...
Time to calculate visualization for 200 samples: 71.16 sec
