# Save padded image
Given an image path (`im_pth`), the code below will extract its file name (`file_name`) and generate the following images in the `transformed_images` directory:
* unpadded original image: `{file_name}_no_pad.png`
* padded image to fit into square dimensions: `{file_name}_pad.png`

In [1]:
import os
from PIL import Image

img_size = 2048
# im_pth = '/home/aisinai/data/mimic/valid/p10296197/s03/view1_frontal.jpg'  # change to your image path
im_pth = '/home/aisinai/data/mimic/valid/p10296197/s03/view2_lateral.jpg'  # change to your image path
base = os.path.basename(im_pth)
file_name = os.path.splitext(base)[0]

os.makedirs('transformed_images', exist_ok=True)

im = Image.open(im_pth)
im.save(f'transformed_images/{file_name}_no_pad.png')

old_size = im.size  # old_size[0] is in (width, height) format
ratio = float(img_size) / max(old_size)
new_size = tuple([int(x * ratio) for x in old_size])
im = im.resize(new_size, Image.ANTIALIAS)

# create a new image for padding and paste the resized on it
new_im = Image.new("RGB", (img_size, img_size))
new_im.paste(im, ((img_size - new_size[0]) // 2,
                  (img_size - new_size[1]) // 2))
new_im.save(f'transformed_images/{file_name}_pad.png')

# Save reconstructed images
There are 4 models, identified by their train runs. First three models have encoder output depth of 64.
* 0: Model A. 2 convolutions in the first / bottom layer | 1 convolution  in the second / top layer
* 1: Model B. 3 convolutions in the first / bottom layer | 2 convolutions in the second / top layer
* 3: Model C. 4 convolutions in the first / bottom layer | 2 convolutions in the second / top layer

Last model, Model D, has encoder output depth of 1.
* embed1: Model D. 2 convolutions in the first / bottom layer | 1 convolution in the second / top layer

It takes as the input the padded image in the `transformed_images` directory from the above code block and generate the following images in the `transformed_images` directory: 
* `{file_name}_original.png`
* Output from Model A: `{file_name}_recon_A.png`
* Output from Model B: `{file_name}_recon_B.png`
* Output from Model C: `{file_name}_recon_C.png`
* Output from Model D: `{file_name}_recon_D.png`

In [4]:
import torch
from torch import nn
from torch.autograd import Variable
from networks import VQVAE
from PIL import Image
from torchvision import transforms
from torchvision.utils import save_image
from utilities import rgb2gray

cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalization = transforms.Normalize(mean=mean, std=std)
transform_array = [transforms.Resize(img_size), transforms.CenterCrop(img_size), transforms.ToTensor(), normalization]
transform = transforms.Compose(transform_array)

image = torch.zeros((1, 3, img_size, img_size))  # img_size from above
image[0, :] = transform(Image.open(f'transformed_images/{file_name}_pad.png'))  # file_name from above

mean = torch.FloatTensor([0.485, 0.456, 0.406]).reshape(3, 1, 1).type(Tensor)
std = torch.FloatTensor([0.229, 0.224, 0.225]).reshape(3, 1, 1).type(Tensor)

for model_name in ['A', 'B', 'C', 'D']:
    if model_name == 'A':
        # model_dir = path to {saved_model}.pt checkpoint file for model A
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/0/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=4, second_stride=2).cuda() if cuda else VQVAE()
    elif model_name == 'B':
        # model_dir = path to {saved_model}.pt checkpoint file for model B
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/1/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=8, second_stride=4).cuda() if cuda else VQVAE()
    elif model_name == 'C':
        # model_dir = path to {saved_model}.pt checkpoint file for model C
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/3/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=16, second_stride=4).cuda() if cuda else VQVAE()
    elif model_name == 'D':
        # model_dir = path to {saved_model}.pt checkpoint file for model D
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/embed1/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=4, second_stride=2, embed_dim=1).cuda() if cuda else VQVAE()

    model.load_state_dict(torch.load(model_dir))
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        device_ids = list(range(n_gpu))
        model = nn.DataParallel(model, device_ids=device_ids)
    model.eval()
    original_img = Variable(image.type(Tensor))

    with torch.no_grad():
        out, _ = model(original_img)
        decoded_img, _ = model(original_img)
        quant_t, quant_b, _, id_t, id_b = model.encode(original_img)
        upsample_t = model.upsample_t(quant_t)
        quant = torch.cat([upsample_t, quant_b], 1)

    original_img = original_img * std + mean
    out = out * std + mean
    save_image(original_img[0,:].data,
               f'transformed_images/{file_name}_original.png', 
               nrow=1, normalize=True, range=(0,1))
    save_image(out[0,:].data,
               f'transformed_images/{file_name}_recon_{model_name}.png',
               nrow=1, normalize=True, range=(0,1))

# Compute PSNR
Take two directories, one containing the original images and the other the reconstructed images, and compute PSNR

In [18]:
import os
from PIL import Image
import numpy as np
from math import log10, sqrt

orig_dir = '/home/aisinai/work/VQ-VAE2-Images/1024/frontal/original'
recon_dir = '/home/aisinai/work/VQ-VAE2-Images/1024/frontal'

def PSNR(original, compressed):
    mse = np.mean((original - compressed) ** 2)
    if(mse == 0):  # MSE is zero means no noise is present in the signal.
                   # Therefore PSNR have no importance.
        return 100
    max_pixel = 255.0
    psnr = 20 * log10(max_pixel / sqrt(mse))
    return psnr

PSNRs = []
for model in ['A', 'B', 'C']:
    for image in os.listdir(orig_dir):
        original = np.asarray(Image.open(f'{orig_dir}/{image}').convert('RGB'))
        recon = np.asarray(Image.open(f'{recon_dir}/{model}/{image}').convert('RGB'))
        PSNRs.append(PSNR(original, recon))
    print(f'Average PSNR value for model {model} is {np.average(PSNRs)} dB')

Average PSNR value for model A is 45.39284165046035 dB
Average PSNR value for model B is 44.05189311629433 dB
Average PSNR value for model C is 41.590470862564146 dB


# Save VQ-VAE-2 Models as ONNX

In [3]:
import torch
from networks import VQVAE

cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

img_size = 1024
batch_size = 1
num_channel = 3  # 3 for RGB
dummy_image = torch.randn(batch_size, num_channel, img_size, img_size, requires_grad=True)

for model_name in ['A', 'B', 'C', 'D']:
    if model_name == 'A':
        # model_dir = path to {saved_model}.pt checkpoint file for model A
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/0/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=4, second_stride=2)
    elif model_name == 'B':
        # model_dir = path to {saved_model}.pt checkpoint file for model B
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/1/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=8, second_stride=4)
    elif model_name == 'C':
        # model_dir = path to {saved_model}.pt checkpoint file for model C
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/3/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=16, second_stride=4)
    elif model_name == 'D':
        # model_dir = path to {saved_model}.pt checkpoint file for model D
        model_dir = '/home/aisinai/work/VQ-VAE2/20200422/vq_vae/CheXpert/embed1/checkpoint/vqvae_040.pt'
        model = VQVAE(first_stride=4, second_stride=2, embed_dim=1)

    model.load_state_dict(torch.load(model_dir))
    model.eval()
    out = model(dummy_image)
    torch.onnx.export(model, dummy_image, f'model_{model_name}.onnx')
    print(f'Model {model_name}: saved to model_{model_name}.onnx')

Model A: saved to model_A.onnx
Model B: saved to model_B.onnx
Model C: saved to model_C.onnx
Model D: saved to model_D.onnx


# Save DenseNet-121 Models as ONNX

In [4]:
import torch
from networks import Densenet121

cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

img_size = 256
batch_size = 1

dummy_input = torch.randn(batch_size, num_channel, img_size, img_size, requires_grad=True)
n_classes = 14

for model_name in ['orig', 'recon', 'latent']:
    if model_name == 'orig':
        # model_dir = path to {saved_model}.pt checkpoint file for original image inputs
        model_dir = '/home/aisinai/work/VQ-VAE2/20200424/densenet121/orig/best_densenet_model.pt'
        model = Densenet121(n_classes=n_classes, input_type=model_name)
        model.model.load_state_dict(torch.load(model_dir))
        num_channel = 3  # 3 for RGB
        dummy_input = torch.randn(batch_size, num_channel, img_size, img_size, requires_grad=True)
    elif model_name == 'recon':
        # model_dir = path to {saved_model}.pt checkpoint file for reconstructed image inputs
        model_dir = '/home/aisinai/work/VQ-VAE2/20200424/densenet121/recon/best_densenet_model.pt'
        model = Densenet121(n_classes=n_classes, input_type=model_name)
        model.model.load_state_dict(torch.load(model_dir))
        num_channel = 3  # 3 for RGB
        dummy_input = torch.randn(batch_size, num_channel, img_size, img_size, requires_grad=True)
    elif model_name == 'latent':
        # model_dir = path to {saved_model}.pt checkpoint file for latent vector inputs
        model_dir = '/home/aisinai/work/VQ-VAE2/20200424/densenet121/latent/best_densenet_model.pt'
        model = Densenet121(n_classes=n_classes, input_type=model_name)
        num_channel = 2  # 2 for 2 latent vectors concatenated
        dummy_input = torch.randn(batch_size, num_channel, img_size, img_size, requires_grad=True)
        model.load_state_dict(torch.load(model_dir))

    model.eval()
    out = model(dummy_input)
    torch.onnx.export(model, dummy_input, f'densenet_{model_name}.onnx')
    print(f'Model {model_name}: saved to model_{model_name}.onnx')

Model orig: saved to model_orig.onnx
Model recon: saved to model_recon.onnx
Model latent: saved to model_latent.onnx


# Compute Number of FLOPs

In [9]:
from ptflops import get_model_complexity_info
from networks import Densenet121
from utilities import ChestXrayHDF5, compute_AUCs, save_loss_AUROC_plots

cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

model_l = Densenet121(n_classes=14, input_type='latent')
macs_l, params_l = get_model_complexity_info(model_l, (2, 256, 256), as_strings=True,
                                             print_per_layer_stat=True, verbose=True)

model_o = Densenet121(n_classes=14, input_type='orig')
macs_o, params_o = get_model_complexity_info(model_o, (3, 1024, 1024), as_strings=True,
                                             print_per_layer_stat=True, verbose=True)


Densenet121(
  6.968 M, 100.000% Params, 3.764 GMac, 100.000% MACs, 
  (init_conv): ConvTranspose2d(0.0 M, 0.000% Params, 0.001 GMac, 0.016% MACs, 2, 3, kernel_size=(1, 1), stride=(1, 1))
  (model): DenseNet(
    6.968 M, 100.000% Params, 3.764 GMac, 99.984% MACs, 
    (features): Sequential(
      6.954 M, 99.794% Params, 3.764 GMac, 99.984% MACs, 
      (conv0): Conv2d(0.009 M, 0.135% Params, 0.154 GMac, 4.095% MACs, 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(0.0 M, 0.002% Params, 0.002 GMac, 0.056% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(0.0 M, 0.000% Params, 0.001 GMac, 0.028% MACs, inplace=True)
      (pool0): MaxPool2d(0.0 M, 0.000% Params, 0.001 GMac, 0.028% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        0.335 M, 4.808% Params, 1.379 GMac, 36.634% MACs, 
        (denselayer1): _DenseLayer(
          0.045 M,

Densenet121(
  6.968 M, 100.000% Params, 60.219 GMac, 100.000% MACs, 
  (init_conv): ConvTranspose2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, 2, 3, kernel_size=(1, 1), stride=(1, 1))
  (model): DenseNet(
    6.968 M, 100.000% Params, 60.219 GMac, 100.000% MACs, 
    (features): Sequential(
      6.954 M, 99.794% Params, 60.219 GMac, 100.000% MACs, 
      (conv0): Conv2d(0.009 M, 0.135% Params, 2.466 GMac, 4.095% MACs, 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(0.0 M, 0.002% Params, 0.034 GMac, 0.056% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(0.0 M, 0.000% Params, 0.017 GMac, 0.028% MACs, inplace=True)
      (pool0): MaxPool2d(0.0 M, 0.000% Params, 0.017 GMac, 0.028% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        0.335 M, 4.808% Params, 22.064 GMac, 36.640% MACs, 
        (denselayer1): _DenseLayer(
          0.04

In [10]:
print(macs_l,params_l)
print(macs_o,params_o)

3.76 GMac 6.97 M
60.22 GMac 6.97 M
