# Interactively validate a Convolutional Variational Autoencoder

The code in this notebook uses the Conv-VAE package to load a pre-trained model and experiment with its pre-trained encoder and decoder.

In [1]:
import sys
sys.path.append("..")

from settings import Config
sys.path.append(Config().values["conv_vae"]["code_dir"])

# print(Config().values)

import argparse
import json
import re
import numpy as np
import torch
from tqdm import tqdm

# these imports are from the Conv-VAE package
import data_loader.data_loaders as module_data
import model.loss as module_loss
import model.metric as module_metric
import model.model as module_arch
from parse_config import ConfigParser
from torch.nn import functional as F
import torchvision.utils as vutils
from torchvision import transforms
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
import argparse
import socket

from mpl_toolkits.axes_grid1 import ImageGrid
#Fixes PosixPath Error
import pathlib

# Oh, this hack was fixing something, but for me it is the other way around
#temp = pathlib.PosixPath
#pathlib.PosixPath = pathlib.WindowsPath

Proceeding to load config file: G:\My Drive\LotziStudy\Code\PackageTracking\VisionBasedRobotManipulator\settings\settings-LotziYoga.yaml


In [None]:
# The Conv-VAE package is driven from the command line, so the same option
# parser is rebuilt here; a later cell feeds it a synthetic sys.argv.
args = argparse.ArgumentParser(description='PyTorch Template')
for short_flag, long_flag, help_text in (
    ('-c', '--config', 'config file path (default: None)'),
    ('-r', '--resume', 'path to latest checkpoint (default: None)'),
    ('-d', '--device', 'indices of GPUs to enable (default: all)'),
):
    args.add_argument(short_flag, long_flag, default=None, type=str,
                      help=help_text)

# <model_dir>/models/<model_type> is the folder searched for training runs.
model_path = pathlib.Path(Config().values["conv_vae"]["model_dir"]).joinpath(
    "models", Config().values["conv_vae"]["model_type"]
)

# Descend into the most recent training run found in that folder.
latest = latest_training_run(model_path)
print(latest)
model_path = model_path.joinpath(latest)

# Pick the latest saved model inside that run.
# NOTE(review): what latest_model() returns (file name, full path, epoch
# number?) is not visible here -- confirm before relying on the printed value.
model = latest_model(model_path)

print(model_path)
print(model)



In [None]:

#model_path = pathlib.Path(VAE_MODEL_DIR, VAE_MODEL_FILE).resolve()

# Build the fake command line for the Conv-VAE config parser. The options are
# set here, through sys.argv, so the package downloaded from GitHub does not
# have to be modified.
# NOTE(review): `-r` receives `model_path` (the training-run directory built
# in the previous cell), not the value returned by latest_model() -- confirm
# which of the two the package expects as the checkpoint to resume from.
cli_options = {"-c": VAE_CONFIG, "-r": model_path}
value = ["this-script"]
value.extend(f"{flag}{setting}" for flag, setting in cli_options.items())

sys.argv = value
# from_args() is handed the parser built earlier; presumably it parses
# sys.argv itself, which is why sys.argv is replaced just above.
config = ConfigParser.from_args(args)
print(config)

In [None]:
logger = config.get_logger('test')

# --- data loader -----------------------------------------------------------
# LOTZI: the loader class is whatever config['data_loader']['type'] names.
# With the current implementation this is very specific to CelebA and needs
# to be changed eventually.
#
# FIXME: an earlier attempt added a RobotDataLoader to the package's
# data_loader module, but the GitHub-downloaded package should stay
# unmodified. For now this is back to the CelebA loader; the loader can be
# selected through the config and this cell updated accordingly.
loader_class = getattr(module_data, config['data_loader']['type'])
data_dir = config['data_loader']['args']['data_dir']
data_loader = loader_class(
    data_dir,
    batch_size=36,
    shuffle=False,
    validation_split=0.0,
    # training=False,
    num_workers=2,
)

# --- model -----------------------------------------------------------------
# LOTZI: this only builds the architecture named under 'arch' in the config;
# the weights are not loaded from the checkpoint yet.
model = config.init_obj('arch', module_arch)
logger.info(model)

# --- loss ------------------------------------------------------------------
# Look up the loss function by the name stored in the config.
loss_fn = getattr(module_loss, config['loss'])
# metric_fns = [getattr(module_metric, met) for met in config['metrics']]



In [None]:
# logger.info('Loading checkpoint: {} ...'.format(config.resume))
# checkpoint = torch.load(config.resume)

# Deserialize the checkpoint onto the CPU so this also works on machines
# without a GPU.
print("Loading the checkpoint")
cpu_device = torch.device('cpu')
checkpoint = torch.load(config.resume, map_location=cpu_device)
print("Checkpoint loaded")

# Wrap the model for multi-GPU configs before the weights are restored.
if config['n_gpu'] > 1:
    model = torch.nn.DataParallel(model)
state_dict = checkpoint['state_dict']
model.load_state_dict(state_dict)

# Move the restored model to the GPU when one is present, otherwise keep it
# on the CPU, and switch it to evaluation mode for testing.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)
model.eval()


In [None]:
# Inspect the restored network: the whole module first, then only its
# `encoder` attribute, and finally its `latent_dim` attribute (presumably
# the size of the latent vector -- confirm against the Conv-VAE model class).
print(model)
print(model.encoder)
print(f"latent_dim {model.latent_dim}")
# print(model.hidden_dims)

## Pass one picture through the complete autoencoder

Pass one specific picture through the complete autoencoder and compare the input with the output. This is a quick check of whether the VAE has captured the picture sufficiently.

This code is also intended as a sample of how to use the pre-trained model and how to feed it new data without the training code.

In [70]:
from PIL import Image
import torchvision.transforms as transforms


# picture_name = '../../localdata/img/Rafael.jpg'
# picture_name = '../../localdata/img/indian-man.jpeg'
# picture_name = '../../localdata/img/00029_dev2.jpg'

# Pick one fixed image from the training data to push through the VAE.

directory = pathlib.Path(VAE_TRAININGDATA_DIR, "Class-Folder")

# The order in which glob() yields entries depends on the file system, so the
# matches are sorted to make the chosen index point at the same file on every
# run and on every machine.
files = sorted(directory.glob('*.jpg'))

# Arbitrary choice; any index below len(files) works.
picture_index = 11
if picture_index >= len(files):
    raise IndexError(
        f"Need at least {picture_index + 1} '*.jpg' files in {directory}, "
        f"found {len(files)}"
    )

# glob() results already include the `directory` prefix. Joining them onto
# `directory` a second time would duplicate that prefix whenever `directory`
# is a relative path, so the match is used as-is.
picture_name = files[picture_index]


In [None]:


#"ID_0046_AGE_0072_CONTRAST_1_CT.jpg")
# Open the selected picture with PIL and show it exactly as stored on disk.
image = Image.open(picture_name)

plt.imshow(image)

# Report the pixel format of the file. The author observed that at least the
# medical image is stored as 16-bit unsigned integers, hence the conversion
# to RGB that follows.
print(image.mode)
image = image.convert("RGB")

plt.imshow(image)

# Preprocessing: resize, centre-crop to an image_size x image_size square and
# convert to a tensor (instead of a bare transforms.ToTensor()).
image_size = 64

preprocessing_steps = [
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Run the picture through the pipeline.
image_tensor = transform(image)

# Show the resulting tensor shape, e.g. torch.Size([3, H, W]).
print(image_tensor.shape)

# matplotlib wants channels last: reorder [C, H, W] into [H, W, C].
image_tensor_for_pic = image_tensor.permute(1, 2, 0)
#plt.imshow(image_tensor_for_pic)


In [None]:

# Add a batch dimension: [3, H, W] becomes [1, 3, H, W]
# (H = W = 64 with the 64-pixel resize/crop transform used in this notebook).
image_tensor_2 = image_tensor.unsqueeze(0)

# Move tensor to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image_tensor_3 = image_tensor_2.to(device)

# Run the input through the complete autoencoder. This is inference only, so
# autograd recording is switched off to avoid building an unused graph.
with torch.no_grad():
    output, mu, logvar = model(image_tensor_3)

# Output: the visual reconstruction, reordered to [H, W, C] for matplotlib
#print(output)
output_for_pic = output[0].cpu().permute(1, 2, 0).numpy()
plt.imshow(output_for_pic)

# mu: presumably the mean of the latent distribution, i.e. the encoding of
# this input -- confirm against the Conv-VAE model class
print(f"mu = {mu}")

# logvar: the log-variance values the model returned for this input
print(f"logvar = {logvar}")

# Side by side: the preprocessed input (left) and its reconstruction (right)
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

axs[0].imshow(image_tensor_for_pic)
axs[1].imshow(output_for_pic)


In [None]:

# Try to do a decoding from the same mu: decode the latent mean itself
# instead of a fresh random sample around it.
# NOTE(review): the original loop `for i in range(512): z2[0][i] =` had no
# right-hand side, so this cell was a syntax error. Using mu as the latent
# vector follows the stated intent above -- confirm this is what was meant.
# A detached copy is used so that editing z2 in place never alters mu.
z2 = mu.detach().clone()

# Optional experiment: overwrite individual latent coordinates before decoding
#z2[0][1] = 2.0
#z2[0][3] = 2.0

# Inference only, so no autograd graph is needed.
with torch.no_grad():
    output2 = model.decode(z2)
output_for_pic2 = output2[0].cpu().permute(1, 2, 0).numpy()

# Side by side: the earlier reconstruction (left) and the decode from mu (right)
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

axs[0].imshow(output_for_pic)
axs[1].imshow(output_for_pic2)


# Generating random samples from the latent space

In [None]:
import random

# Decode a 5x5 grid of images from random latent vectors.
#
# Each panel gets its own latent vector with every coordinate drawn uniformly
# from [-0.5, 0.5), and is decoded and drawn exactly once.
# NOTE(review): the previous version re-sampled z2, decoded and called imshow
# inside the 512-step per-coordinate loop. That meant 512 decodes and 512
# stacked images per panel, with only one coordinate randomised in the image
# finally visible. Randomising all coordinates is assumed to be the intent --
# confirm.
grid_rows, grid_cols = 5, 5
fig, axs = plt.subplots(grid_rows, grid_cols, figsize=(10, 5))

# Inference only, so no autograd graph is needed.
with torch.no_grad():
    for x in range(grid_rows):
        for y in range(grid_cols):
            # Same shape, dtype and device as a single-image mu, so the
            # latent size is not hard-coded.
            z2 = torch.empty_like(mu[:1]).uniform_(-0.5, 0.5)
            output2 = model.decode(z2)
            output_for_pic2 = output2[0].cpu().permute(1, 2, 0).numpy()
            axs[x][y].imshow(output_for_pic2)
#axs[0].imshow(output_for_pic)
#axs[1].imshow(output_for_pic2)