# Learn a Convolutional Variational Autoencoder 

This notebook uses the Conv-VAE package to train a convolutional variational autoencoder on training data collected from the robot. The code lives in a notebook because it needs to be run only once.

The result of running this code is the creation of a "final model". 

The encoder portion of this model is the component that can be used for the encoder of the robot. 

In [6]:
import sys
sys.path.append("..")
import settings
sys.path.append(settings.CONV_VAE_DIR)

import argparse
import numpy as np
import torch
from tqdm import tqdm

# these imports are from the Conv-VAE-Torch package
import data_loader.data_loaders as module_data
import model.loss as module_loss
import model.metric as module_metric
import model.model as module_arch
from parse_config import ConfigParser
from trainer import Trainer
from utils import prepare_device
# end of Conv-VAE-Torch imports

from torch.nn import functional as F
import torchvision.utils as vutils
from torchvision import transforms
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
import argparse
import socket

from mpl_toolkits.axes_grid1 import ImageGrid
#Fixes PosixPath Error
import pathlib

# Oh, this hack was fixing something, but for me it is the other way around
#temp = pathlib.PosixPath
#pathlib.PosixPath = pathlib.WindowsPath

## Setting up the directories for training data and models

The training data for this model is a collection of pictures that should be characteristic of the type of jobs the robot will perform. With the current data loader (the one used in the code sample for training on the Celeb database, but quite generic), all the training data should be stored as individual files in a single directory.

That directory should be a subdirectory named "Class-Folder" of the directory indicated by VAE_TRAININGDATA_DIR.

### IMPORTANT NOTE
The following code creates Python variables for some of these directories. However, the Conv-VAE-Torch code actually reads them from the json configuration file, so the same values need to be replicated there as well.

In [2]:

# Per-machine settings, keyed by hostname. To run the notebook on a new
# machine, add an entry here (and mirror the directories in the json config
# file named by VAE_CONFIG, since that file is what Conv-VAE-Torch reads).
MACHINE_SETTINGS = {
    "LotziYoga": {
        "VAE_CONFIG": "config-robot-lotziyoga.json",
        "VAE_MODEL_DIR": ".",  # FIXME, specify it
        "VAE_TRAININGDATA_DIR": ".",  # FIXME, specify it
    },
    "tredy2": {
        "VAE_CONFIG": "config-robot-tredy2.json",
        "VAE_MODEL_DIR": "/home/lboloni/Documents/Hackingwork/__Temporary/VisionBasedRobotManipulator-models/Conv-VAE/",
        "VAE_MODEL_FILE": "0817_155635/checkpoint-epoch20.pth",
        "VAE_TRAININGDATA_DIR": "/home/lboloni/Documents/Hackingwork/__Temporary/VisionBasedRobotManipulator-training-data/vae-training-data",
    },
}


def vae_settings_for_machine(hostname):
    """Return the settings dict registered for ``hostname``.

    Returns None when no entry exists for that hostname, so the caller can
    report the problem instead of silently continuing.
    """
    return MACHINE_SETTINGS.get(hostname)


machine_name = socket.gethostname()
print("The name of the current machine is:" + machine_name)

_machine_settings = vae_settings_for_machine(machine_name)
if _machine_settings is None:
    # Previously an unknown host fell through both `if` statements without a
    # word, and the first symptom was a NameError in a later cell.
    print(
        f"WARNING: no VAE settings are registered for machine '{machine_name}'. "
        "Add an entry to MACHINE_SETTINGS; until then VAE_CONFIG, VAE_MODEL_DIR "
        "and VAE_TRAININGDATA_DIR are undefined and the cells below will fail."
    )
else:
    VAE_CONFIG = _machine_settings["VAE_CONFIG"]
    VAE_MODEL_DIR = _machine_settings["VAE_MODEL_DIR"]
    VAE_TRAININGDATA_DIR = _machine_settings["VAE_TRAININGDATA_DIR"]
    # Only some machines name a checkpoint file; as before, the variable is
    # defined only for those machines.
    if "VAE_MODEL_FILE" in _machine_settings:
        VAE_MODEL_FILE = _machine_settings["VAE_MODEL_FILE"]
    


The name of the current machine is:tredy2


## Training.

Most of the parameters of the training process are set up in the config file.

In [3]:
# The package code is driven by command-line arguments, so this cell emulates
# a command-line invocation instead of calling it with Python arguments.
args = argparse.ArgumentParser(description='PyTorch Template')
args.add_argument(
    '-c', '--config',
    type=str, default=None,
    help='config file path (default: None)',
)
args.add_argument(
    '-r', '--resume',
    type=str, default=None,
    help='path to latest checkpoint (default: None)',
)
args.add_argument(
    '-d', '--device',
    type=str, default=None,
    help='indices of GPUs to enable (default: all)',
)

# The parameters are injected through sys.argv from here so that the package
# downloaded from github does not have to be modified.
value = ["this-script", "-c{}".format(VAE_CONFIG)]
sys.argv = value

config = ConfigParser.from_args(args)
print(config)

<parse_config.ConfigParser object at 0x74dd3426f520>


In [4]:
def train(config, validate=False):
    """Build the data loader, model, optimizer and scheduler from ``config`` and run the Trainer.

    Args:
        config: configuration object providing ``get_logger``, ``init_obj``
            and item access (the notebook passes the ``ConfigParser`` it
            created from the emulated command line).
        validate: when True, the loader returned by
            ``data_loader.split_validation()`` is handed to the ``Trainer`` as
            ``valid_data_loader``. When False (the default) ``None`` is passed,
            which is what this function always did before the parameter
            existed.

    Returns:
        None. The function ends by calling ``trainer.train()``.
    """
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    # The split is always requested, exactly as before; `validate` only
    # decides whether its result reaches the Trainer.
    # NOTE(review): whether split_validation() has side effects on
    # data_loader is not visible from this file - confirm before removing.
    valid_data_loader = data_loader.split_validation()

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get the loss function handle named in the config
    criterion = getattr(module_loss, config['loss'])
    # metrics are currently disabled:
    # metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; only parameters that
    # require gradients are handed to the optimizer
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj(
        'lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model, criterion, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader if validate else None,
                      lr_scheduler=lr_scheduler)

    trainer.train()

In [7]:
train(config)

INFO:train:VanillaVAE(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )


KeyboardInterrupt: 