# Creating a CNN-based visual encoding finetuned with proprioception

We create an encoding for the robot starting from a pretrained CNN model. As the feature vector of this is still large (eg 512 * 7 * 7), we reduce this to the encoding with an MLP. 

We finetune the encoding with information from proprioception.  

The sensor processing object associated with the network trained like this is in sensorprocessing/sp_cnn.py

In [1]:
import sys
sys.path.append("..")
from settings import Config

import pathlib
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import models, transforms
#import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from behavior_cloning.demo_to_trainingdata import BCDemonstration
from sensorprocessing.sp_cnn import VGG19Regression, VGG19SensorProcessing
from robot.al5d_position_controller import RobotPosition

# Move data to GPU (if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# The experiment/run we are going to run: the specified model will be created
experiment = "sp_cnn"
run = "vgg19_00"
exp = Config().get_experiment(experiment, run)

Loading pointer config file: C:\Users\lboloni\.config\BerryPicker\mainsettings.yaml
Loading machine-specific config file: G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\settings-LotziYoga.yaml
Note: no system dependent config file G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\sp_cnn\vgg19_00_sysdep.yaml,
 that is ok, proceeding.
Configuration for experiment: sp_cnn/vgg19_00 successfully loaded


### Create regression training data (image to proprioception)
The training data (X, Y) is all the pictures from a demonstration with the corresponding proprioception data. 

In [3]:
def load_images_as_proprioception_training(task, proprioception_input_file, proprioception_target_file):
    """Loads all the images of a task, and processes it as two tensors as input and target data for proprioception training. 
    Caches the processed results into the input and target file pointed in the config. Remove those files to recalculate
    """
    retval = {}
    if proprioception_input_file.exists():
        retval["inputs"] = torch.load(proprioception_input_file, weights_only=True)
        retval["targets"] = torch.load(proprioception_target_file, weights_only=True)
    else:
        demos_dir = pathlib.Path(Config()["demos"]["directory"])
        task_dir = pathlib.Path(demos_dir, "demos", task)
        
        inputlist = []
        targetlist = []

        for demo_dir in task_dir.iterdir():
            if not demo_dir.is_dir():
                pass
            bcd = BCDemonstration(demo_dir, sensorprocessor=None)
            for i in range(bcd.trim_from, bcd.trim_to):
                sensor_readings, _ = bcd.get_image(i)
                inputlist.append(sensor_readings[0])
                a = bcd.get_a(i)
                rp = RobotPosition.from_vector(a)
                anorm = rp.to_normalized_vector()        
                targetlist.append(torch.from_numpy(anorm))

        retval["inputs"] = torch.stack(inputlist)
        retval["targets"] = torch.stack(targetlist)
        torch.save(retval["inputs"], proprioception_input_file)
        torch.save(retval["targets"], proprioception_target_file)

    # Separate the training and validation data. 
    # We will be shuffling the demonstrations 
    length = retval["inputs"].size(0)
    rows = torch.randperm(length) 
    shuffled_inputs = retval["inputs"][rows]
    shuffled_targets = retval["targets"][rows]

    training_size = int( length * 0.67 )
    retval["inputs_training"] = shuffled_inputs[1:training_size]
    retval["targets_training"] = shuffled_targets[1:training_size]

    retval["inputs_validation"] = shuffled_inputs[training_size:]
    retval["targets_validation"] = shuffled_targets[training_size:]

    return retval

In [4]:
task = exp["proprioception_training_task"]
proprioception_input_file = pathlib.Path(
    exp["data_dir"], exp["proprioception_input_file"])
proprioception_target_file = pathlib.Path(
    exp["data_dir"], exp["proprioception_target_file"])

tr = load_images_as_proprioception_training(
    task, proprioception_input_file, proprioception_target_file)
inputs_training = tr["inputs_training"]
targets_training = tr["targets_training"]
inputs_validation = tr["inputs_validation"]
targets_validation = tr["targets_validation"]

### Create a model that performs proprioception regressio

FIXME: Should we move the loss into the model class?

In [5]:

hidden_size = exp["latent_dims"]
output_size = Config()["robot"]["action_space_size"]
if exp['model'] == 'VGG19Regression':
    model = VGG19Regression(hidden_size, output_size)
    if exp['loss'] == 'MSELoss':        
        criterion = nn.MSELoss()
    elif exp['loss'] == 'L1Loss':
        criterion = nn.L1Loss()
else:
    raise Exception(f"Unknown model {exp['model']}")

optimizer = optim.Adam(model.parameters(), lr=exp['learning_rate'])



In [6]:
# Create DataLoaders for batching
batch_size = exp['batch_size']
train_dataset = TensorDataset(inputs_training, targets_training)
test_dataset = TensorDataset(inputs_validation, targets_validation)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
def train_and_save_proprioception_model(model, criterion, optimizer, modelfile, device="cpu", epochs=20):
    """Trains and saves the proprioception model
    FIXME: must have parameters etc to investigate alternative models. 
    """

    model = model.to(device)
    criterion = criterion.to(device)
    # Training loop
    num_epochs = epochs
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            # Forward pass
            # print("batchX immediately after for loobatch_X.device" )
            predictions = model(batch_X)
            #print(batch_y.device)
            # I don't understand this  
            # print(batch_X.device)
            loss = criterion(predictions, batch_y)
            
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        if (epoch + 1) % 1 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}')

    # Evaluate the model
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            predictions = model(batch_X)
            loss = criterion(predictions, batch_y)
            test_loss += loss.item()

    test_loss /= len(test_loader)
    print(f'Test Loss: {test_loss:.4f}')
    torch.save(model.state_dict(), modelfile)

In [8]:
# modelfile = pathlib.Path(Config()["explorations"]["proprioception_mlp_model_file"])
modelfile = pathlib.Path(
    exp["data_dir"], exp["proprioception_mlp_model_file"])
epochs = exp["epochs"]
if modelfile.exists():
    model.load_state_dict(torch.load(modelfile))
else:
    train_and_save_proprioception_model(model, criterion, optimizer, modelfile, device=device, epochs=epochs)

  model.load_state_dict(torch.load(modelfile))


### Check the model for an encoding mode

Check whether we can load back the model

In [9]:
exp = Config().get_experiment(experiment, run)
hidden_size = exp["latent_dims"]
output_size = Config()["robot"]["action_space_size"]

if exp['model'] == 'VGG19Regression':
    enc = VGG19Regression(
        hidden_size=hidden_size, output_size=output_size)
else:
    raise Exception(f"Unknown model {exp['model']}")

modelfile = pathlib.Path(exp["data_dir"], 
                         exp["proprioception_mlp_model_file"])
assert modelfile.exists()
enc.load_state_dict(torch.load(modelfile))


Note: no system dependent config file G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\sp_cnn\vgg19_00_sysdep.yaml,
 that is ok, proceeding.
Configuration for experiment: sp_cnn/vgg19_00 successfully loaded


  enc.load_state_dict(torch.load(modelfile))


<All keys matched successfully>

In [10]:
task = "random-uncluttered"
demos_dir = pathlib.Path(Config()["demos"]["directory"])
task_dir = pathlib.Path(demos_dir, "demos", task)
enc = enc.to(device)


inputlist = []
targetlist = []
with torch.no_grad():
    for demo_dir in task_dir.iterdir():
        if not demo_dir.is_dir():
            pass
        bcd = BCDemonstration(demo_dir, sensorprocessor=None)
        for i in range(bcd.trim_from, bcd.trim_to):
            sensor_readings, _ = bcd.get_image(i)
            z = enc.encode(sensor_readings)
            break

print(z)
print(z.shape)


Cameras found: ['dev2']
There are 596 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
Cameras found: ['dev2']
There are 388 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
Cameras found: ['dev2']
There are 547 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
Cameras found: ['dev2']
There are 523 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
tensor([[ 2.2591e+00, -3.1256e+00, -4.0274e+00, -1.4163e+00, -3.1300e+00,
         -6.4341e+00, -2.5449e-01, -3.3360e+00, -3.0946e+00, -1.8694e+00,
         -8.4439e+00, -1.4526e+00, -4.2678e+00, -3.3192e+00,  8.2174e-01,
         -2.4379e+00, -4.2174e+00,  2.1425e+00, -1.6212e+00, -3.7401e+00,
         -7.2628e-01,  3.1840e+00, -3.5117e+00,  2.2689e+00, -5.9462e+00,
         -2.6433e+00,  4.8190e+00,  1.0257e+00, -1.8991e+00, -1.8387e+00,
         -9.2060e-01, -5

### Show how to use an SensorProcessing package based on this approach

In [11]:
# Create the sensor processing package

exp = Config().get_experiment("sp_cnn", run)
sp = VGG19SensorProcessing(exp, device=device)

Note: no system dependent config file G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\sp_cnn\vgg19_00_sysdep.yaml,
 that is ok, proceeding.
Configuration for experiment: sp_cnn/vgg19_00 successfully loaded


  self.enc.load_state_dict(torch.load(modelfile))


In [12]:
# Run the sensor processing package to process the images of a demonstration
task = "random-uncluttered"
demos_dir = pathlib.Path(Config()["demos"]["directory"])
task_dir = pathlib.Path(demos_dir, "demos", task)
with torch.no_grad():
    for demo_dir in task_dir.iterdir():
        if not demo_dir.is_dir():
            pass
        bcd = BCDemonstration(demo_dir, sensorprocessor=None)
        for i in range(bcd.trim_from, bcd.trim_to):
            sensor_readings, _ = bcd.get_image(i)
            sensor_readings = sensor_readings.to(device)
            z = sp.process(sensor_readings)
            break

print(z)
print(z.shape)

Cameras found: ['dev2']
There are 596 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
Cameras found: ['dev2']
There are 388 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
Cameras found: ['dev2']
There are 547 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
Cameras found: ['dev2']
There are 523 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
[ 2.25910211e+00 -3.12557912e+00 -4.02738857e+00 -1.41627145e+00
 -3.12995100e+00 -6.43405008e+00 -2.54491150e-01 -3.33602190e+00
 -3.09464884e+00 -1.86938953e+00 -8.44387341e+00 -1.45260823e+00
 -4.26778698e+00 -3.31915283e+00  8.21737051e-01 -2.43788624e+00
 -4.

In [13]:
# Run the sensor processing inside the BCDemonstration
task = "random-uncluttered"
demos_dir = pathlib.Path(Config()["demos"]["directory"])
task_dir = pathlib.Path(demos_dir, "demos", task)
with torch.no_grad():
    for demo_dir in task_dir.iterdir():
        if not demo_dir.is_dir():
            pass
        bcd = BCDemonstration(demo_dir, sensorprocessor=sp)
        for i in range(bcd.trim_from, bcd.trim_to):
            #sensor_readings, _ = bcd.get_image(i)
            #sensor_readings = sensor_readings.to(device)
            #z = sp.process(sensor_readings)
            z = bcd.get_z(i)
            break

print(z)
print(z.shape)

Cameras found: ['dev2']
There are 596 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
Cameras found: ['dev2']
There are 388 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
Cameras found: ['dev2']
There are 547 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
Cameras found: ['dev2']
There are 523 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
sensor readings shape torch.Size([1, 3, 256, 256])
[ 2.25910211e+00 -3.12557912e+00 -4.02738857e+00 -1.41627145e+00
 -3.12995100e+00 -6.43405008e+00 -2.54491150e-01 -3.33602190e+00
 -3.09464884e+00 -1.86938953e+00 -8.44387341e+00 -1.45260823e+00
 -4.26778698e+00 -3.31915283e+00  8.21737051e-01 -2.43788624e+00
 -4.