# Train an LSTM-based controller

Train and save an LSTM-based controller. This notebook contains:
* Code for loading and pre-processing the training data.
* Code for training an LSTM with specific parameters and saving it.

In [1]:
import sys
sys.path.append("..")
from settings import Config

import pathlib
#from pprint import pformat
from tqdm import tqdm
import numpy as np

#import matplotlib.pyplot as plt

import torch
import torch.nn as nn
#import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

from sensorprocessing import sp_conv_vae
from demo_to_trainingdata import create_RNN_training_sequence_xy, BCDemonstration
from bc_LSTM import LSTMXYPredictor, LSTMResidualController
from robot.al5d_position_controller import RobotPosition

from tensorboardX import SummaryWriter


Loading pointer config file: /home/ssheikholeslami/.config/BerryPicker/mainsettings.yaml
Loading machine-specific config file: /home/ssheikholeslami/SaharaBerryPickerData/settings-sahara.yaml


### Creating training and validation data
Create training and validation data from all the demonstrations of a certain task.

In [6]:

def create_bc_training_and_validation(task, sequence_length=10, training_fraction=0.67):
    """Build shuffled training and validation tensors for behavior cloning.

    Every sub-directory of ``<demos directory>/demos/<task>`` is read as one
    demonstration. For each demonstration the (z, a) pair returned by
    ``BCDemonstration.read_z_a()`` is loaded, every action row is normalized
    through ``RobotPosition``, and the result is cut into sequences with
    ``create_RNN_training_sequence_xy``. The sequences of all demonstrations
    are concatenated, shuffled with a random permutation and split.

    Args:
        task: name of the task sub-directory holding the demonstrations.
        sequence_length: sequence length forwarded to
            ``create_RNN_training_sequence_xy``.
        training_fraction: fraction of the shuffled sequences that goes to
            the training set; the remainder becomes the validation set.

    Returns:
        The tuple ``(inputs_training, targets_training, inputs_validation,
        targets_validation)``.

    Raises:
        ValueError: if the task directory contains no demonstration
            directories.
    """
    conv_vae_jsonfile = pathlib.Path(Config()["controller"]["vae_json"])
    conv_vae_model_pthfile = pathlib.Path(Config()["controller"]["vae_model"])

    sp = sp_conv_vae.ConvVaeSensorProcessing(conv_vae_jsonfile,
                                            conv_vae_model_pthfile)

    demos_dir = pathlib.Path(Config()["demos"]["directory"])
    task_dir = pathlib.Path(demos_dir, "demos", task)

    inputlist = []
    targetlist = []

    for demo_dir in task_dir.iterdir():
        if not demo_dir.is_dir():
            # Only directories are demonstrations; skip stray files.
            continue
        bcd = BCDemonstration(demo_dir, sensorprocessor=sp)
        print(bcd)
        z, a = bcd.read_z_a()
        print(z.shape)
        print(a.shape)
        # normalize the actions, one row at a time
        anorm = np.zeros(a.shape, np.float32)
        for i in range(a.shape[0]):
            rp = RobotPosition.from_vector(a[i])
            anorm[i, :] = rp.to_normalized_vector()

        # Per-demonstration names are kept distinct from the concatenated
        # tensors built after the loop.
        demo_inputs, demo_targets = create_RNN_training_sequence_xy(
            z, anorm, sequence_length=sequence_length)
        inputlist.append(demo_inputs)
        targetlist.append(demo_targets)

    if not inputlist:
        raise ValueError(f"No demonstration directories found in {task_dir}")

    inputs = torch.cat(inputlist)
    targets = torch.cat(targetlist)

    # Shuffle inputs and targets with the same permutation so that the
    # pairs stay aligned, then split into training and validation parts.
    rows = torch.randperm(inputs.size(0))
    shuffled_inputs = inputs[rows]
    shuffled_targets = targets[rows]

    training_size = int(inputs.size(0) * training_fraction)
    # Start at index 0 so that no shuffled sample is dropped.
    inputs_training = shuffled_inputs[:training_size]
    targets_training = shuffled_targets[:training_size]

    inputs_validation = shuffled_inputs[training_size:]
    targets_validation = shuffled_targets[training_size:]
    return inputs_training, targets_training, inputs_validation, targets_validation

In [7]:
def validate_behavior_cloning(model, criterion, inputs_validation, targets_validation):
    """Return the average loss of the model over the validation pairs.

    The model is switched to evaluation mode and gradient tracking is
    disabled. Each validation entry is fed to the model on its own, as a
    batch of size one, and compared against the entry of
    ``targets_validation`` at the same index. This function passes no state
    from one call of the model to the next.

    model: an LSTM or similar model that can consume a sequence of inputs
    criterion: loss function called as ``criterion(outputs, target)``
    inputs_validation: indexable collection of input sequences; its first
        dimension gives the number of sequences
    targets_validation: targets, indexed in parallel with the inputs
    """
    sequence_count = inputs_validation.shape[0]
    model.eval()
    total_loss = 0
    with torch.no_grad():  # evaluation only, no gradients needed
        for idx in range(sequence_count):
            # Add a leading batch dimension of size one to both tensors
            batch_input = inputs_validation[idx].unsqueeze(0)
            batch_target = targets_validation[idx].unsqueeze(0)
            prediction = model(batch_input)
            total_loss += criterion(prediction, batch_target).item()
    return total_loss / sequence_count

def train_behavior_cloning(model, optimizer, criterion, inputs_training, targets_training, inputs_validation, targets_validation, num_epochs, writer = None):
    """Run the training loop of a behavior cloning model of the LSTM class.

    In every epoch the training sequences are visited in index order, each
    one as a batch of size one followed by its own optimizer step. After the
    epoch, the average training loss and the average validation loss (from
    validate_behavior_cloning) are computed. If a writer is given, both
    values are logged under "TrainingLoss" and "ValidationLoss". A progress
    line is printed every second epoch. The function returns nothing; the
    model is updated in place through the optimizer.
    """
    sequence_count = inputs_training.shape[0]

    for epoch in tqdm(range(num_epochs)):
        model.train()

        # One optimizer step per training sequence
        epoch_loss = 0
        for idx in range(sequence_count):
            # Add a leading batch dimension of size one to both tensors
            batch_input = inputs_training[idx].unsqueeze(0)
            batch_target = targets_training[idx].unsqueeze(0)

            # Forward pass
            prediction = model(batch_input)
            step_loss = criterion(prediction, batch_target)
            epoch_loss += step_loss.item()

            # Backward pass and parameter update
            optimizer.zero_grad()
            step_loss.backward()
            optimizer.step()

        avg_training_loss = epoch_loss / sequence_count
        avg_validation_loss = validate_behavior_cloning(
            model, criterion,
            inputs_validation=inputs_validation,
            targets_validation=targets_validation)

        if writer is not None:
            for tag, value in (("TrainingLoss", avg_training_loss),
                               ("ValidationLoss", avg_validation_loss)):
                writer.add_scalar(tag, value, epoch)
            writer.flush()

        # Report only on even-numbered epochs (counting from 1)
        if (epoch + 1) % 2 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_training_loss:.4f} Validation Loss: {avg_validation_loss:.4f} ')




# Train the LSTMXYPredictor model 

Trains the LSTMXYPredictor model, which serves as the baseline LSTM model. (The cell below sets `num_layers = 2`.)

Training notes:
* On the proprioception experiments, this reaches the performance:
    Epoch [20/100], Training Loss: 0.0079 Validation Loss: 0.0080
* No further improvement is observed from there. 

In [8]:
# Train the LSTMXYPredictor on all demonstrations of one task and log the
# per-epoch losses to TensorBoard.

# Model hyperparameters
latent_size = Config()["robot"]["latent_encoding_size"]
output_size = 6  # degrees of freedom in the robot
num_layers = 2
hidden_size = 32  # passed to the model as its hidden_size

# Instantiate model, loss function, and optimizer
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size = output_size, num_layers=num_layers)

task = "proprioception-uncluttered"
inputs_training, targets_training, inputs_validation, targets_validation = create_bc_training_and_validation(task)


criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 100

# Create a SummaryWriter instance.
# The log directory is placed under the current user's home directory
# (~/runs/example) instead of a path tied to one specific user account.
logdir = pathlib.Path.home() / "runs" / "example"
writer = SummaryWriter(logdir=str(logdir))
try:
    train_behavior_cloning(
        model, optimizer, criterion,
        inputs_training=inputs_training,
        targets_training=targets_training,
        inputs_validation=inputs_validation,
        targets_validation=targets_validation,
        num_epochs=num_epochs, writer=writer)
    print("Training complete.")
finally:
    # Close the writer even if training is interrupted or fails
    writer.close()


KeyError: 'controller'

In [9]:

# Save the learned parameters (state dict) of the trained model to the file
# named by the "lstm_model_file" entry of the "controller" configuration.
filename_lstm = Config()["controller"]["lstm_model_file"]
trained_parameters = model.state_dict()
torch.save(trained_parameters, filename_lstm)

KeyError: 'controller'

# Load the behavior cloning controller and use it with real-time data

In [None]:
# Rebuild the model with the same hyperparameters that were used for
# training and restore its saved parameters.
latent_size = Config()["robot"]["latent_encoding_size"]
hidden_size = 32  # passed to the model as its hidden_size
output_size = 6  # degrees of freedom in the robot
num_layers = 2

# Instantiate model and loss function
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size = output_size, num_layers=num_layers)
criterion = nn.MSELoss()  # Mean Squared Error for regression
filename_lstm = Config()["controller"]["lstm_model_file"]
model.load_state_dict(torch.load(filename_lstm))
# The restored model is only used for prediction from here on, so put it
# into evaluation mode.
model.eval()

In [None]:
# Get one demonstration of the task and read its (z, a) data
task = "proprioception-uncluttered"
# NOTE(review): the sensor processor is built without arguments here, while
# create_bc_training_and_validation passes the VAE json and model files.
# Confirm that the constructor has suitable defaults.
sp = sp_conv_vae.ConvVaeSensorProcessing()

demos_dir = pathlib.Path(Config()["demos"]["directory"])
task_dir = pathlib.Path(demos_dir, "demos", task)

# NOTE(review): not used in this cell; kept in case other cells read them.
inputlist = []
targetlist = []

# Take the first entry that is a directory, so that a stray file in the
# task directory is not mistaken for a demonstration.
demo_dir = next((entry for entry in task_dir.iterdir() if entry.is_dir()), None)
if demo_dir is None:
    raise FileNotFoundError(f"No demonstration directory found in {task_dir}")
bcd = BCDemonstration(demo_dir, sensorprocessor=sp)
z, a = bcd.read_z_a()

In [None]:
# Quick inspection of the loaded demonstration.
# Size of the first dimension of z. NOTE(review): this bare expression is
# not the last statement of the cell, so presumably nothing is displayed.
z.shape[0]
# Print the entry of a at index 1
print(a[1])

In [None]:
# Feed the demonstration to the model one step at a time through
# forward_keep_state and print each prediction next to the action that
# follows it in the demonstration.
# Gradients are not needed for prediction, so autograd is disabled.
with torch.no_grad():
    for i in range(z.shape[0]-1):
        # Add two leading dimensions of size one in front of the z row.
        # The name z_step avoids shadowing the builtin input().
        z_step = torch.from_numpy(z[i]).unsqueeze(0).unsqueeze(0)
        print(z_step)
        a_pred = model.forward_keep_state(z_step)
        # The training targets were normalized through RobotPosition, so the
        # recorded action is normalized the same way before it is compared.
        a_real = RobotPosition.from_vector(a[i+1]).to_normalized_vector()
        print(f"a_real: {a_real}\na_pred: {a_pred}")