# Train an LSTM based controller 

Train and save an LSTM-based controller. This notebook contains:
* Code for loading and pre-processing the training data.
* Code for training an LSTM with specific parameters and saving it.

In [1]:
import sys
sys.path.append("..")
from settings import Config

import pathlib
#from pprint import pformat
from tqdm import tqdm
import numpy as np

#import matplotlib.pyplot as plt

import torch
import torch.nn as nn
#import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

from sensorprocessing import sp_conv_vae
from demo_to_trainingdata import create_RNN_training_sequence_xy, BCDemonstration
from bc_LSTM import LSTMXYPredictor, LSTMResidualController
from robot.al5d_position_controller import RobotPosition

from tensorboardX import SummaryWriter


Loading pointer config file: C:\Users\lboloni\.config\BerryPicker\mainsettings.yaml
Loading machine-specific config file: G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\settings-LotziYoga.yaml


### Creating training and validation data
Create training and validation data from all the demonstrations of a certain task.

In [6]:

def create_bc_training_and_validation(task):
    """Build shuffled training and validation tensors for behavior cloning.

    Every demonstration directory of the task is read through a
    ``BCDemonstration`` that uses the conv-VAE sensor processor, the actions
    are normalized through ``RobotPosition``, and the resulting (z, a) pairs
    are cut into sequences of length 10 by ``create_RNN_training_sequence_xy``.
    The sequences of all demonstrations are concatenated, shuffled and split
    67% / 33% into training and validation data.

    Args:
        task: name of the task subdirectory under ``<demos directory>/demos``.

    Returns:
        The tuple ``(inputs_training, targets_training, inputs_validation,
        targets_validation)`` of torch tensors.

    Raises:
        ValueError: if the task directory contains no demonstration
            directories.
    """
    conv_vae_jsonfile = pathlib.Path(Config()["controller"]["vae_json"])
    conv_vae_model_pthfile = pathlib.Path(Config()["controller"]["vae_model"])

    sp = sp_conv_vae.ConvVaeSensorProcessing(conv_vae_jsonfile,
                                            conv_vae_model_pthfile)

    demos_dir = pathlib.Path(Config()["demos"]["directory"])
    task_dir = pathlib.Path(demos_dir, "demos", task)

    inputlist = []
    targetlist = []

    for demo_dir in task_dir.iterdir():
        # Only directories are demonstrations; skip stray files.
        if not demo_dir.is_dir():
            continue
        bcd = BCDemonstration(demo_dir, sensorprocessor=sp)
        print(bcd)
        z, a = bcd.read_z_a()
        print(z.shape)
        print(a.shape)
        # Normalize the actions row by row.
        anorm = np.zeros(a.shape, np.float32)
        for i in range(a.shape[0]):
            rp = RobotPosition.from_vector(a[i])
            anorm[i, :] = rp.to_normalized_vector()

        # Per-demonstration names, so that they do not clash with the
        # concatenated tensors built after the loop.
        demo_inputs, demo_targets = create_RNN_training_sequence_xy(
            z, anorm, sequence_length=10)
        inputlist.append(demo_inputs)
        targetlist.append(demo_targets)

    if not inputlist:
        raise ValueError(f"No demonstration directories found in {task_dir}")

    inputs = torch.cat(inputlist)
    targets = torch.cat(targetlist)

    # Shuffle at the level of individual sequences (not whole demonstrations)
    # and apply the same permutation to inputs and targets.
    # NOTE(review): if create_RNN_training_sequence_xy produces overlapping
    # windows, this split lets near-duplicate sequences land on both sides.
    rows = torch.randperm(inputs.size(0))
    shuffled_inputs = inputs[rows]
    shuffled_targets = targets[rows]

    # The first 67% goes to training, starting at index 0 so that no sample
    # is dropped; the remainder goes to validation.
    training_size = int(inputs.size(0) * 0.67)
    inputs_training = shuffled_inputs[:training_size]
    targets_training = shuffled_targets[:training_size]

    inputs_validation = shuffled_inputs[training_size:]
    targets_validation = shuffled_targets[training_size:]
    return inputs_training, targets_training, inputs_validation, targets_validation

Cameras found: ['dev2']
There are 744 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
{'actiontype': 'rc-position-target',
 'camera': 'dev2',
 'cameras': ['dev2'],
 'maxsteps': 744,
 'sensorprocessor': <sensorprocessing.sp_conv_vae.ConvVaeSensorProcessing object at 0x0000018CFDB7BF50>,
 'source_dir': WindowsPath('C:/Users/lboloni/Documents/Code/_TempData/BerryPicker-demos/demos/proprioception-uncluttered/2024_10_26__16_18_47'),
 'trim_from': 1,
 'trim_to': 744}
(743, 128)
(743, 6)
Cameras found: ['dev2']
There are 968 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
{'actiontype': 'rc-position-target',
 'camera': 'dev2',
 'cameras': ['dev2'],
 'maxsteps': 968,
 'sensorprocessor': <sensorprocessing.sp_conv_vae.ConvVaeSensorProcessing object at 0x0000018CFDB7BF50>,
 'source_dir': WindowsPath('C:/Users/lboloni/Documents/Code/_TempData/BerryPicker-demos/demos/proprioception-uncluttered/2024_10_26__

In [7]:
def validate_behavior_cloning(model, criterion, inputs_validation, targets_validation):
    """Return the mean validation loss of a behavior cloning model.

    Each validation input sequence is paired with a single target. The
    sequences are presented to the model one at a time, each as a batch of
    size one, through a plain ``model(...)`` call. The model is switched to
    evaluation mode and is left in that mode when the function returns.

    model: an LSTM or similar model that can consume a sequence of inputs
    criterion: loss function called as ``criterion(output, target)``
    inputs_validation: input sequences, indexed along the first dimension
    targets_validation: targets, indexed along the first dimension
    """
    sequence_count = inputs_validation.shape[0]
    model.eval()
    # No gradients are needed for validation.
    with torch.no_grad():
        # unsqueeze(0) adds the leading batch dimension of size one.
        total_loss = sum(
            criterion(
                model(inputs_validation[idx].unsqueeze(0)),
                targets_validation[idx].unsqueeze(0),
            ).item()
            for idx in range(sequence_count)
        )
    return total_loss / sequence_count

def train_behavior_cloning(model, optimizer, criterion, inputs_training, targets_training, inputs_validation, targets_validation, num_epochs, writer = None):
    """Train a behavior cloning model of the LSTM class.

    For every epoch the training sequences are visited one by one, in order,
    each as a batch of size one, with an optimizer step after every sequence.
    After each epoch the validation loss is computed with
    ``validate_behavior_cloning``. If a writer is given, both average losses
    are logged under the tags "TrainingLoss" and "ValidationLoss". A progress
    line is printed every second epoch.
    """
    sequence_count = inputs_training.shape[0]

    for epoch in tqdm(range(num_epochs)):
        # Validation leaves the model in eval mode, so re-enable training mode.
        model.train()

        epoch_loss = 0
        for idx in range(sequence_count):
            # unsqueeze(0) adds the leading batch dimension of size one
            batch_input = inputs_training[idx].unsqueeze(0)
            batch_target = targets_training[idx].unsqueeze(0)

            # Forward pass
            loss = criterion(model(batch_input), batch_target)
            epoch_loss += loss.item()

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        avg_training_loss = epoch_loss / sequence_count
        avg_validation_loss = validate_behavior_cloning(
            model, criterion,
            inputs_validation=inputs_validation,
            targets_validation=targets_validation)

        if writer is not None:
            for tag, value in (("TrainingLoss", avg_training_loss),
                               ("ValidationLoss", avg_validation_loss)):
                writer.add_scalar(tag, value, epoch)
            writer.flush()

        # Report on every second epoch (epochs are counted from 1 in the output)
        if epoch % 2 == 1:
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_training_loss:.4f} Validation Loss: {avg_validation_loss:.4f} ')




# Train the LSTMXYPredictor model 

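Trains the LSTMXYPredictor model, which serves as the baseline LSTM. (The text originally described it as a single-layer LSTM, but the cell below instantiates it with `num_layers = 2`.)

Training notes:
* On the proprioception experiments, the validation loss plateaus after roughly 20 epochs:
    Epoch [20/100], Training Loss: 0.0079 Validation Loss: 0.0080
* No meaningful further improvement of the validation loss is observed from there. The training loss keeps decreasing, which suggests that the model starts to overfit.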

In [8]:
# Hyperparameters of the LSTMXYPredictor baseline
latent_size = Config()["robot"]["latent_encoding_size"]
output_size = 6  # degrees of freedom in the robot
num_layers = 2
hidden_size = 32  # passed to LSTMXYPredictor as hidden_size

# Instantiate the model
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size = output_size, num_layers=num_layers)

# Load the training and validation data of the task
task = "proprioception-uncluttered"
inputs_training, targets_training, inputs_validation, targets_validation = create_bc_training_and_validation(task)

# Loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100

# The SummaryWriter stores its event files in `logdir`; point TensorBoard at
# that directory (tensorboard --logdir <directory>) to view the loss curves.
# NOTE(review): this path is hard-coded and user-specific -- consider reading
# it from Config instead.
writer = SummaryWriter(logdir="/home/lboloni/runs/example")
try:
    train_behavior_cloning(
        model, optimizer, criterion,
        inputs_training=inputs_training,
        targets_training=targets_training,
        inputs_validation=inputs_validation,
        targets_validation=targets_validation,
        num_epochs=num_epochs, writer=writer)
    print("Training complete.")
finally:
    # Close the writer even if training fails or is interrupted, so that the
    # logged values are flushed and the event file is released.
    writer.close()


  2%|▏         | 2/100 [00:15<12:47,  7.84s/it]

Epoch [2/100], Training Loss: 0.0117 Validation Loss: 0.0115 


  4%|▍         | 4/100 [00:31<12:38,  7.90s/it]

Epoch [4/100], Training Loss: 0.0108 Validation Loss: 0.0105 


  6%|▌         | 6/100 [00:47<12:25,  7.93s/it]

Epoch [6/100], Training Loss: 0.0103 Validation Loss: 0.0101 


  8%|▊         | 8/100 [01:04<12:27,  8.12s/it]

Epoch [8/100], Training Loss: 0.0099 Validation Loss: 0.0097 


 10%|█         | 10/100 [01:20<12:12,  8.14s/it]

Epoch [10/100], Training Loss: 0.0096 Validation Loss: 0.0093 


 12%|█▏        | 12/100 [01:37<12:06,  8.25s/it]

Epoch [12/100], Training Loss: 0.0093 Validation Loss: 0.0089 


 14%|█▍        | 14/100 [01:54<12:17,  8.58s/it]

Epoch [14/100], Training Loss: 0.0089 Validation Loss: 0.0084 


 16%|█▌        | 16/100 [02:11<11:58,  8.55s/it]

Epoch [16/100], Training Loss: 0.0085 Validation Loss: 0.0079 


 18%|█▊        | 18/100 [02:30<12:02,  8.81s/it]

Epoch [18/100], Training Loss: 0.0082 Validation Loss: 0.0081 


 20%|██        | 20/100 [02:47<11:41,  8.77s/it]

Epoch [20/100], Training Loss: 0.0079 Validation Loss: 0.0080 


 22%|██▏       | 22/100 [03:05<11:27,  8.81s/it]

Epoch [22/100], Training Loss: 0.0077 Validation Loss: 0.0076 


 24%|██▍       | 24/100 [03:22<11:02,  8.72s/it]

Epoch [24/100], Training Loss: 0.0075 Validation Loss: 0.0075 


 26%|██▌       | 26/100 [03:40<11:02,  8.96s/it]

Epoch [26/100], Training Loss: 0.0074 Validation Loss: 0.0074 


 28%|██▊       | 28/100 [03:58<10:55,  9.10s/it]

Epoch [28/100], Training Loss: 0.0078 Validation Loss: 0.0075 


 30%|███       | 30/100 [04:15<10:11,  8.74s/it]

Epoch [30/100], Training Loss: 0.0070 Validation Loss: 0.0073 


 32%|███▏      | 32/100 [04:33<09:52,  8.71s/it]

Epoch [32/100], Training Loss: 0.0068 Validation Loss: 0.0077 


 34%|███▍      | 34/100 [04:51<09:47,  8.91s/it]

Epoch [34/100], Training Loss: 0.0067 Validation Loss: 0.0073 


 36%|███▌      | 36/100 [05:10<09:45,  9.16s/it]

Epoch [36/100], Training Loss: 0.0067 Validation Loss: 0.0073 


 38%|███▊      | 38/100 [05:29<09:47,  9.48s/it]

Epoch [38/100], Training Loss: 0.0065 Validation Loss: 0.0071 


 40%|████      | 40/100 [05:50<09:54,  9.90s/it]

Epoch [40/100], Training Loss: 0.0066 Validation Loss: 0.0074 


 42%|████▏     | 42/100 [06:08<09:12,  9.53s/it]

Epoch [42/100], Training Loss: 0.0062 Validation Loss: 0.0072 


 44%|████▍     | 44/100 [06:27<08:56,  9.58s/it]

Epoch [44/100], Training Loss: 0.0061 Validation Loss: 0.0082 


 46%|████▌     | 46/100 [06:47<08:47,  9.77s/it]

Epoch [46/100], Training Loss: 0.0059 Validation Loss: 0.0074 


 48%|████▊     | 48/100 [07:10<09:09, 10.57s/it]

Epoch [48/100], Training Loss: 0.0056 Validation Loss: 0.0076 


 50%|█████     | 50/100 [07:34<09:31, 11.44s/it]

Epoch [50/100], Training Loss: 0.0055 Validation Loss: 0.0081 


 52%|█████▏    | 52/100 [08:01<10:01, 12.53s/it]

Epoch [52/100], Training Loss: 0.0054 Validation Loss: 0.0082 


 54%|█████▍    | 54/100 [08:29<10:07, 13.20s/it]

Epoch [54/100], Training Loss: 0.0056 Validation Loss: 0.0079 


 56%|█████▌    | 56/100 [08:51<08:49, 12.04s/it]

Epoch [56/100], Training Loss: 0.0053 Validation Loss: 0.0088 


 58%|█████▊    | 58/100 [09:12<07:57, 11.36s/it]

Epoch [58/100], Training Loss: 0.0053 Validation Loss: 0.0085 


 60%|██████    | 60/100 [09:32<07:03, 10.59s/it]

Epoch [60/100], Training Loss: 0.0053 Validation Loss: 0.0079 


 62%|██████▏   | 62/100 [09:53<06:36, 10.44s/it]

Epoch [62/100], Training Loss: 0.0051 Validation Loss: 0.0081 


 64%|██████▍   | 64/100 [10:14<06:24, 10.67s/it]

Epoch [64/100], Training Loss: 0.0052 Validation Loss: 0.0101 


 66%|██████▌   | 66/100 [10:35<05:58, 10.55s/it]

Epoch [66/100], Training Loss: 0.0049 Validation Loss: 0.0087 


 68%|██████▊   | 68/100 [10:59<06:02, 11.33s/it]

Epoch [68/100], Training Loss: 0.0048 Validation Loss: 0.0084 


 70%|███████   | 70/100 [11:21<05:35, 11.19s/it]

Epoch [70/100], Training Loss: 0.0048 Validation Loss: 0.0083 


 72%|███████▏  | 72/100 [11:42<05:00, 10.72s/it]

Epoch [72/100], Training Loss: 0.0045 Validation Loss: 0.0090 


 74%|███████▍  | 74/100 [12:05<04:49, 11.14s/it]

Epoch [74/100], Training Loss: 0.0048 Validation Loss: 0.0074 


 76%|███████▌  | 76/100 [12:28<04:28, 11.21s/it]

Epoch [76/100], Training Loss: 0.0043 Validation Loss: 0.0078 


 78%|███████▊  | 78/100 [12:49<04:00, 10.92s/it]

Epoch [78/100], Training Loss: 0.0047 Validation Loss: 0.0088 


 80%|████████  | 80/100 [13:11<03:37, 10.89s/it]

Epoch [80/100], Training Loss: 0.0045 Validation Loss: 0.0081 


 82%|████████▏ | 82/100 [13:31<03:09, 10.53s/it]

Epoch [82/100], Training Loss: 0.0046 Validation Loss: 0.0087 


 84%|████████▍ | 84/100 [13:52<02:48, 10.50s/it]

Epoch [84/100], Training Loss: 0.0042 Validation Loss: 0.0079 


 86%|████████▌ | 86/100 [14:15<02:35, 11.13s/it]

Epoch [86/100], Training Loss: 0.0040 Validation Loss: 0.0077 


 88%|████████▊ | 88/100 [14:38<02:15, 11.27s/it]

Epoch [88/100], Training Loss: 0.0041 Validation Loss: 0.0085 


 90%|█████████ | 90/100 [14:59<01:48, 10.84s/it]

Epoch [90/100], Training Loss: 0.0039 Validation Loss: 0.0080 


 92%|█████████▏| 92/100 [15:21<01:26, 10.82s/it]

Epoch [92/100], Training Loss: 0.0041 Validation Loss: 0.0081 


 94%|█████████▍| 94/100 [15:45<01:09, 11.51s/it]

Epoch [94/100], Training Loss: 0.0039 Validation Loss: 0.0073 


 96%|█████████▌| 96/100 [16:09<00:46, 11.75s/it]

Epoch [96/100], Training Loss: 0.0039 Validation Loss: 0.0078 


 98%|█████████▊| 98/100 [16:29<00:21, 10.88s/it]

Epoch [98/100], Training Loss: 0.0042 Validation Loss: 0.0077 


100%|██████████| 100/100 [16:50<00:00, 10.10s/it]

Epoch [100/100], Training Loss: 0.0035 Validation Loss: 0.0077 
Training complete.





In [None]:

# Save the trained weights (the model's state_dict) to the file configured
# under Config()["controller"]["lstm_model_file"].
filename_lstm = Config()["controller"]["lstm_model_file"]
torch.save(model.state_dict(), filename_lstm)

# Load the behavior cloning controller and use it with real-time data

In [None]:
# Rebuild the network with the same hyperparameters that were used for
# training, so that the saved state_dict matches the model.
latent_size = Config()["robot"]["latent_encoding_size"]
hidden_size = 32  # passed to LSTMXYPredictor as hidden_size
output_size = 6  # degrees of freedom in the robot
num_layers = 2

# Instantiate the model and the loss function
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size = output_size, num_layers=num_layers)
criterion = nn.MSELoss()  # Mean Squared Error for regression

# Restore the trained weights from the configured model file
filename_lstm = Config()["controller"]["lstm_model_file"]
model.load_state_dict(torch.load(filename_lstm))
# The model is only used for inference from here on, so switch it to
# evaluation mode.
model.eval()

In [None]:
# Get one demonstration
task = "proprioception-uncluttered"

# Build the sensor processor from the configured VAE files, the same way
# create_bc_training_and_validation does.
conv_vae_jsonfile = pathlib.Path(Config()["controller"]["vae_json"])
conv_vae_model_pthfile = pathlib.Path(Config()["controller"]["vae_model"])
sp = sp_conv_vae.ConvVaeSensorProcessing(conv_vae_jsonfile,
                                        conv_vae_model_pthfile)

demos_dir = pathlib.Path(Config()["demos"]["directory"])
task_dir = pathlib.Path(demos_dir, "demos", task)

# Pick the first demonstration directory. Stray files are skipped, and the
# entries are sorted so that the same demonstration is chosen on every run.
demo_dir = next(
    (entry for entry in sorted(task_dir.iterdir()) if entry.is_dir()), None)
if demo_dir is None:
    raise FileNotFoundError(f"No demonstration directories found in {task_dir}")

bcd = BCDemonstration(demo_dir, sensorprocessor=sp)
# z and a are the two arrays returned by read_z_a(); they are indexed per
# step in the cells below.
z, a = bcd.read_z_a()

In [None]:
# Quick look at the loaded demonstration.
# NOTE: a bare expression that is not the last line of a notebook cell is
# not displayed, so the value of z.shape[0] is not shown by this cell.
z.shape[0]
print(a[1])  # the entry of a at index 1

In [None]:
# Replay the demonstration step by step: feed z[i] to the model through
# forward_keep_state and print the prediction next to the recorded a[i+1].
# No gradients are needed for inference.
with torch.no_grad():
    for i in range(z.shape[0] - 1):
        # Named z_step rather than `input`, to avoid shadowing the builtin.
        # The two unsqueeze(0) calls add two leading dimensions of size one
        # (presumably batch and sequence -- TODO confirm against the model).
        z_step = torch.from_numpy(z[i]).unsqueeze(0).unsqueeze(0)
        print(z_step)
        a_pred = model.forward_keep_state(z_step)
        a_real = a[i + 1]
        print(f"a_real: {a_real}\na_pred: {a_pred}")