# Train an LSTM based controller 

Train and save an LSTM-based controller. It contains:
* Code for loading and pre-processing the training data. 
* Training an LSTM with specific parameters and saving it

In [1]:
import sys
sys.path.append("..")
from settings import Config

import pathlib
#from pprint import pformat


#import matplotlib.pyplot as plt

import torch
import torch.nn as nn
#import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

from sensorprocessing import sp_conv_vae
from demo_to_trainingdata import create_RNN_training_sequence_xy, BCDemonstration
from bc_LSTM import LSTMXYPredictor, LSTMResidualController

Loading pointer config file: /home/lboloni/.config/BerryPicker/mainsettings.yaml
Loading machine-specific config file: /home/lboloni/Insync/lotzi.boloni@gmail.com/Google Drive/LotziStudy/Code/PackageTracking/BerryPicker/settings/settings-tredy2.yaml


### Creating training and validation data
Create training and validation data from all the demonstrations of a certain task.

In [2]:
task = "proprioception-uncluttered"
sp = sp_conv_vae.ConvVaeSensorProcessing()

demos_dir = pathlib.Path(Config()["demos"]["directory"])
task_dir = pathlib.Path(demos_dir, "demos", task)

inputlist = []
targetlist = []

for demo_dir in task_dir.iterdir():
    if not demo_dir.is_dir():
        pass
    bcd = BCDemonstration(demo_dir, sensorprocessor=sp)
    print(bcd)
    z, a = bcd.read_z_a()
    print(z.shape)
    print(a.shape)
    inputs, targets = create_RNN_training_sequence_xy(z, a, sequence_length=10)
    inputlist.append(inputs)
    targetlist.append(targets)

inputs = torch.cat(inputlist)
targets = torch.cat(targetlist)

# Separate the training and validation data. 
# We will be shuffling the demonstrations 
rows = torch.randperm(inputs.size(0)) 
shuffled_inputs = inputs[rows]
shuffled_targets = targets[rows]

training_size = int( inputs.size(0) * 0.67 )
inputs_training = shuffled_inputs[1:training_size]
targets_training = shuffled_targets[1:training_size]

inputs_validation = shuffled_inputs[training_size:]
targets_validation = shuffled_targets[training_size:]

resume_model and jsonfile are:
	resume_model=/home/lboloni/Documents/Hackingwork/__Temporary/BerryPicker-models/Conv-VAE/models/VAE_Robot/0901_125042/checkpoint-epoch171.pth
	jsonfile=/home/lboloni/Documents/Hackingwork/__Temporary/BerryPicker-models/Conv-VAE/models/VAE_Robot/0901_125042/config.json
{
    "name": "VAE_Robot",
    "n_gpu": 1,
    "arch": {
        "type": "VanillaVAE",
        "args": {
            "in_channels": 3,
            "latent_dims": 128,
            "flow": false
        }
    },
    "data_loader": {
        "###type-prev": "RobotDataLoader",
        "type": "CelebDataLoader",
        "args": {
            "data_dir": "/home/lboloni/Documents/Hackingwork/__Temporary/VisionBasedRobotManipulator-training-data/vae-training-data",
            "batch_size": 64,
            "shuffle": true,
            "validation_split": 0.2,
            "num_workers": 2
        }
    },
    "optimizer": {
        "type": "Adam",
        "args": {
            "lr": 0.005,
         

  self.checkpoint = torch.load(self.config.resume, map_location=torch.device('cpu'))


Cameras found: ['dev2']
There are 753 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
{'actiontype': 'rc-position-target',
 'camera': 'dev2',
 'cameras': ['dev2'],
 'maxsteps': 753,
 'sensorprocessor': <sensorprocessing.sp_conv_vae.ConvVaeSensorProcessing object at 0x77a54596e9e0>,
 'source_dir': PosixPath('/home/lboloni/Documents/Hackingwork/__Temporary/BerryPicker-demos/demos/proprioception-uncluttered/2024_10_26__16_31_40'),
 'trim_from': 1,
 'trim_to': 753}
(752, 128)
(752, 6)
Cameras found: ['dev2']
There are 968 steps in this demonstration
This demonstration was recorded by the following cameras: ['dev2']
{'actiontype': 'rc-position-target',
 'camera': 'dev2',
 'cameras': ['dev2'],
 'maxsteps': 968,
 'sensorprocessor': <sensorprocessing.sp_conv_vae.ConvVaeSensorProcessing object at 0x77a54596e9e0>,
 'source_dir': PosixPath('/home/lboloni/Documents/Hackingwork/__Temporary/BerryPicker-demos/demos/proprioception-uncluttered/2024_10_26__

In [3]:
def validate_behavior_cloning(model, criterion, inputs_validation, targets_validation):
    num_sequences = inputs_validation.shape[0]
    model.eval()
    val_loss = 0
    with torch.no_grad():  # Disable gradient computation
        for i in range(num_sequences):
            # Forward pass
            input_seq = inputs_validation[i]
            target = targets_validation[i]
            # Reshape for batch compatibility
            input_seq = input_seq.unsqueeze(0)  # Shape: [1, sequence_length, latent_size]
            target = target.unsqueeze(0)        # Shape: [1, latent_size]

            outputs = model(input_seq)
            loss = criterion(outputs, target)
            # Accumulate loss
            val_loss += loss.item()
    avg_loss = val_loss / num_sequences
    return avg_loss

def train_behavior_cloning(model, optimizer, criterion, inputs_training, targets_training, inputs_validation, targets_validation, num_epochs):
    num_sequences = inputs_training.shape[0]

    for epoch in range(num_epochs):
        model.train()
        
        # Loop over each sequence in the batch
        training_loss = 0
        for i in range(num_sequences):
            # Prepare input and target
            input_seq = inputs_training[i]
            target = targets_training[i]

            # Reshape for batch compatibility
            input_seq = input_seq.unsqueeze(0)  # Shape: [1, sequence_length, latent_size]
            target = target.unsqueeze(0)        # Shape: [1, latent_size]

            # Forward pass
            output = model(input_seq)
            loss = criterion(output, target)
            training_loss += loss.item()
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        avg_training_loss = training_loss / num_sequences
        avg_validation_loss = validate_behavior_cloning(model, criterion, inputs_validation=inputs_validation, targets_validation=targets_validation)

        if (epoch+1) % 2 == 0: # was 0
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_training_loss:.4f} Validation Loss: {avg_validation_loss:.4f} ')




In [None]:
# Original
latent_size = Config()["robot"]["latent_encoding_size"]  
hidden_size = 32  # degrees of freedom in the robot
output_size = 6  # degrees of freedom in the robot
num_layers = 2

# Instantiate model, loss function, and optimizer
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size = output_size, num_layers=num_layers)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 8

train_behavior_cloning(
    model, optimizer, criterion,
    inputs_training=inputs_training, 
    targets_training=targets_training, 
    inputs_validation=inputs_validation,
    targets_validation=targets_validation,
    num_epochs=num_epochs)
print("Training complete.")


Epoch [2/8], Training Loss: 681.0813 Validation Loss: 673.9674 
Epoch [4/8], Training Loss: 657.6183 Validation Loss: 673.2660 
Epoch [6/8], Training Loss: 657.6235 Validation Loss: 673.2649 
Epoch [8/8], Training Loss: 650.1760 Validation Loss: 654.8228 
Training complete.


RuntimeError: Parent directory /home/lboloni/Documents/Hackingwork/__Temporary/BerryPicker-models/Controller does not exist.

In [5]:

# FIXME: save the model
filename_lstm = Config()["controller"]["lstm_model_file"]
torch.save(model.state_dict(), filename_lstm)

# Load the behavior cloning controller and use it with a real time data

In [11]:
# Original
latent_size = Config()["robot"]["latent_encoding_size"]  
hidden_size = 32  # degrees of freedom in the robot
output_size = 6  # degrees of freedom in the robot
num_layers = 2

# Instantiate model, loss function, and optimizer
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size = output_size, num_layers=num_layers)
criterion = nn.MSELoss()  # Mean Squared Error for regression
filename_lstm = Config()["controller"]["lstm_model_file"]
model.load_state_dict(torch.load(filename_lstm))

  model.load_state_dict(torch.load(filename_lstm))


<All keys matched successfully>

In [12]:
# Get one demonstration
task = "proprioception-uncluttered"
sp = sp_conv_vae.ConvVaeSensorProcessing()

demos_dir = pathlib.Path(Config()["demos"]["directory"])
task_dir = pathlib.Path(demos_dir, "demos", task)

inputlist = []
targetlist = []

demo_dir = next(task_dir.iterdir())
bcd = BCDemonstration(demo_dir, sensorprocessor=sp)
z, a = bcd.read_z_a()

resume_model and jsonfile are:
	resume_model=/home/lboloni/Documents/Hackingwork/__Temporary/BerryPicker-models/Conv-VAE/models/VAE_Robot/0901_125042/checkpoint-epoch171.pth
	jsonfile=/home/lboloni/Documents/Hackingwork/__Temporary/BerryPicker-models/Conv-VAE/models/VAE_Robot/0901_125042/config.json
{
    "name": "VAE_Robot",
    "n_gpu": 1,
    "arch": {
        "type": "VanillaVAE",
        "args": {
            "in_channels": 3,
            "latent_dims": 128,
            "flow": false
        }
    },
    "data_loader": {
        "###type-prev": "RobotDataLoader",
        "type": "CelebDataLoader",
        "args": {
            "data_dir": "/home/lboloni/Documents/Hackingwork/__Temporary/VisionBasedRobotManipulator-training-data/vae-training-data",
            "batch_size": 64,
            "shuffle": true,
            "validation_split": 0.2,
            "num_workers": 2
        }
    },
    "optimizer": {
        "type": "Adam",
        "args": {
            "lr": 0.005,
         

  self.checkpoint = torch.load(self.config.resume, map_location=torch.device('cpu'))


In [13]:
z.shape[0]
print(a[1])

[  5.   5.   0. -45.  75. 100.]


In [15]:
for i in range(z.shape[0]-1):
    input = torch.from_numpy(z[i])
    input = input.unsqueeze(0)
    input = input.unsqueeze(0)
    print(input)
    a_pred = model.forward_keep_state(input)
    a_real = a[i+1]
    print(f"a_real: {a_real}\na_pred: {a_pred}")

tensor([[[ 0.0809, -0.3292,  0.3982,  0.0399, -0.5501,  0.0559,  0.5886,
           0.1671, -0.4045, -0.4336, -0.0031,  0.2405, -0.3666,  0.1904,
          -0.2558,  0.2077,  0.4217,  0.0933, -0.0935, -0.1027, -0.2679,
          -0.1674, -0.0418, -0.2481, -0.1123,  0.8844,  0.2335, -0.0526,
           0.3117,  0.1491, -0.1633, -0.3215,  0.5215,  0.2879,  0.2479,
          -0.7827,  0.0869,  0.2670,  0.1933,  0.5232, -0.2639, -0.2443,
           0.0714, -0.0761, -0.1390, -0.4110,  0.0309,  0.1366,  0.2065,
           0.2161,  0.0827, -0.2270, -0.7678,  0.6647, -0.1513, -0.1283,
           0.0489,  0.4195, -0.1720,  0.1673,  0.4944, -0.1630, -0.0482,
           0.5421, -0.5594,  0.5295, -0.5656, -0.4235, -0.1329,  0.0323,
           0.4436, -0.5242, -0.0415, -0.4039, -0.4887,  0.0466,  0.2195,
           0.6017, -0.2847,  0.3494, -0.3781, -0.3122,  0.2698, -0.7181,
          -0.5909, -0.4190,  0.3867, -0.0398, -0.1077, -0.0463,  0.0394,
           0.2645, -0.4183, -0.2487, -0.3309,  0.68