In [None]:
# %%
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tensorboardX import SummaryWriter
from torch.utils.data import Subset
from torch_geometric.loader import DataLoader

from environments.panda_env import PandaEnv
from simulator import run_policy
from simulator import save_to_file
from src.dataset import RobotGraph

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:

class MLP(nn.Module):
    """Fully connected network applied to the ``x`` attribute of a batch.

    The network is ``num_hidden_layers`` repetitions of
    ``Linear -> BatchNorm1d -> ReLU -> Dropout`` followed by one ``Linear``
    projection to ``latent_dim``. With the default arguments the layer order
    (and therefore the ``state_dict`` keys ``mlp.0`` ... ``mlp.16``) is the
    same as the previous hand-written five-layer stack.

    Args:
        input_dim: Number of features per input row.
        hidden_dim: Width of every hidden layer.
        latent_dim: Number of output features.
        dropout: Drop probability used by every ``Dropout`` layer.
        num_hidden_layers: Number of hidden blocks; must be at least 1.

    Raises:
        ValueError: If ``num_hidden_layers`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, dropout=0.5, num_hidden_layers=4):
        super().__init__()
        if num_hidden_layers < 1:
            raise ValueError("num_hidden_layers must be >= 1")

        layers = []
        in_features = input_dim
        for _ in range(num_hidden_layers):
            layers.extend(
                [
                    nn.Linear(in_features, hidden_dim),
                    nn.BatchNorm1d(hidden_dim),
                    nn.ReLU(),
                    nn.Dropout(dropout),
                ]
            )
            in_features = hidden_dim
        # Final projection has no normalisation/activation: raw outputs.
        layers.append(nn.Linear(in_features, latent_dim))
        self.mlp = nn.Sequential(*layers)

    def forward(self, data):
        """Run the stack on ``data.x`` and return the result.

        ``data`` only needs an ``x`` attribute holding the feature tensor
        (presumably of shape ``(rows, input_dim)`` -- confirm against the
        dataset).
        """
        return self.mlp(data.x)



In [None]:

def train(train_loader, test_loader, writer, model, optimizer, num_epochs=50, l1_lambda=0.01):
    """Train ``model`` with a Huber loss plus an L1 penalty on weight tensors.

    After every epoch the model is evaluated with ``test`` and both losses are
    written to ``writer`` (tags ``"loss"`` and ``"test_loss"``).

    Args:
        train_loader: Iterable of batches exposing ``to(device)``, ``y`` and
            ``num_graphs``.
        test_loader: Loader handed to ``test`` after each epoch.
        writer: Object with ``add_scalar(tag, value, step)``.
        model: Module called as ``model(batch)``; moved to ``device``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        l1_lambda: Multiplier of the L1 penalty.

    Returns:
        The same ``model`` instance. Because ``test`` runs last in every
        epoch, the model is left in eval mode when at least one epoch ran.
    """
    print("Device:", device)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.975)
    model.to(device)

    for epoch in range(num_epochs):
        # test() switches the model to eval mode at the end of each epoch, so
        # training mode must be restored here; otherwise every epoch after the
        # first would run with Dropout disabled and frozen BatchNorm statistics.
        model.train()

        total_loss = 0.0
        seen_graphs = 0
        # Un-regularised Huber loss of the last batch; NaN if no batch was seen.
        loss_no_reg = float("nan")

        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            pred = model(batch)
            # Give the target the prediction's shape (as test() does) instead
            # of squeezing the prediction, which can broadcast silently.
            target = batch.y.reshape(pred.shape)
            loss = F.huber_loss(pred, target)
            loss_no_reg = loss.item()

            # L1 regularization over every parameter whose name contains
            # "weight" (this includes BatchNorm scale parameters).
            l1_reg = torch.zeros((), device=device)
            for name, param in model.named_parameters():
                if 'weight' in name:
                    l1_reg = l1_reg + torch.norm(param, 1)

            loss = loss + l1_lambda * l1_reg
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * batch.num_graphs
            seen_graphs += batch.num_graphs

        # Average over the graphs actually processed: a loader that drops its
        # last incomplete batch sees fewer samples than len(dataset).
        total_loss /= max(seen_graphs, 1)
        writer.add_scalar("loss", total_loss, epoch)

        test_loss = test(test_loader, model)
        print(
            f"Epoch {epoch}. L1 Loss: {total_loss:.4f}. Huber Loss:{loss_no_reg: .4f}. Test loss: {test_loss:.4f}")
        writer.add_scalar("test_loss", test_loss, epoch)

        # The learning rate is decayed by gamma only on epochs 0, 20, 40, ...
        if epoch % 20 == 0:
            scheduler.step()

    return model


In [None]:

def test(loader, model):
    model.eval()
    total_loss = 0.0
    total_samples = 0

    for data in loader:
        data = data.to(device)
        with torch.no_grad():
            pred = model(data)
        target = data.y.reshape(pred.shape[0], pred.shape[1])

        # loss = F.mse_loss(pred.squeeze(), target).to(device)
        loss = F.huber_loss(pred, target).to(device)
        total_loss += loss.item() * data.num_graphs
        total_samples += data.num_graphs

        avg_loss = total_loss / total_samples
    return avg_loss

In [None]:
# Upper bound on policy steps executed per target in execute_trajectory_steps.
MAX_STEPS = 250
# Number of random targets sampled by evaluate_policy.
NUM_TRAJECTORIES = 250
# Fixed orientation appended to every sampled position.
# NOTE(review): component order (wxyz vs xyzw) is defined by the planner -- confirm.
TARGET_POSE_QUATERNION = [0, 1, 0, 0]


def generate_target_pose(rng=None):
    """Sample a random target pose.

    Args:
        rng: Optional ``numpy.random.Generator``. Pass a seeded generator for
            reproducible targets; when omitted a fresh unseeded generator is
            created.

    Returns:
        A new 7-element list: three floats drawn uniformly from
        ``[-0.5, 0.5)`` followed by ``TARGET_POSE_QUATERNION``.
    """
    if rng is None:
        rng = np.random.default_rng()
    # One vectorised draw instead of constructing a generator per coordinate.
    position = rng.uniform(low=-.5, high=.5, size=3).tolist()
    return position + list(TARGET_POSE_QUATERNION)


def evaluate_policy(model: nn.Module):
    """Roll out ``model`` towards random targets in a fresh ``PandaEnv``.

    For each of ``NUM_TRAJECTORIES`` sampled targets the cube is moved to the
    target position, the planner is asked for a trajectory, and -- if one
    exists -- ``execute_trajectory_steps`` runs the policy. The environment is
    always closed, even when a rollout raises.

    NOTE(review): ``MotionPlanner`` and ``sapien`` are used here but are not
    imported in the visible import cell; they must be in scope before this
    function is called.

    Args:
        model: Policy network handed to ``run_policy``.
    """
    # Inference only: make sure Dropout/BatchNorm are in eval mode.
    model.eval()

    env = PandaEnv()
    try:
        # Initialize and set up the motion planner for this environment.
        mp = MotionPlanner(env=env)
        mp.setup_planner()

        # Reset the environment and get the initial observation.
        obs = reset_environment(env)

        for _ in range(NUM_TRAJECTORIES):
            target_pose = generate_target_pose()
            env.cube.set_pose(sapien.Pose(target_pose[:3]))
            trajectory = mp.move_to_pose(target_pose, with_screw=True)

            # The planner signals failure with the sentinel -1.
            if trajectory == -1:
                print("No solution found for trajectory. Skipping...")
                continue

            obs = execute_trajectory_steps(model, env, obs, trajectory, target_pose, mp=mp)
    finally:
        env.close()


def reset_environment(env):
    """Reset ``env`` and return the observation from ``env.get_obs()``."""
    env.reset()
    return env.get_obs()


def execute_trajectory_steps(model, env, obs, trajectory, target_pose, mp=None):
    """Run the policy for up to ``MAX_STEPS`` steps and record planner labels.

    Each iteration steps ``env`` with the action from ``run_policy``, asks the
    planner for a trajectory to ``target_pose`` and stores the new observation
    together with the first ``'position'`` entry of that trajectory via
    ``save_to_file``. The loop ends when the planner returns -1, the
    environment reports ``done``, or ``MAX_STEPS`` is reached.

    Args:
        model: Policy network handed to ``run_policy``.
        env: Environment to step; it is reset before returning.
        obs: Observation the first action is computed from.
        trajectory: Planner result for the target; currently unused, kept for
            interface compatibility.
        target_pose: Pose passed to ``mp.move_to_pose``.
        mp: Motion planner to query. Previously this function read an
            undefined global ``mp``; it is now passed in explicitly. When
            omitted, a planner is built for ``env`` the same way
            ``evaluate_policy`` does.

    Returns:
        The observation obtained after resetting ``env``.
    """
    if mp is None:
        mp = MotionPlanner(env=env)
        mp.setup_planner()

    for _ in range(MAX_STEPS):
        next_action = run_policy(obs, model, env)
        obs, reward, done, info = env.step(next_action)
        model_trajectory = mp.move_to_pose(target_pose, with_screw=True)

        if model_trajectory == -1:
            print("No solution found for model trajectory. Breaking...")
            break

        # The planner's first position is stored as the label for this
        # observation.
        true_action = model_trajectory['position'][0]
        save_to_file(obs, true_action)

        if done:
            break

    return reset_environment(env)


In [None]:
# --- Training configuration -------------------------------------------------
TRAIN_RATIO = 0.8        # fraction of the dataset used for the training split
BATCH_SIZE = 512         # batch size of both data loaders
LEARNING_RATE = 1e-4     # Adam learning rate
WEIGHT_DECAY = 5e-4      # Adam weight decay
ITERATIONS = 5           # train/evaluate rounds performed by main()
# Shared TensorBoard writer; it is created as soon as this cell runs.
WRITER = SummaryWriter("../runs/test")

def run_training(run_id, model=None):
    """Train a model on the ``RobotGraph`` dataset and save it to disk.

    The dataset is split with a fixed seed into train/test parts according to
    ``TRAIN_RATIO``. The trained model is pickled as a whole module to
    ``models/model_{run_id}.pt`` (the directory is created if missing).

    Args:
        run_id: Identifier used in the saved file name.
        model: Optional module to continue training; a fresh ``MLP(8, 128, 1)``
            is created when omitted.

    Returns:
        The trained model (previously nothing was returned).
    """
    # Load and split the dataset
    dataset = RobotGraph("dataset")
    num_samples = len(dataset)
    num_train = int(TRAIN_RATIO * num_samples)
    indices = list(range(num_samples))
    train_indices, test_indices = train_test_split(
        indices, train_size=num_train, shuffle=True, random_state=42
    )

    # Create data loaders for the training and test set
    train_dataset = Subset(dataset, train_indices)
    test_dataset = Subset(dataset, test_indices)
    train_loader = DataLoader(
        train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=4
    )
    # Evaluation must see every held-out sample: dropping the last incomplete
    # batch would discard data and leaves the loader empty when the test split
    # is smaller than BATCH_SIZE. Shuffling has no effect on an averaged loss.
    test_loader = DataLoader(
        test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=4
    )

    # Initialize the model if not provided
    if model is None:
        model = MLP(8, 128, 1)

    # Define the optimizer
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train the model and save it
    trained_model = train(train_loader, test_loader, WRITER, model, optimizer)
    print("Training complete.")
    # Make sure the target directory exists so a finished run is not lost.
    os.makedirs("models", exist_ok=True)
    torch.save(trained_model, f"models/model_{run_id}.pt")
    return trained_model


def main():
    """Alternate training and policy evaluation for ``ITERATIONS`` rounds.

    Every round trains a fresh model and evaluates it right away. The model
    returned by ``run_training`` is used directly instead of being read back
    from disk: the file holds a fully pickled module, which ``torch.load``
    refuses by default on PyTorch >= 2.6 (``weights_only=True``).
    """
    for i in range(ITERATIONS):
        model = run_training(i)
        evaluate_policy(model)

In [None]:
# Entry point: start the train/evaluate loop only when this code is run as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()