## 1. Check if the model is differentiable

In [15]:
import torch
from torch.nn.parameter import Parameter

class QubeDynamics(torch.nn.Module):
    """Solve equation M qdd + C(q, qd) = tau for qdd."""

    def __init__(self):
        super().__init__()
        # Gravity
        # self.g = Parameter(data=torch.Tensor([9.81]), requires_grad=True)
        self.g = torch.tensor([9.81])

        # Motor
        self.Rm = Parameter(data=torch.Tensor([8.4]), requires_grad=True)

        # back-emf constant (V-s/rad)
        self.km = Parameter(data=torch.Tensor([0.042]), requires_grad=True)

        # Rotary arm
        self.Mr = Parameter(data=torch.Tensor([0.095]), requires_grad=True)
        self.Lr = Parameter(data=torch.Tensor([0.085]), requires_grad=True)
        self.Dr = Parameter(data=torch.Tensor([5e-6]), requires_grad=True)

        # Pendulum link
        self.Mp = Parameter(data=torch.Tensor([0.024]), requires_grad=True)
        self.Lp = Parameter(data=torch.Tensor([0.129]), requires_grad=True)
        self.Dp = Parameter(data=torch.Tensor([1e-6]), requires_grad=True)

        # Init constants
        # self._init_const()

    def set_random_params(self):
        for p in self.parameters():
            p.data = torch.rand_like(p.data)/10 # most params between 0 and 0.1

        # except for Rm
        self.Rm = Parameter(data=torch.Tensor([5]), requires_grad=True)
        
        # self._init_const()

    def _init_const(self):
        # Moments of inertia
        Jr = self.Mr * self.Lr ** 2 / 12  # inertia about COM (kg-m^2)
        Jp = self.Mp * self.Lp ** 2 / 12  # inertia about COM (kg-m^2)

        # Constants for equations of motion
        self._c = torch.zeros(5)
        self._c[0] = Jr + self.Mp * self.Lr ** 2
        self._c[1] = 0.25 * self.Mp * self.Lp ** 2
        self._c[2] = 0.5 * self.Mp * self.Lp * self.Lr
        self._c[3] = Jp + self._c[1]
        self._c[4] = 0.5 * self.Mp * self.Lp * self.g


    def forward(self, s_batch, u_batch, dt_batch):
        # Unbind the batch tensors to get individual tensors for each input
        s_list = torch.unbind(s_batch, dim=0)
        u_list = torch.unbind(u_batch, dim=0)
        dt_list = torch.unbind(dt_batch, dim=0)

        # Create a list to store the next states for each input in the batch
        next_state_list = []

        # need to re-init each time we update params
        self._init_const()

        # Loop over the tensors in the batch
        for s, u, dt in zip(s_list, u_list, dt_list):
            th, al, thd, ald = s
            voltage = u[0] * 12


            # Define mass matrix M = [[a, b], [b, c]]
            a = self._c[0] + self._c[1] * torch.sin(al) ** 2
            b = self._c[2] * torch.cos(al)
            c = self._c[3]
            d = a * c - b * b

            # Calculate vector [x, y] = tau - C(q, qd)
            trq = self.km * (voltage - self.km * thd) / self.Rm
            c0 = self._c[1] * torch.sin(2 * al) * thd * ald \
                - self._c[2] * torch.sin(al) * ald * ald
            c1 = -0.5 * self._c[1] * torch.sin(2 * al) * thd * thd \
                + self._c[4] * torch.sin(al)
            x = trq - self.Dr * thd - c0
            y = -self.Dp * ald - c1

            # Compute M^{-1} @ [x, y]
            thdd = (c * x - b * y) / d
            aldd = (a * y - b * x) / d

            next_state = torch.clone(s)
            next_state[3] += (dt * aldd)[0]
            next_state[2] += (dt * thdd)[0]
            next_state[1] += (dt * next_state[3])[0]
            next_state[0] += (dt * next_state[2])[0]

            next_state_list.append(next_state)

        return torch.stack(next_state_list, dim=0)

In [4]:
# from furuta_gym.envs.furuta_sim import QubeDynamics as QD

# baseline = QD()
model = QubeDynamics()

state, action, dt, next_state = ds[10]

# run model
pred_next_state = model(state, action, dt)

print(next_state)
print(pred_next_state)
# loss = torch.nn.functional.mse_loss(pred_next_state, next_state)
# print(loss)

# TODO put the state update in the model

NameError: name 'ds' is not defined

In [13]:
# make a dataset
# input is state + action + dt, output is next state
import torch
from typing import Union
from pathlib import Path
import os
from furuta_gym.logging.protobuf.pendulum_state_pb2 import PendulumState
from mcap_protobuf.reader import read_protobuf_messages
from tqdm import tqdm

class MCAPDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir):
        if isinstance(root_dir, str):
            root_dir = Path(root_dir)

        # parse the data
        # TODO it's gonna load it all in RAM
        # + have some duplicates
        # but should be ok since this is pretty light < 1MB
        self.samples = []
        for f in tqdm(os.listdir(root_dir)):
            if f.endswith(".mcap"):
                try:
                    self.parse_mcap(root_dir / f)
                except Exception as e:
                    print(f"Error parsing {f}: {e}")

    def parse_mcap(self, pth):
        msgs = list(read_protobuf_messages(pth, log_time_order=True))
        for i in range(1, len(msgs)-1):
            msg = msgs[i-1]
            next_msg = msgs[i]

            p = msg.proto_msg
            state = torch.tensor([p.motor_angle, p.pendulum_angle, 
                                  p.motor_angle_velocity, p.pendulum_angle_velocity],
                                  requires_grad=False,
                                  dtype=torch.float32)

            next_p = next_msg.proto_msg
            next_state = torch.tensor([next_p.motor_angle, next_p.pendulum_angle, 
                                       next_p.motor_angle_velocity, next_p.pendulum_angle_velocity],
                                       requires_grad=False,
                                       dtype=torch.float32)

            dt = torch.tensor([(next_msg.log_time - msg.log_time).total_seconds()], requires_grad=False)
            # dt = torch.tensor([1/50], requires_grad=False)
            action = torch.tensor([next_p.corrected_action], requires_grad=False)

            sample = (state, action, dt, next_state)
            self.samples.append(sample)

    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        return self.samples[idx]

ds = MCAPDataset("../data/outul4rm/")
print(len(ds))
ds[0]


 77%|███████▋  | 161/209 [00:02<00:00, 83.23it/s]

Error parsing ep208_20221207-151228.mcap: 


100%|██████████| 209/209 [00:03<00:00, 69.42it/s]

37643





(tensor([ 0.0778, -0.0716,  4.2630, -4.3147]),
 tensor([0.7659]),
 tensor([0.0200]),
 tensor([  0.3347,  -0.3242,  12.8458, -12.6317]))

In [16]:
import wandb
config = {
    "epochs": 20,
    "batch_size": 512,
    "start_lr": 1e-1,
    "end_lr": 0.25e-3,
    "subset_pct": 1
}
with wandb.init(project="furuta", job_type="system_id", config=config) as run:
    config = run.config

    # setup dataset
    ds = MCAPDataset("../data/outul4rm/")

    # take a random subset of the data
    ds = torch.utils.data.Subset(ds, torch.randperm(len(ds))[:int(len(ds) * config.subset_pct)])

    # setup dataloader
    dl = torch.utils.data.DataLoader(ds, batch_size=config.batch_size, shuffle=True)

    # setup model
    model = QubeDynamics()
    model.set_random_params()
    model.train()

    bn_layer = torch.nn.BatchNorm1d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    
    wandb.watch(model, log_freq=10, log="gradients")

    # setup optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config.start_lr)

    # setup loss
    # loss = torch.nn.MSELoss()

    # lr starts at start lr and ends at end lr when epoch = epochs
    lambda1 = lambda epoch: config.start_lr + (config.end_lr - config.start_lr) * epoch / config.epochs

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    # train
    for epoch in range(config.epochs):
        for batch in tqdm(dl):
            # unpack batch
            states, actions, dts, next_states = batch

            # reset gradients
            optimizer.zero_grad()

            # normalize inputs
            inputs = torch.cat([states, actions, dts], dim=1)
            inputs = bn_layer(inputs)

            # re-split inputs into states, actions, dts
            states, actions, dts = torch.split(inputs, [4, 1, 1], dim=1)

            # run model
            # preds = []
            # for i in range(state.size()[0]):
            #     preds.append(model(state[i], action[i], dt[i]))
            pred_next_states = model(states, actions, dts)

            # calculate loss (least squares)
            l = torch.sum( (pred_next_states - next_states) ** 2 )

            # backprop
            l.backward()

            # update weights
            optimizer.step()

            # build gradient dict
            grads = {}
            for name, param in model.named_parameters():
                grads[f"gradients/{name}"] = param.grad

            to_log = dict(model.state_dict())
            to_log["loss"] = l

            # merge to_log and grads
            to_log = {**to_log, **grads} 
            run.log(to_log)
        
        scheduler.step()

 81%|████████  | 169/209 [00:01<00:00, 96.89it/s] 

Error parsing ep208_20221207-151228.mcap: 


100%|██████████| 209/209 [00:02<00:00, 83.03it/s]
  0%|          | 0/74 [00:00<?, ?it/s]


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of AsStridedBackward0, is at version 8; expected version 6 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).