In [1]:
!pip install "minari[all]"
import minari

Collecting minari[all]
  Downloading minari-0.5.2-py3-none-any.whl.metadata (5.8 kB)
Collecting gymnasium>=0.28.1 (from minari[all])
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium>=0.28.1->minari[all])
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading minari-0.5.2-py3-none-any.whl (55 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium, minari
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0 minari-0.5.2


In [2]:
minari.download_dataset("D4RL/door/human-v2") # download the dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


namespace_metadata.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

door/human-v2/data/metadata.json:   0%|          | 0.00/2.44k [00:00<?, ?B/s]

namespace_metadata.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

namespace_metadata.json:   0%|          | 0.00/10.9k [00:00<?, ?B/s]

namespace_metadata.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

door/namespace_metadata.json:   0%|          | 0.00/1.04k [00:00<?, ?B/s]


Downloading D4RL/door/human-v2 from Farama servers...


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

main_data.hdf5:   0%|          | 0.00/3.51M [00:00<?, ?B/s]


Dataset D4RL/door/human-v2 downloaded to /root/.minari/datasets/D4RL/door/human-v2


In [3]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as f
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import random_split

In [4]:
class DoorDataset(Dataset):
  def __init__(self, dataset):
    self.dataset = dataset
    self.observations = dataset.observations # ndarray of observations in one episode
    self.actions = dataset.actions

  def __len__(self):
    return len(self.observations)

  def __getitem__(self, idx):
    return (
            torch.tensor(self.observations[idx-1], dtype=torch.float32),
            torch.tensor(self.actions[idx-1], dtype=torch.float32)
        )

In [6]:
episodes = minari.load_dataset('D4RL/door/human-v2') # load the dataset
print(len(episodes[0]))
dataset = DoorDataset(episodes[0])
val_size = 50
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])
train_dl = DataLoader(train_ds, batch_size=25, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=25)

300


In [7]:
# creating the model
input_size = 39 # 1d array of 39 observations
out_size = 28 # action space - 1d array of size 28

In [8]:
class BehaviorCloningBase(nn.Module):
    def training_step(self, batch):
        observations, actions = batch
        out = self(observations)
        loss = f.mse_loss(out, actions)
        return loss

    def validation_step(self, batch):
        observations, actions = batch
        out = self(observations)
        loss = f.mse_loss(out, actions)
        mae = f.l1_loss(out, actions)
        return {'val_loss': loss.detach(), 'val_mae': mae.detach()}

    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_maes = [x['val_mae'] for x in outputs]
        epoch_mae = torch.stack(batch_maes).mean()
        return {'val_loss': epoch_loss.item(), 'val_mae': epoch_mae.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_mae: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_mae']))

In [9]:
class NeurNet(BehaviorCloningBase):
  def __init__(self,in_size,out_size):
    super().__init__()
    self.linear1 = nn.Linear(in_size, 64)
    self.relu = nn.ReLU(inplace=True)
    self.linear2 = nn.Linear(64, 32)
    self.linear3 = nn.Linear(32, out_size)

  def forward(self, xb):
    # out = xb.view(xb.size(0), -1)
    out = self.linear1(xb)
    out = self.relu(out)
    out = self.linear2(out)
    out = self.linear3(out)
    return out

In [10]:
@torch.no_grad()
def evaluate(model,val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit_one_cycle(epochs, lr, model, train_loader, val_loader,
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    history = []

    optimizer = opt_func(model.parameters(), lr, weight_decay=weight_decay)

    # # Set up one-cycle learning rate scheduler
    # sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, lr, epochs=epochs, steps_per_epoch=len(train_loader)) # steps per epoch means total number of batches

    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        # lrs = []

        for batch in train_loader:
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()

            # Gradient clipping
            if grad_clip: # if set to true
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step() # do one epoch
            optimizer.zero_grad() # reset gradients so they don't accumulate

            # # Record & update learning rate
            # lrs.append(get_lr(optimizer))
            # sched.step() # learning rate scheduling by 1cycle policy

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        # result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [11]:
epochs = 32
lr = 0.01
# grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.Adam
model = NeurNet(input_size,out_size)

In [12]:
history = fit_one_cycle(epochs,lr,model,train_dl,val_dl,grad_clip=False,weight_decay=weight_decay,opt_func=opt_func)

Epoch [0], train_loss: 0.1392, val_loss: 0.0612, val_mae: 0.1958
Epoch [1], train_loss: 0.0373, val_loss: 0.0295, val_mae: 0.1353
Epoch [2], train_loss: 0.0235, val_loss: 0.0245, val_mae: 0.1178
Epoch [3], train_loss: 0.0198, val_loss: 0.0230, val_mae: 0.1182
Epoch [4], train_loss: 0.0183, val_loss: 0.0177, val_mae: 0.1015
Epoch [5], train_loss: 0.0160, val_loss: 0.0172, val_mae: 0.0997
Epoch [6], train_loss: 0.0152, val_loss: 0.0174, val_mae: 0.0978
Epoch [7], train_loss: 0.0155, val_loss: 0.0158, val_mae: 0.0955
Epoch [8], train_loss: 0.0132, val_loss: 0.0156, val_mae: 0.0970
Epoch [9], train_loss: 0.0161, val_loss: 0.0173, val_mae: 0.0995
Epoch [10], train_loss: 0.0192, val_loss: 0.0175, val_mae: 0.1024
Epoch [11], train_loss: 0.0150, val_loss: 0.0140, val_mae: 0.0914
Epoch [12], train_loss: 0.0130, val_loss: 0.0150, val_mae: 0.0931
Epoch [13], train_loss: 0.0139, val_loss: 0.0142, val_mae: 0.0917
Epoch [14], train_loss: 0.0129, val_loss: 0.0138, val_mae: 0.0895
Epoch [15], train_lo

In [16]:
test_ds = DoorDataset(episodes[1])
test_obs, test_actions = test_ds[6]
xb = test_obs.unsqueeze(0) # since model expects as batch, we give it as a batch size = 1
yb = model(xb)
print("Mean absolute error: {:.4f}".format(f.l1_loss(yb, test_actions.unsqueeze(0))))
print("Cool bruh!")

Mean absolute error: 0.0833
Cool bruh!
