In [26]:

import os, sys, math, time
import numpy as np
import numpy.linalg as la
import plotly.graph_objects as go
import plotly.express as ex
from plotly.subplots import make_subplots
import pandas as pd

import json as js
import _pickle as pickle
import bz2
import ray

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from collections import OrderedDict

from ray import tune
from ray.tune.suggest.bayesopt import BayesOptSearch
import shutil
import tempfile
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
from ray.tune.integration.pytorch_lightning import TuneReportCallback, \
    TuneReportCheckpointCallback

import pytorch_lightning as pl
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping

from cytoolz import sliding_window
sys.path.append("../")
import func

## Simple MLP Autoencoder
$
f(x,\theta) = dec(enc(x,\theta_1), \theta_2) = x,   \quad \theta = (\theta_1, \theta_2)
$

$
enc(x, \theta_1) = z, \quad   z \in Z \quad \text{ = latent space}
$

$
dec(z, \theta_2) = x, \quad   x \in X \quad \text{ = input space}
$

This model uses simple Multi-layered perceptron (MLP) for both encoder and decoder.

$
enc = dec = mlp(X, \theta), \quad \theta = W, b

$
mlp(X, W) = f(f(X \cdot w_1 + b_1) \cdot w_2 + b_2) \cdot w_3 + b_3
$
## Analyse features


In [3]:
class MLP(nn.Module):
    def __init__(self, dimensions:list, act_fn:str, keep_prob:float=.2, batch_size:int=1):
        super(MLP, self).__init__()
        self.dimensions = dimensions          #   [(in, h1), (h1, h2), ..., (hn, out)]
        self.act= act_fn                     #   func
        self.keep_prob = keep_prob          #   %
        self.batch_size = batch_size        #   int

        self.model = []

        assert(len(dimensions) >= 2)
        assert(batch_size > 0)
        assert(act_fn == "elu" or act_fn == "relu")
        assert(keep_prob < 1)
        for e in dimensions: assert(type(e) == int)

        self.build()
        self.model.apply(self.init_params)


    def build(self):
        layers = []
        for i, size in enumerate(zip(self.dimensions[0:], self.dimensions[1:])):
            layers.append(("fc"+str(i), nn.Linear(size[0], size[1])))
            if i < len(self.dimensions)-2:
                layers.append(("act"+str(i), self.activation(self.act)))
                layers.append(("drop"+str(i+1), nn.Dropout(self.keep_prob)))

        self.model = nn.Sequential(OrderedDict(layers))


    def forward(self, x:torch.Tensor) -> torch.Tensor:
        return self.model(x)

    @staticmethod
    def activation(fn_name):
        if fn_name == "elu":
            return nn.ELU()
        elif fn_name == "relu":
            return nn.ReLU()
        else:
            return nn.ReLU()

    @staticmethod
    def init_params(m):
        if type(m) == nn.Linear:
            nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(.01)





In [4]:
class MLP_AE(nn.Module):
    def __init__(self, encoder:nn.Module, decoder:nn.Module):
        super(MLP_AE, self).__init__()
        self.encoder = encoder
        self.decoder = decoder


    def forward(self, x):
        return self.decoder(self.encoder(x))

In [2]:
@ray.remote
def loadFeatures(data, feature_list):
    data = pickle.loads(data)
    features = []
    for f in data["frames"]:
        p = []
        for feature in feature_list:
            if feature == "rotMat":
                p.append(np.concatenate([jo["rotMat"].ravel() for jo in f]))
            else:
                p.append(np.concatenate([jo[feature] for jo in f]))

        p = np.concatenate(p)
        features.append(p)
    return np.vstack(features)

def processData(compressed_data, feature_list, num_cpus=24):
    ray.init(num_cpus=num_cpus,ignore_reinit_error=True)
    data = [loadFeatures.remote(d, feature_list) for d in compressed_data]
    data = [ray.get(d) for d in data]
    ray.shutdown()
    return data

In [3]:
# Prepare train data
data_path = "../../data/"

# load data
data_1 = func.load(data_path+"LOCO_R2-default-locomotion.pbz2")
data_2 = func.load(data_path+"LOCO_R2-default-locomotion-small.pbz2")
data_3 = func.load(data_path+"LOCO_R2-default-locomotion-large.pbz2")
data = data_1 + data_2 + data_3

In [4]:
# loading data
d = processData(data, ["pos", "rotMat", "velocity"])



2021-03-26 20:43:03,107	INFO services.py:1172 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# preparing data loaders
data_ratio = (.7, .15, .15) # training, validation, testing
SEED = 2021
batch_size = 10
input_data = np.vstack(d)
# x_tensor = torch.from_numpy(input_data).float().to(device)
# y_tensor = torch.from_numpy(input_data).float().to(device)

x_tensor = torch.from_numpy(input_data).float()
y_tensor = torch.from_numpy(input_data).float()
dataset = TensorDataset(x_tensor, y_tensor)
N = len(dataset)

train_ratio = int(data_ratio[0]*N)
val_ratio = int(data_ratio[1] * N)
test_ratio = int(N-train_ratio-val_ratio)
print("Train: ", train_ratio, ", Validation: ", val_ratio, ", Test: ", test_ratio)

train_set, val_set, test_set = random_split(dataset, [train_ratio, val_ratio, test_ratio], generator=torch.Generator().manual_seed(SEED))

train_loader = DataLoader(dataset=train_set, batch_size=batch_size, pin_memory=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, pin_memory=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, pin_memory=True)




Train:  3024 , Validation:  648 , Test:  648


In [28]:
def train(model, criterion, optimizer, scheduler, num_epochs, device,
          data_loaders=None, n_epochs_no_improve=10, verbose=True,
          save_model=False, save_path="../../models/best_model",
          use_tune=False):
    np.random.seed(SEED)
    torch.random.manual_seed(SEED)

    model.to(device)


    train_loader, val_loader, test_loader = data_loaders
    total_step = len(train_loader)
    i = 0

    train_loader_len = float(len(train_loader))
    val_loader_len = float(len(val_loader))
    test_loader_len = float(len(test_loader))

    last_avg_training_loss = 0
    min_loss = np.inf
    epochs_no_improve = 0
    best_model_after_epoch = 0

    for epoch in range(num_epochs):
        training_loss = 0
        # training
        for inputs, outputs in train_loader:
            optimizer.zero_grad()

            inputs = inputs.to(device)
            outputs = outputs.to(device)

            pred = model(inputs)
            loss = criterion(pred, outputs)
            training_loss+=loss.item()

            loss.backward()
            optimizer.step()

        scheduler.step()
        last_avg_training_loss = training_loss / train_loader_len
        if verbose:
            print ('Epoch [{}/{}], Loss: {:.4f}'
                .format(epoch+1, num_epochs, last_avg_training_loss))

        # early stopping
        with torch.no_grad():
            val_loss = 0
            for inputs, outputs in val_loader:
                inputs = inputs.to(device)
                outputs = outputs.to(device)

                pred_val = model(inputs)
                loss_val = criterion(pred_val, outputs)
                val_loss += loss_val.item()

            val_loss /= val_loader_len
            if min_loss > val_loss:
                min_loss = val_loss
                epochs_no_improve = 0
                best_model_after_epoch = epoch
                if save_model:
                    torch.save(model.state_dict(), save_path)

            else:
                epochs_no_improve+=1
                if epochs_no_improve > n_epochs_no_improve and verbose:
                    print("Early stopping at Epoch: ", epoch)
                    print("last training loss: {:2f}".format(last_avg_training_loss))
                    print("achieved best validation loss: {:.4f} after at Epoch {}".format(min_loss, best_model_after_epoch))
                    break
            if use_tune:
                tune.report(iterations=epoch, mean_loss=val_loss)
    # Testing
    with torch.no_grad():
        test_loss = 0
        for inputs, outputs in test_loader:
            inputs = inputs.to(device)
            outputs = outputs.to(device)
            pred_test = model(inputs)
            loss_test = criterion(pred_test, outputs)
            test_loss += loss_test.item()

        test_loss /= test_loader_len
        if verbose:
            print("Test loss: {:.4f}".format(test_loss))
        if use_tune:
            tune.report(iterations=epoch, mean_loss=test_loss)


In [29]:
# Test Ray Tune for hyperparameter tuning
input_dim = input_data.shape[1]
output_dim = input_data.shape[1]
k = 10
latent_dim = k * (3 + 9 + 3)         # 12 * 3
encoder_layer_sizes = [input_dim, 256, 256, latent_dim]
decoder_layer_sizes = [latent_dim, 256, 256, output_dim]
num_epochs = 300
act_fn = "elu"
keep_prob = .2



def trainable(config):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    latent_dim = int(config["k"])
    encoder_layer_sizes = [input_dim, 256, 256, latent_dim]
    decoder_layer_sizes = [latent_dim, 256, 256, output_dim]
    encoder = MLP(encoder_layer_sizes, act_fn, keep_prob, batch_size)
    decoder = MLP(decoder_layer_sizes, act_fn, keep_prob, batch_size)
    model = MLP_AE(encoder, decoder)

    criterion = nn.MSELoss(reduction="mean")
    optimizer = torch.optim.AdamW(model.parameters(), lr=config["lr"])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.95)
    train(model, criterion, optimizer, scheduler, num_epochs, device=device,
          data_loaders=(train_loader, val_loader, test_loader),verbose=False, use_tune=True)

search_space= {
    "k" : tune.randint(3, 125),
    "lr" : tune.uniform(1e-3, 1e-9)
}

# bayesopt = BayesOptSearch(metric="mean_loss", mode="min")
trial_scheduler = ray.tune.schedulers.ASHAScheduler(grace_period=5, max_t = 100)
tune.run(trainable, config=search_space, scheduler=trial_scheduler, metric="mean_loss", mode="min",
         resources_per_trial={"cpu":12, "gpu":1},
         num_samples=20, stop={"training_iteration":20})

. In total there are 0 pending tasks and 1 pending actors on this node. This is likely due to all cluster resources being claimed by actors. To resolve the issue, consider creating fewer actors or increase the resources available to this Ray cluster. You can ignore this message if this Ray cluster is expected to auto-scale.


Trial name,status,loc,k,lr
trainable_85689_00000,RUNNING,,37,0.000911737


KeyboardInterrupt: 

In [49]:
# Test torch lightning + ray tune

class MLP2(pl.LightningModule):
    def __init__(self, config, dimensions:list,  loss_fn=None,
                 dataset=None, train_set=None, val_set=None, test_set=None,
                 keep_prob:float=.2):

        super(MLP2, self).__init__()

        self.k = config["k"]
        self.learning_rate = config["lr"]
        dimensions.append(self.k)
        self.dimensions = dimensions
        self.loss_fn = loss_fn
        self.keep_prob = keep_prob          #   %
        self.batch_size = config["batch_size"]

        self.dataset = dataset

        self.train_set = train_set
        self.val_set = val_set
        self.test_set = test_set

        self.build()
        if self.train_set is None:
            self.setup_data([.7, .15, .15])

        self.encoder.apply(self.init_params)
        self.decoder.apply(self.init_params)

    def build(self):
        layers = []
        layer_sizes = list(sliding_window(2, self.dimensions))

        for i, size in enumerate(layer_sizes):
            layers.append(("fc"+str(i), nn.Linear(size[0], size[1])))
            if i < len(self.dimensions)-2:
                layers.append(("act"+str(i), nn.ELU()))
                layers.append(("drop"+str(i+1), nn.Dropout(self.keep_prob)))
        self.encoder = nn.Sequential(OrderedDict(layers))

        layers = []
        for i, size in enumerate(layer_sizes[-1::-1]):
            layers.append(("fc"+str(i), nn.Linear(size[1], size[0])))
            if i < len(self.dimensions)-2:
                layers.append(("act"+str(i), nn.ELU()))
                layers.append(("drop"+str(i+1), nn.Dropout(self.keep_prob)))
        self.decoder = nn.Sequential(OrderedDict(layers))

    def forward(self, x:torch.Tensor) -> torch.Tensor:
        return self.decoder(self.encoder(x))

    def training_step(self, batch, batch_idx):
        x, y = batch
        prediction = self(x)
        loss = self.loss_fn(prediction, y)

        self.log("ptl/train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch

        prediction = self(x)
        loss = self.loss_fn(prediction, y)

        self.log('ptl/val_loss', loss, prog_bar=True)
        return {"val_loss":loss}

    def test_step(self, batch, batch_idx):
        return self.validation_step(batch, batch_idx)

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        self.log("avg_val_loss", avg_loss)

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.learning_rate)
        return optimizer

    def setup_data(self, split_ratio):
        self.n_train_samples= int(split_ratio[0]*N)
        self.n_val_samples= int(split_ratio[1] * N)
        self.n_test_samples= int(N-self.n_train_samples-self.n_val_samples)
        self.train_set, self.val_set, self.test_set = random_split(self.dataset,
                                                                   [self.n_train_samples,
                                                                    self.n_val_samples,
                                                                    self.n_test_samples])

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=self.batch_size, pin_memory=True)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=self.batch_size, pin_memory=True)

    def test_dataloader(self):
        return DataLoader(self.test_set, batch_size=self.batch_size, pin_memory=True)

    @staticmethod
    def init_params(m):
        if type(m) == nn.Linear:
            nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(.01)

In [50]:
def train_tune(config, dimensions:list,  loss_fn=None,
                 dataset=None, train_set=None, val_set=None, test_set=None,
                 keep_prob:float=.2, num_epochs=300, num_cpus=24, num_gpus=1):
    model = MLP2(config, dimensions, loss_fn, dataset, train_set, val_set, test_set, keep_prob)
    trainer = pl.Trainer(
        max_epochs=num_epochs,
        gpus=num_gpus,
        logger=TensorBoardLogger(save_dir="logs/", name="test", version="0.0"),
        progress_bar_refresh_rate=20,
        callbacks=[
            TuneReportCallback({"loss":"avg_val_loss",}, on="validation_end"),
            EarlyStopping(monitor="avg_val_loss")
        ],
        precision=16,
    )
    trainer.fit(model)

In [46]:
EPOCHS = 300
data_ratio = (.7, .15, .15) # training, validation, testing
SEED = 2021
batch_size = 10
input_data = np.vstack(d)

x_tensor = torch.from_numpy(input_data).float()
y_tensor = torch.from_numpy(input_data).float()
dataset = TensorDataset(x_tensor, y_tensor)
N = len(dataset)

train_ratio = int(data_ratio[0]*N)
val_ratio = int(data_ratio[1] * N)
test_ratio = int(N-train_ratio-val_ratio)
print("Train: ", train_ratio, ", Validation: ", val_ratio, ", Test: ", test_ratio)

train_set, val_set, test_set = random_split(dataset, [train_ratio, val_ratio, test_ratio], generator=torch.Generator().manual_seed(SEED))

Train:  3024 , Validation:  648 , Test:  648


In [52]:
input_dim = input_data.shape[1]
output_dim = input_data.shape[1]

dimensions = [input_dim, 256]
loss_fn = F.mse_loss
keep_prob = .2

num_gpus = 1
num_samples = 20

In [53]:
config = {
    "k":tune.randint(3, 125),
    "lr": tune.loguniform(1e-3, 1e-7),
    "batch_size":tune.choice([1, 6, 12])
}
scheduler = ASHAScheduler(max_t = EPOCHS, grace_period=1, reduction_factor=2)
reporter = CLIReporter(
    parameter_columns=["k", "lr", "batch_size"],
    metric_columns=["loss", "training_iteration"])
analysis = tune.run(
    tune.with_parameters(
        train_tune,
        dimensions = dimensions,
        loss_fn = loss_fn,
        train_set = train_set, val_set = val_set, test_set=test_set,
        keep_prob = keep_prob,
        num_epochs = EPOCHS,
        num_gpus=num_gpus
    ),
    resources_per_trial= {"cpu":1, "gpu":num_gpus},
    metric="loss",
    mode="min",
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    progress_reporter=reporter,
    stop={"training_iteration": 50},
    name="test",
    verbose=1
)
print("Best hyperparameters found were: ", analysis.best_config)
print("Best achieved loss was: ", analysis.best_result)

== Status ==
Memory usage on this node: 12.0/31.3 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 256.000: None | Iter 128.000: None | Iter 64.000: None | Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 1/24 CPUs, 1/1 GPUs, 0.0/12.79 GiB heap, 0.0/4.39 GiB objects (0/1.0 accelerator_type:GTX)
Result logdir: /home/nuoc/ray_results/test
Number of trials: 1/20 (1 RUNNING)
+--------------------+----------+-------+-----+-------------+--------------+
| Trial name         | status   | loc   |   k |          lr |   batch_size |
|--------------------+----------+-------+-----+-------------+--------------|
| _inner_9e280_00000 | RUNNING  |       | 101 | 3.33988e-05 |            6 |
+--------------------+----------+-------+-----+-------------+--------------+


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]
Epoch 0:   0%|        

[2m[36m(pid=74350)[0m GPU available: True, used: True
[2m[36m(pid=74350)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=74350)[0m Using native 16bit precision.
[2m[36m(pid=74350)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[2m[36m(pid=74350)[0m GPU available: True, used: True
[2m[36m(pid=74350)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=74350)[0m Using native 16bit precision.
[2m[36m(pid=74350)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[2m[36m(pid=74350)[0m 
[2m[36m(pid=74350)[0m   | Name    | Type       | Params
[2m[36m(pid=74350)[0m ---------------------------------------
[2m[36m(pid=74350)[0m 0 | encoder | Sequential | 106 K 
[2m[36m(pid=74350)[0m 1 | decoder | Sequential | 107 K 
[2m[36m(pid=74350)[0m ---------------------------------------
[2m[36m(pid=74350)[0m 213 K     Trainable params
[2m[36m(pid=74350)[0m 0         Non-trainable params
[2m[36m(pid=74350)[0m 213 K     Total params
[2m[36m(pid=74350)[0

In [63]:
ray.shutdown()
ray.init(num_gpus=1)
print(ray.get_gpu_ids())
ray.shutdown()

2021-03-26 20:34:00,608	INFO services.py:1172 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[]


In [36]:
model = MLP2(config={"k":5, "lr":1e-3, "batch_size":1}, dimensions=dimensions, loss_fn=loss_fn, train_set=train_set, val_set=val_set,test_set=test_set)

In [37]:
print(model)

MLP2(
  (encoder): Sequential(
    (fc0): Linear(in_features=315, out_features=256, bias=True)
    (act0): ELU(alpha=1.0)
    (drop1): Dropout(p=0.2, inplace=False)
    (fc1): Linear(in_features=256, out_features=256, bias=True)
    (act1): ELU(alpha=1.0)
    (drop2): Dropout(p=0.2, inplace=False)
    (fc2): Linear(in_features=256, out_features=5, bias=True)
  )
  (decoder): Sequential(
    (fc0): Linear(in_features=256, out_features=5, bias=True)
    (act0): ELU(alpha=1.0)
    (drop1): Dropout(p=0.2, inplace=False)
    (fc1): Linear(in_features=256, out_features=256, bias=True)
    (act1): ELU(alpha=1.0)
    (drop2): Dropout(p=0.2, inplace=False)
    (fc2): Linear(in_features=315, out_features=256, bias=True)
  )
)
