In [1]:

import os, sys, math, time
import numpy as np
import numpy.linalg as la
import plotly.graph_objects as go
import plotly.express as ex
from plotly.subplots import make_subplots
import pandas as pd

import json as js
import _pickle as pickle
import bz2
import ray

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from collections import OrderedDict

sys.path.append("../")
import func

In [3]:
torch.cuda.device_count()

0

## Analyse features

In [2]:
data_path = "../../data/"
# load data
data = func.load(data_path+"LOCO_R2-default-locomotion.pbz2")
data_2 = func.load(data_path+"LOCO_R2-default-locomotion-small.pbz2")

## RBF-MLP Autoencoder
$
f(x,\theta) = dec(enc(x,\theta_1), \theta_2) = x,   \quad \theta = (\theta_1, \theta_2)
$

$
enc(x, \theta_1) = z, \quad   z \in Z \quad \text{ = latent space}
$

$
dec(z, \theta_2) = x, \quad   x \in X \quad \text{ = input space}
$

This model uses RBF-layer with Gaussian kernel for encoder

$
enc = rbf(X, \theta, k),    \quad \theta = W,b \quad k = c_1, c_2,..., c_k
$

$
rbf(X, \theta, k) = \phi(\sigma||X - C||)
$

$
dec = mlp(X, \theta), \quad \theta = W,b
$

$
mlp(X, W) = f(f(X \cdot w_1 + b_1) \cdot w_2 + b_2) \cdot w_3 + b_3
$

In [10]:
# RBF Layer
class RBF_Layer(nn.Module):
    """
    from JeremyLinux on GitHub {https://github.com/JeremyLinux/PyTorch-Radial-Basis-Function-Layer/blob/master/Torch%20RBF/torch_rbf.py}

    Transforms incoming data using a given radial basis function:
    u_{i} = rbf(||x - c_{i}|| / s_{i})
    Arguments:
        in_features: size of each input sample
        out_features: size of each output sample
    Shape:
        - Input: (N, in_features) where N is an arbitrary batch size
        - Output: (N, out_features) where N is an arbitrary batch size
    Attributes:
        centres: the learnable centres of shape (out_features, in_features).
            The values are initialised from a standard normal distribution.
            Normalising inputs to have mean 0 and standard deviation 1 is
            recommended.

        sigmas: the learnable scaling factors of shape (out_features).
            The values are initialised as ones.

        basis_func: the radial basis function used to transform the scaled
            distances.
    """

    def __init__(self, in_features:int, out_features:int, basis_func:func):
        super(RBF_Layer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.centres = nn.Parameter(torch.Tensor(out_features, in_features))
        self.sigmas = nn.Parameter(torch.Tensor(out_features))
        self.basis_func = basis_func
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.normal_(self.centres, 0, 1)
        nn.init.constant_(self.sigmas, .01)

    def forward(self, x):
        size = (x.size(0), self.out_features, self.in_features)
        x = x.unsqueeze(1).expand(size)
        c = self.centres.unsqueeze(0).expand(size)
        distances = (x - c).pow(2).sum(-1).pow(0.5) * self.sigmas.unsqueeze(0)  # ALT. / (2*sigma**2)
        return self.basis_func(distances)
    def __str__(self):
        return "RFB(in={}, out={}, centers={}, sigma={}".format(
            self.in_features, self.out_features, self.centres.size(), self.sigmas.size())


def gaussian(alpha):
    phi = torch.exp(-1*alpha.pow(2))
    return phi

def linear(alpha):
    phi = alpha
    return phi

def quadratic(alpha):
    phi = alpha.pow(2)
    return phi

def inverse_quadratic(alpha):
    phi = torch.ones_like(alpha) / (torch.ones_like(alpha) + alpha.pow(2))
    return phi

def multiquadric(alpha):
    phi = (torch.ones_like(alpha) + alpha.pow(2)).pow(0.5)
    return phi

def inverse_multiquadric(alpha):
    phi = torch.ones_like(alpha) / (torch.ones_like(alpha) + alpha.pow(2)).pow(0.5)
    return phi

def spline(alpha):
    phi = (alpha.pow(2) * torch.log(alpha + torch.ones_like(alpha)))
    return phi

def poisson_one(alpha):
    phi = (alpha - torch.ones_like(alpha)) * torch.exp(-alpha)
    return phi

def poisson_two(alpha):
    phi = ((alpha - 2*torch.ones_like(alpha)) / 2*torch.ones_like(alpha)) \
    * alpha * torch.exp(-alpha)
    return phi

def matern32(alpha):
    phi = (torch.ones_like(alpha) + 3**0.5*alpha)*torch.exp(-3**0.5*alpha)
    return phi

def matern52(alpha):
    phi = (torch.ones_like(alpha) + 5**0.5*alpha + (5/3) \
    * alpha.pow(2))*torch.exp(-5**0.5*alpha)
    return phi

def basis_func_dict():
    """
    A helper function that returns a dictionary containing each RBF
    """
    bases = {'gaussian': gaussian,
             'linear': linear,
             'quadratic': quadratic,
             'inverse quadratic': inverse_quadratic,
             'multiquadric': multiquadric,
             'inverse multiquadric': inverse_multiquadric,
             'spline': spline,
             'poisson one': poisson_one,
             'poisson two': poisson_two,
             'matern32': matern32,
             'matern52': matern52}
    return bases

class RBF(nn.Module):
    def __init__(self, dimensions:list, act_fn:str, rbf_index:list, rbf_kernel:str, keep_prob:float=.2, batch_size:int=1):
        super(RBF, self).__init__()
        self.dimensions = dimensions          #   [(in, h1), (h1, h2), ..., (hn, out)]
        self.act= act_fn                     #   func
        self.keep_prob = keep_prob          #   %
        self.batch_size = batch_size        #   int
        self.rbf_index=rbf_index
        self.kernel = basis_func_dict()[rbf_kernel]
        self.model = []

        assert(len(dimensions) >= 2)
        assert(batch_size > 0)
        assert(act_fn == "elu" or act_fn == "relu")
        assert(keep_prob < 1)
        assert(len(rbf_index) > 0)
        for e in dimensions: assert(type(e) == int)

        self.build()
        self.model.apply(self.init_params)


    def build(self):
        layers = []
        j = 0
        for i, size in enumerate(zip(self.dimensions[0:], self.dimensions[1:])):
            if i == self.rbf_index[j]:
                layers.append(("rbf"+str(j), RBF_Layer(size[0], size[1], self.kernel)))
                j+=1
            else:
                layers.append(("fc"+str(i), nn.Linear(size[0], size[1])))
                if i < len(self.dimensions)-2:
                    layers.append(("act"+str(i), self.activation(self.act)))
                    layers.append(("drop"+str(i+1), nn.Dropout(self.keep_prob)))

        self.model = nn.Sequential(OrderedDict(layers))


    def forward(self, x):
        return self.model(x)

    @staticmethod
    def activation(fn_name):
        if fn_name == "elu":
            return nn.ELU()
        elif fn_name == "relu":
            return nn.ReLU()
        else:
            return nn.ReLU()

    @staticmethod
    def init_params(m):
        if type(m) == nn.Linear:
            nn.init.xavier_normal_(m.weight)
            m.bias.data.fill_(.01)


class MLP(nn.Module):
    def __init__(self, dimensions:list, act_fn, keep_prob:float=.2, batch_size:int=1):
        super(MLP, self).__init__()
        self.dimensions = dimensions          #   [(in, h1), (h1, h2), ..., (hn, out)]
        self.act= act_fn                     #   func
        self.keep_prob = keep_prob          #   %
        self.batch_size = batch_size        #   int

        self.model = []

        assert(len(dimensions) >= 2)
        assert(batch_size > 0)
        assert(act_fn == "elu" or act_fn == "relu")
        assert(keep_prob < 1)
        for e in dimensions: assert(type(e) == int)

        self.build()
        self.model.apply(self.init_params)


    def build(self):
        layers = []
        for i, size in enumerate(zip(self.dimensions[0:], self.dimensions[1:])):
            layers.append(("fc"+str(i), nn.Linear(size[0], size[1])))
            if i < len(self.dimensions)-2:
                layers.append(("act"+str(i), self.activation(self.act)))
                layers.append(("drop"+str(i+1), nn.Dropout(self.keep_prob)))

        self.model = nn.Sequential(OrderedDict(layers))


    def forward(self, x):
        return self.model(x)

    @staticmethod
    def activation(fn_name):
        if fn_name == "elu":
            return nn.ELU()
        elif fn_name == "relu":
            return nn.ReLU()
        else:
            return nn.ReLU()

    @staticmethod
    def init_params(m):
        if type(m) == nn.Linear:
            nn.init.xavier_normal_(m.weight)
            m.bias.data.fill_(.01)






In [7]:
class RBF_AE(nn.Module):
    def __init__(self, encoder:nn.Module, decoder:nn.Module):
        super(RBF_AE, self).__init__()
        self.encoder = encoder
        self.decoder = decoder


    def forward(self, x):
        return self.decoder(self.encoder(x))

In [8]:
# Prepare train data
all_data = []
for d in data:
    d = pickle.loads(d)
    pos = []
    for f in d["frames"]:
        p = [jo["pos"] for jo in f]
        pos.append(p)
    all_data.append(pos)

input_data = np.array([np.concatenate([p for p in j]) for pos in all_data for j in pos])
print(input_data.shape)

(1440, 63)


In [9]:
data_ratio = (.7, .15, .15) # training, validation, testing
SEED = 2021
batch_size = 1

x_tensor = torch.from_numpy(input_data).float()
y_tensor = torch.from_numpy(input_data).float()

dataset = TensorDataset(x_tensor, y_tensor)
N = len(dataset)

train_ratio = int(data_ratio[0]*N)
val_ratio = int(data_ratio[1] * N)
test_ratio = int(N-train_ratio-val_ratio)
print("Train: ", train_ratio, ", Validation: ", val_ratio, ", Test: ", test_ratio)

train_set, val_set, test_set = random_split(dataset, [train_ratio, val_ratio, test_ratio], generator=torch.Generator().manual_seed(SEED))

train_loader = DataLoader(dataset=train_set, batch_size=batch_size)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size)


Train:  1007 , Validation:  216 , Test:  217


In [12]:
# Hyper-parameters
input_dim = input_data.shape[1]
output_dim = input_data.shape[1]
latent_dim = 36         # 12 * 3
encoder_layer_sizes = [input_dim, 256, 128, latent_dim]
decoder_layer_sizes = [latent_dim, 256, 256, output_dim]
num_epochs = 100
learning_rate = 0.001
act_fn = "elu"
keep_prob = .2

# model, loss and scheduler
encoder = RBF(encoder_layer_sizes, act_fn, [2], "gaussian", keep_prob, batch_size)
decoder = MLP(decoder_layer_sizes, act_fn, keep_prob, batch_size)
model = RBF_AE(encoder, decoder)

criterion = nn.MSELoss(reduction="mean")
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

print(model)

RBF_AE(
  (encoder): RBF(
    (model): Sequential(
      (fc0): Linear(in_features=63, out_features=256, bias=True)
      (act0): ELU(alpha=1.0)
      (drop1): Dropout(p=0.2, inplace=False)
      (fc1): Linear(in_features=256, out_features=128, bias=True)
      (act1): ELU(alpha=1.0)
      (drop2): Dropout(p=0.2, inplace=False)
      (rbf0): RBF_Layer()
    )
  )
  (decoder): MLP(
    (model): Sequential(
      (fc0): Linear(in_features=36, out_features=256, bias=True)
      (act0): ELU(alpha=1.0)
      (drop1): Dropout(p=0.2, inplace=False)
      (fc1): Linear(in_features=256, out_features=256, bias=True)
      (act1): ELU(alpha=1.0)
      (drop2): Dropout(p=0.2, inplace=False)
      (fc2): Linear(in_features=256, out_features=63, bias=True)
    )
  )
)


In [89]:
print(torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

False
cpu


In [13]:
total_step = len(train_loader)
i = 0
n_epochs_no_improve = 5

train_loader_len = float(len(train_loader))
val_loader_len = float(len(val_loader))
test_loader_len = float(len(test_loader))

last_avg_training_loss = 0
min_loss = np.inf
epochs_no_improve = 0
best_model_after_epoch = 0

for epoch in range(num_epochs):
    training_loss = 0
    # training
    for inputs, labels in train_loader:
        # inputs = inputs.to(device)
        # outputs = outputs.to(device)

        pred = model(inputs)
        loss = criterion(pred, labels)
        training_loss+=loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

    last_avg_training_loss = training_loss / train_loader_len
    print ('Epoch [{}/{}], Loss: {:.4f}'
        .format(epoch+1, num_epochs, last_avg_training_loss))

    # early stopping
    with torch.no_grad():
        val_loss = 0
        for inputs, labels in val_loader:
            pred_val = model(inputs)
            loss_val = criterion(pred_val, labels)
            val_loss += loss_val.item()

        val_loss /= val_loader_len
        if min_loss > val_loss:
            min_loss = val_loss
            epochs_no_improve = 0
            best_model_after_epoch = epoch

        else:
            epochs_no_improve+=1
            if epochs_no_improve > n_epochs_no_improve:
                print("Early stopping at Epoch: ", epoch)
                print("last training loss: {:2f}".format(last_avg_training_loss))
                print("achieved best validation loss: {:.4f} after at Epoch {}".format(min_loss, best_model_after_epoch))
                break

# Testing
with torch.no_grad():
    test_loss = 0
    for inputs, labels in test_loader:
        pred_test = model(inputs)
        loss_test = criterion(pred_test, labels)
        test_loss += loss_test.item()

    test_loss /= test_loader_len
    print("Test loss: {:.4f}".format(test_loss))


Epoch [1/100], Loss: 0.1236
Epoch [2/100], Loss: 0.0950
Epoch [3/100], Loss: 0.0929
Epoch [4/100], Loss: 0.0932
Epoch [5/100], Loss: 0.0939
Epoch [6/100], Loss: 0.0930
Epoch [7/100], Loss: 0.0927
Epoch [8/100], Loss: 0.0939
Early stopping at Epoch:  7
last training loss: 0.093866
achieved best validation loss: 0.0955 after at Epoch 1
Test loss: 0.1037
