### The benchmark of SARCOS dataset

The performance of different algorithms on this dataset can be found in: https://github.com/Kaixhin/SARCOS

#### Ridge regression with fourier(polynomial()) feature: 2.177

In [1]:
from jylearn.data.reg_data import robot_inv_data
import numpy as np
import torch as th
device = "cuda" if th.cuda.is_available() else "cpu"

Loss = th.nn.MSELoss()
X_train, Y_train, X_val, Y_val, X_test, Y_test = robot_inv_data()

In [2]:
X, Y = np.concatenate([X_train, X_val], axis=0), np.concatenate([Y_train, Y_val], axis=0)
from jylearn.feature.polynomial import PolynomialFT
from jylearn.feature.fourier import FourierBases
from jylearn.parametric.ridge import RidgeReg

f1 = FourierBases(15)
f2 = PolynomialFT(2)
X_f = f1(f2(X))
print("feature dim: ", X_f.shape[1])
X_test_f = f1(f2(X_test))
X_f_t, Y_t, X_test_f_t, Y_t_test = th.from_numpy(X_f).to(device), th.from_numpy(Y).to(device), th.from_numpy(X_test_f).to(device), th.from_numpy(Y_test).to(device)
rr = RidgeReg().fit(X_f_t, Y_t)
pred = rr.predict(X_test_f_t)
mse = Loss(pred, Y_t_test)
print(mse)

feature dim:  7842


100%|██████████| 10/10 [01:55<00:00, 11.56s/it]


tensor(2.1766, device='cuda:0')


#### Neural network: 1.469

In [2]:
X_train, Y_train, X_val, Y_val, X_test, Y_test =\
    th.from_numpy(X_train).to(device), th.from_numpy(Y_train).to(device), th.from_numpy(X_val).to(device), \
    th.from_numpy(Y_val).to(device), th.from_numpy(X_test).to(device), th.from_numpy(Y_test).to(device)

In [6]:
from torch.optim import Adam
from torch.nn import MSELoss
from jylearn.parametric.mlp import MLP
param = {"layer":4, "nodes":[21, 500, 500, 7], "batch":128, "lr":1e-3, "decay":0.}
net = MLP(param).to(device)
print(net)
Loss = MSELoss()
parameters = MLP.setParams(net, param["decay"])
optimizer = Adam(parameters, lr=param["lr"])
for _ in range(100):
    for i in range(len(X_train)//param["batch"]):
        optimizer.zero_grad()
        index = th.randperm(len(X_train))
        curr_index = index[i*param["batch"]:(i+1)*param["batch"]]
        X_b = X_train[curr_index]
        Y_b = Y_train[curr_index]
        pred = net(X_b)
        L = Loss(pred, Y_b)
        L.backward()
        optimizer.step()
    with th.no_grad():
        pred_val = net(X_val)
        L_val = Loss(pred_val, Y_val)
        print("Curr validation loss: ", L_val)
net.eval()
pred_test = net(X_test)
print(Loss(pred_test, Y_test))

MLP(
  (net): Sequential(
    (0): Linear(in_features=21, out_features=500, bias=True)
    (1): ReLU()
    (2): Linear(in_features=500, out_features=500, bias=True)
    (3): ReLU()
    (4): Linear(in_features=500, out_features=7, bias=True)
  )
)
Curr validation loss:  tensor(22.8014, device='cuda:0')
Curr validation loss:  tensor(19.0006, device='cuda:0')
Curr validation loss:  tensor(16.6982, device='cuda:0')
Curr validation loss:  tensor(13.5147, device='cuda:0')
Curr validation loss:  tensor(13.6710, device='cuda:0')
Curr validation loss:  tensor(10.9160, device='cuda:0')
Curr validation loss:  tensor(10.5393, device='cuda:0')
Curr validation loss:  tensor(10.2446, device='cuda:0')
Curr validation loss:  tensor(9.8874, device='cuda:0')
Curr validation loss:  tensor(9.7807, device='cuda:0')
Curr validation loss:  tensor(9.4553, device='cuda:0')
Curr validation loss:  tensor(8.9885, device='cuda:0')
Curr validation loss:  tensor(8.5249, device='cuda:0')
Curr validation loss:  tensor(