In [57]:
import sys
import os

sys.path.append('.')
from torch.utils.data import Dataset

import numpy as np
import pickle
from hydra.utils import get_original_cwd
from omegaconf import OmegaConf
import pandas as pd


In [58]:

class metaMobileData(Dataset):
    """Windowed mobile-robot trajectories for meta-learning a dynamics model.

    Construction either loads previously pickled windows or reads a raw
    ``.npz`` trajectory archive, splits it into train/test episodes, optionally
    standardizes it and cuts random fixed-length windows out of the episodes.
    The result is exposed as ``self.train_windows`` and ``self.test_windows``:
    dictionaries holding at least ``'obs'``, ``'act'``, ``'target'``,
    ``'task_index'`` and ``'normalization'``.
    """

    # Trajectory archive used for each supported sampling frequency. The keys
    # are compared against ``data_cfg.frequency`` as-is (i.e. as strings).
    _TRAJECTORY_FILES = {
        '500': 'ts_002_50x2000_w_grad.npz',
        '240': 'ts_def_50x1000_w_grad.npz',
    }

    def __init__(self, data_cfg=None):
        """Read the data configuration and build the train/test windows.

        Args:
            data_cfg: configuration object providing the attributes ``terrain``,
                ``frequency``, ``save``, ``load``, ``dim``, ``trajPerTask``,
                ``tar_type``, ``split``, ``shuffle_split``, ``meta_batch_size``,
                ``batch_size``, ``file_name`` and ``standardize``.

        Raises:
            ValueError: if ``data_cfg`` is None.
        """
        if data_cfg is None:
            # ValueError is an Exception subclass, so existing handlers still match.
            raise ValueError('Please specify a valid Confg for data')
        self.c = data_cfg

        # Every terrain other than 'sin2' falls back to the 'sinMix2' folder.
        terrain_dir = 'sin2' if self.c.terrain == 'sin2' else 'sinMix2'
        self._dataFolder = get_original_cwd() + '/data/MobileRobot/' + terrain_dir + '/'

        # An unsupported frequency is only an error once the raw trajectories
        # are actually needed (see _windows_from_trajectories).
        trajectory_file = self._TRAJECTORY_FILES.get(self.c.frequency)
        self._trajectoryPath = None if trajectory_file is None else self._dataFolder + trajectory_file

        self._save_windows = self.c.save
        self._load_windows = self.c.load
        self.dim = self.c.dim
        self.trajPerTask = self.c.trajPerTask

        self.tar_type = self.c.tar_type

        self._split = OmegaConf.to_container(self.c.split)
        self._shuffle_split = self.c.shuffle_split

        self.meta_batch_size = self.c.meta_batch_size
        self.batch_size = self.c.batch_size  # half of the window length
        self.normalization = None
        self.filename = self.c.file_name
        self.standardize = self.c.standardize
        self.train_windows, self.test_windows = self._load_data()

    def normalize(self, data, mean, std):
        """Standardize ``data`` with the first ``self.dim`` entries of ``mean``/``std``.

        A small epsilon is added to ``std`` to avoid division by zero.
        """
        return (data - mean[:self.dim]) / (std[:self.dim] + 1e-10)

    def denormalize(self, data, mean, std):
        """Invert :meth:`normalize` using the same truncated ``mean``/``std``."""
        return data * (std[:self.dim] + 1e-10) + mean[:self.dim]

    def get_statistics(self, data, dim, difference=False):
        """Return the per-feature mean and standard deviation of ``data``.

        Args:
            data: array whose first two axes are flattened into one sample axis.
            dim: unused; kept for interface compatibility (``self.dim`` is used).
            difference: if True, statistics are computed on the differences
                between consecutive entries along axis 1, restricted to the
                first ``self.dim`` features.

        Returns:
            Tuple ``(mean, std)`` computed over the flattened sample axis.
        """
        if difference:
            data = data[:, 1:, :self.dim] - data[:, :-1, :self.dim]
        flat = np.reshape(data, (data.shape[0] * data.shape[1], -1))
        return np.mean(flat, axis=0), np.std(flat, axis=0)

    def _pickle_path(self, suffix):
        """Return the path of the cached window file ending in ``suffix``."""
        return self._dataFolder + self.filename + suffix

    def _read_pickle(self, suffix):
        """Load one cached window dictionary, closing the file afterwards."""
        # NOTE: pickle executes arbitrary code on load; only use window files
        # that were produced by this project.
        with open(self._pickle_path(suffix), 'rb') as handle:
            return pickle.load(handle)

    def _write_pickle(self, windows, suffix):
        """Store one window dictionary, closing the file afterwards."""
        with open(self._pickle_path(suffix), 'wb') as handle:
            pickle.dump(windows, handle)

    def _load_data(self):
        """Produce the ``(train_windows, test_windows)`` pair.

        Windows are loaded from the pickle cache when ``load`` is anything other
        than None; otherwise they are built from the raw trajectories and, when
        ``save`` is not None, written to the cache. If ``shuffle_split`` is a
        float, the *train* windows are shuffled and re-split by that ratio and
        the original test windows are discarded.
        """
        if self._load_windows is not None:
            train_data_window = self._read_pickle('_train.pickle')
            test_data_window = self._read_pickle('_test.pickle')
            self.normalization = train_data_window['normalization']
            print('>>>>>>>>>>>>Loaded Saved Windows with shape<<<<<<<<<<<<<<<', train_data_window['obs'].shape)
        else:
            train_data_window, test_data_window = self._windows_from_trajectories()
            if self._save_windows is not None:
                self._write_pickle(train_data_window, '_train.pickle')
                self._write_pickle(test_data_window, '_test.pickle')

        if isinstance(self._shuffle_split, float):
            return self._shuffle_split_windows(train_data_window)

        return train_data_window, test_data_window

    def _windows_from_trajectories(self):
        """Build train/test windows from the raw ``.npz`` trajectory archive."""
        if self._trajectoryPath is None:
            raise ValueError(
                'No trajectory file is known for frequency {!r}; supported values are {}'.format(
                    self.c.frequency, sorted(self._TRAJECTORY_FILES)))

        # Read every member once and close the archive; each NpzFile lookup
        # re-reads the member from disk.
        with np.load(self._trajectoryPath) as data_np:
            pos = data_np['pos']
            orn_euler = data_np['orn_euler']
            torques = data_np['jointAppliedTorques']
        print('>>>>>>>>>>>>Loaded Data Trajectories with shape<<<<<<<<<<<<<<<', pos.shape)

        # Observation = first three 'pos' columns + sin/cos of the Euler angles;
        # column 3 of 'pos' is used as the per-step task descriptor ('grad').
        orn_sin, orn_cos = np.sin(orn_euler), np.cos(orn_euler)
        obs = np.concatenate((pos[:, :-1, :3], orn_sin[:, :-1, :], orn_cos[:, :-1, :]), axis=-1)
        next_obs = np.concatenate((pos[:, 1:, :3], orn_sin[:, 1:, :], orn_cos[:, 1:, :]), axis=-1)
        act = torques[:, :-1, :]
        grad = pos[:, :-1, 3]
        print('>>>>>>>>>>>>Processed Data Trajectories with shape<<<<<<<<<<<<<<<', obs.shape, torques.shape)

        (train_obs, train_act, train_next_obs, train_grad,
         test_obs, test_act, test_next_obs, test_grad) = self.train_test_split(obs, act, next_obs, grad)
        train_delta = train_next_obs - train_obs
        test_delta = test_next_obs - test_obs

        # Statistics used for standardization, computed on the training split only.
        # NOTE(review): difference=True is applied to train_delta, which is
        # already a one-step difference, so these are statistics of second
        # differences - confirm that this is intended.
        mean_state_diff, std_state_diff = self.get_statistics(train_delta, self.dim, difference=True)
        mean_obs, std_obs = self.get_statistics(train_obs, self.dim)
        mean_act, std_act = self.get_statistics(train_act, 2 * self.dim)
        self.normalization = dict()
        self.normalization['observations'] = [mean_obs, std_obs]
        self.normalization['actions'] = [mean_act, std_act]
        self.normalization['diff'] = [mean_state_diff, std_state_diff]

        # Select the regression targets and the statistics that belong to them.
        if self.tar_type == 'delta':
            print(">>>>>>>>>>>>>>>>>>>>>>>>>>> Training On Differences <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
            train_targets, test_targets = train_delta, test_delta
            self.normalization['targets'] = [mean_state_diff, std_state_diff]
        else:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>>>> Training On Next States(not differences) <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
            train_targets, test_targets = train_next_obs, test_next_obs
            self.normalization['targets'] = [mean_obs, std_obs]

        if self.standardize:
            print(">>>>>>>>>>>>>>>>>>>>>>>>>Standardizing The Data<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
            tar_mean, tar_std = self.normalization['targets']
            self.train_obs = self.normalize(train_obs, mean_obs, std_obs)
            self.train_acts = self.normalize(train_act, mean_act, std_act)
            self.test_obs = self.normalize(test_obs, mean_obs, std_obs)
            self.test_acts = self.normalize(test_act, mean_act, std_act)
            self.train_targets = self.normalize(train_targets, tar_mean, tar_std)
            self.test_targets = self.normalize(test_targets, tar_mean, tar_std)
        else:
            self.train_obs = train_obs
            self.train_acts = train_act
            self.test_obs = test_obs
            self.test_acts = test_act
            self.train_targets = train_targets
            self.test_targets = test_targets
        self.train_task_idx = train_grad
        self.test_task_idx = test_grad

        # Cut the random windows (train first, so the global RNG is consumed in
        # a fixed order).
        train_data_window = self._get_batch(train=True)
        test_data_window = self._get_batch(train=False)
        return train_data_window, test_data_window

    @staticmethod
    def _select_task_index(task_index, idx):
        """Pick the task indices of the windows ``idx``.

        ``_get_batch`` stores one scalar per window (1-D array), which cannot be
        indexed with ``[idx, :, 3]``. That legacy indexing is only kept for
        arrays that really have three or more axes.
        """
        task_index = np.asarray(task_index)
        if task_index.ndim >= 3:
            return task_index[idx, :, 3]
        return task_index[idx]

    def _shuffle_split_windows(self, windows):
        """Shuffle ``windows`` and split them by the ``shuffle_split`` ratio.

        Only the first three features of ``'obs'`` and ``'target'`` are kept.
        Returns the ``(train_set, test_set)`` dictionaries.
        """
        dataset_size = windows['obs'].shape[0]
        indices = np.arange(dataset_size)
        print(indices, dataset_size)
        np.random.shuffle(indices)
        print(indices)
        split_idx = int(dataset_size * self._shuffle_split)

        def subset(idx):
            return {'obs': windows['obs'][idx, :, :3],
                    'act': windows['act'][idx],
                    'target': windows['target'][idx, :, :3],
                    'task_index': self._select_task_index(windows['task_index'], idx),
                    'normalization': self.normalization}

        train_set = subset(indices[:split_idx])
        test_set = subset(indices[split_idx:])
        print('Train Test Split Ratio', self._shuffle_split)
        return train_set, test_set

    def _get_batch(self, train, percentage_imputation=0.0):
        """Cut ``meta_batch_size`` random windows of length ``2 * batch_size``.

        Args:
            train: take the windows from the training split if True, otherwise
                from the test split.
            percentage_imputation: fraction of time steps flagged as missing in
                the returned ``'obs_valid'`` mask.

        Returns:
            Dictionary with the keys ``'obs'``, ``'act'``, ``'target'``,
            ``'obs_valid'``, ``'normalization'`` and ``'task_index'``.
        """
        if train:
            obs, acts, targets, task_idx = self.train_obs, self.train_acts, self.train_targets, self.train_task_idx
        else:
            obs, acts, targets, task_idx = self.test_obs, self.test_acts, self.test_targets, self.test_task_idx

        half = self.batch_size
        num_paths, len_path = obs.shape[:2]
        # Random episode and random window centre for every window.
        idx_path = np.random.randint(0, num_paths, size=self.meta_batch_size)
        idx_batch = np.random.randint(half, len_path - half, size=self.meta_batch_size)

        def cut(source):
            return np.array([source[ip, ib - half:ib + half] for ip, ib in zip(idx_path, idx_batch)])

        obs_batch = cut(obs)
        act_batch = cut(acts)
        target_batch = cut(targets)
        t_idx_batch = cut(task_idx)

        # The validity mask uses its own fixed seed so that it is reproducible.
        rs = np.random.RandomState(seed=42)
        obs_valid_batch = rs.rand(obs_batch.shape[0], obs_batch.shape[1], 1) < 1 - percentage_imputation
        obs_valid_batch[:, :5] = True  # the first five steps are always observed

        # One task descriptor per window: the mean over the window's time steps.
        data_windows = {'obs': obs_batch, 'act': act_batch, 'target': target_batch, 'obs_valid': obs_valid_batch,
                        'normalization': self.normalization,
                        'task_index': np.mean(t_idx_batch, -1)}
        # TODO for the target(second half) initialize few things to True

        return data_windows

    def train_test_split(self, obs, act, delta, grad):
        """Split the episodes into train and test sets by explicit index lists.

        ``self._split`` must be a pair ``[train_indices, test_indices]``.

        Returns:
            ``(train_obs, train_act, train_delta, train_grad,
            test_obs, test_act, test_delta, test_grad)``.

        Raises:
            ValueError: if the inputs disagree in their number of episodes or
                the split is not a pair of index lists that fits the data.
        """
        print(obs.shape[0], act.shape[0], delta.shape[0])
        if not obs.shape[0] == act.shape[0] == delta.shape[0]:
            raise ValueError('obs, act and delta must contain the same number of episodes')
        # A float split cannot be indexed below, so only a two-element list is accepted.
        if not isinstance(self._split, list) or len(self._split) != 2:
            raise ValueError('split must be a list [train_indices, test_indices]')
        episodes = obs.shape[0]

        idx_train, idx_test = self._split
        print('Training Indices:', idx_train, 'Testing Indices:', idx_test)

        if len(idx_train) + len(idx_test) > episodes:
            raise ValueError('split references more episodes than are available')

        return obs[idx_train, :], act[idx_train, :], delta[idx_train, :], grad[idx_train, :], \
               obs[idx_test, :], act[idx_test, :], delta[idx_test, :], grad[idx_test, :]


In [7]:

if __name__ == '__main__':
    # Ad-hoc inspection of one raw trajectory archive: list its members and
    # print the Euler angles together with their sine and cosine.
    dataFolder = os.getcwd() + '/data/MobileRobot/sin2/'
    trajectoryPath = dataFolder + 'ts_002_50x2000.npz'
    # np.load keeps the .npz archive open and reads members lazily; the context
    # manager closes it, and the member is read once instead of three times.
    with np.load(trajectoryPath) as data:
        print(data.keys())
        orn_euler = data['orn_euler']
    print(np.sin(orn_euler))
    print(np.cos(orn_euler))
    print(orn_euler)

KeysView(<numpy.lib.npyio.NpzFile object at 0x7ff4390d47d0>)
[[[-1.87221911e-05 -4.26430594e-02  3.71033138e-06]
  [-1.90942578e-05 -4.34857699e-02  3.79171458e-06]
  [-1.94666134e-05 -4.43288896e-02  3.87345301e-06]
  ...
  [-1.08461183e-04  1.65951306e-01 -3.25643621e-03]
  [-1.65225740e-04  1.67743412e-01 -3.25876662e-03]
  [-1.76083972e-04  1.69558887e-01 -3.25771456e-03]]

 [[-1.65184812e-05 -3.76408302e-02  3.23404593e-06]
  [-1.68449043e-05 -3.83809907e-02  3.30388923e-06]
  [-1.71715502e-05 -3.91214679e-02  3.37400593e-06]
  ...
  [-1.27586040e-03 -2.22571665e-01  4.04456655e-03]
  [-1.38144994e-03 -2.21449456e-01  4.06020347e-03]
  [-1.46614140e-03 -2.20313648e-01  4.06762874e-03]]

 [[-2.17341178e-05 -4.94703612e-02  4.37806023e-06]
  [-2.21689129e-05 -5.04535053e-02  4.47597836e-06]
  [-2.26041024e-05 -5.14372015e-02  4.57438169e-06]
  ...
  [ 6.56318952e-07  2.94274671e-01 -8.07648936e-04]
  [ 5.91791898e-05  2.93051721e-01 -7.96141366e-04]
  [ 9.00750063e-05  2.91759848e-0

In [53]:
import os
import sys

sys.path.append('.')
sys.path.append(os.getcwd())
sys.path.append('./experiments/mobileRobot/')

print(sys.path)


from omegaconf import DictConfig, OmegaConf
import hydra


import numpy as np
import torch
import wandb
import pickle

from data.mobileDataSeq import metaMobileData
from data.mobileDataSeq_Infer import metaMobileDataInfer
from meta_dynamic_models.neural_process_dynamics.neural_process.setFunctionContext import SetEncoder
from meta_dynamic_models.neural_process_dynamics.neural_process_ssm.recurrentEncoderDecoder import acrknContextualDecoder
from meta_dynamic_models.neural_process_dynamics.npDynamics import npDyn
from learning import hiprssm_dyn_trainer
from inference import hiprssm_dyn_inference
from utils.metrics import naive_baseline
from utils.dataProcess import split_k_m
from utils.metrics import root_mean_squared
from utils.latentVis import plot_clustering, plot_clustering_1d

nn = torch.nn

['/Applications/PyCharm.app/Contents/plugins/python/helpers-pro/jupyter_debug', '/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev', '/Users/I562243/Documents/to_send_GD/code/kit/HiP-RSSM-Internal', '/Users/I562243/Documents/to_send_GD/code/kit/HiP-RSSM-Internal', '/Users/I562243/opt/anaconda3/envs/Rohit/lib/python37.zip', '/Users/I562243/opt/anaconda3/envs/Rohit/lib/python3.7', '/Users/I562243/opt/anaconda3/envs/Rohit/lib/python3.7/lib-dynload', '', '/Users/I562243/opt/anaconda3/envs/Rohit/lib/python3.7/site-packages', '/Users/I562243/opt/anaconda3/envs/Rohit/lib/python3.7/site-packages/IPython/extensions', '/Users/I562243/.ipython', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '/Users/I562243/Documents/to_send_GD/code/kit/HiP-RSSM-Internal', '.', '/Users/I562243/Documents/to_send_GD/code/kit/HiP-RSSM-Internal', '.', '/Users/I562243/Documents/to_send_GD/code/kit/HiP-RSSM-Internal', './experiments/mobileRobot/']


In [60]:
def generate_mobile_robot_data_set(data, dim):
    """Turn the windows of ``data`` into float tensors.

    Observations, actions and targets are cut down to their first ``dim``
    features along the last axis; the task index is passed through unchanged.

    Returns:
        Tuple ``(train_obs, train_act, train_targets, train_task_idx,
        test_obs, test_act, test_targets, test_task_idx)`` of float tensors.
    """
    def _arrays(windows):
        # Order matches the order of the returned tuple.
        return (windows['obs'][:, :, :dim],
                windows['act'][:, :, :dim],
                windows['target'][:, :, :dim],
                windows['task_index'])

    train_windows, test_windows = data.train_windows, data.test_windows
    train_arrays = _arrays(train_windows)
    test_arrays = _arrays(test_windows)
    print(test_arrays[1].shape, train_arrays[1].shape)

    return tuple(torch.from_numpy(array).float() for array in train_arrays + test_arrays)

@hydra.main(config_path='conf',config_name='config')
def my_app(cfg) -> None:
    """Hydra entry point: run one experiment on the ``model`` section of ``cfg``.

    Args:
        cfg: the configuration composed by Hydra from ``conf/config``.

    The experiment runs entirely inside the ``Experiment`` constructor, so
    nothing is returned.
    """
    Experiment(cfg.model)




def main():
    """Start the application by calling ``my_app`` without arguments.

    ``my_app`` is wrapped by ``@hydra.main``, which is expected to supply its
    ``cfg`` argument.
    """
    my_app()



## https://stackoverflow.com/questions/32761999/how-to-pass-an-entire-list-as-command-line-argument-in-python/32763023
# Script entry point.
# NOTE: the recorded notebook run of this cell ended in ``SystemExit: 2`` with
# "ipykernel_launcher.py: error: unrecognized arguments: -f", i.e. the argument
# parser rejected the kernel launcher's own command-line arguments.
if __name__ == '__main__':
    main()


usage: ipykernel_launcher.py [--help] [--hydra-help] [--version]
                             [--cfg {job,hydra,all}] [--resolve]
                             [--package PACKAGE] [--run] [--multirun]
                             [--shell-completion] [--config-path CONFIG_PATH]
                             [--config-name CONFIG_NAME]
                             [--config-dir CONFIG_DIR]
                             [--info [{all,config,defaults,defaults-tree,plugins,searchpath}]]
                             [overrides [overrides ...]]
ipykernel_launcher.py: error: unrecognized arguments: -f


SystemExit: 2

In [55]:

class Experiment():
    """Runs one train/evaluate cycle of the dynamics model on the mobile-robot data.

    The whole experiment is executed from the constructor.
    """

    def __init__(self, cfg):
        """Store the configuration and immediately run the experiment."""
        self.global_cfg = cfg
        self._experiment()

    def _experiment(self):
        """Load the data, build the model, optionally train it, then evaluate it.

        Training is skipped unless ``cfg.learn.load == False``; the evaluation
        part is skipped when ``cfg.wandb.sweep`` is truthy.
        """
        cfg = self.global_cfg
        torch.cuda.empty_cache()

        # 'delta': train on differences to the current state,
        # 'next_state': train directly on the next state.
        tar_type = cfg.data_reader.tar_type

        # ---- data ----
        data = metaMobileData(cfg.data_reader)
        (train_obs, train_act, train_targets, train_task_idx,
         test_obs, test_act, test_targets, test_task_idx) = generate_mobile_robot_data_set(
            data, cfg.data_reader.dim)

        # ---- naive baseline ----
        naive_baseline(test_obs[:, :-1, :], test_obs[:, 1:, :], steps=[1, 3, 5, 7, 10], data=data, denorma=True)

        impu = cfg.data_reader.imp
        save_path = os.getcwd() + '/experiments/saved_models/' + cfg.wandb.exp_name + '.ckpt'

        # ---- wandb run (it carries its own set of configs as well) ----
        expName = cfg.wandb.exp_name
        mode = "online" if cfg.wandb.log else "disabled"
        wandb_run = wandb.init(project=cfg.wandb.project_name, name=expName, mode=mode)

        # ---- model, training and inference modules ----
        obs_dim = train_obs.shape[-1]
        action_dim = train_act.shape[-1]
        target_dim = train_targets.shape[-1]

        encoder = SetEncoder(obs_dim + action_dim + target_dim,
                             lod=cfg.np.agg_dim, config=cfg.set_encoder)
        decoder = acrknContextualDecoder(ltd=cfg.np.agg_dim*2, target_dim=target_dim,
                                         lod=cfg.np.latent_obs_dim,
                                         lad=action_dim, config=cfg.ssm_decoder)

        np_model = npDyn(encoder, decoder, dec_type='acrkn', config=cfg.np)
        np_learn = hiprssm_dyn_trainer.Learn(np_model, loss=cfg.learn.loss, imp=impu, config=cfg, run=wandb_run,
                                             log=cfg.wandb['log'])

        if cfg.learn.load == False:
            # Train the model.
            np_learn.train(train_obs, train_act, train_targets, train_task_idx, cfg.learn.epochs,
                           cfg.learn.batch_size, test_obs, test_act, test_targets, test_task_idx)

        if cfg.wandb.sweep:
            return

        # ---- load the best model ----
        model_at = wandb_run.use_artifact('saved_model' + ':latest')
        model_at.download()  # fetches the artifact; the weights below come from save_path
        np_model.load_state_dict(torch.load(save_path))
        print('>>>>>>>>>>Loaded The Model From Local Folder<<<<<<<<<<<<<<<<<<<')

        # ---- inference ----
        np_infer = hiprssm_dyn_inference.Infer(np_model, data=data, config=cfg, run=wandb_run)
        batch_size = 10
        k = int(train_obs.shape[1] / 2)  # half of the window length
        pred_mean, pred_var, gt, obs_valid, _, _, cur_obs = np_infer.predict(
            test_obs, test_act, test_targets, test_task_idx,
            imp=impu, k=k,
            test_gt_known=True, batch_size=batch_size, tar=tar_type)
        print(pred_mean.shape, pred_var.shape, gt.shape, obs_valid.shape)

        rmse_next_state, pred_obs, gt_obs = root_mean_squared(pred_mean, gt, data,
                                                              tar="observations", denorma=True)
        wandb_run.summary['rmse_denorma_next_state'] = rmse_next_state

        print("Root mean square Error is:", rmse_next_state)

        # ---- multi-step ahead evaluation ----
        for step in (1, 3, 5, 10, 20, 30, 40, 50):
            pred_mean, pred_var, gt_multi = np_infer.predict_mbrl(test_obs, test_act, test_targets, k=k,
                                                                  batch_size=batch_size,
                                                                  multiStep=step, tar=tar_type)

            rmse_next_state, pred_obs, gt_obs = root_mean_squared(pred_mean, gt_multi, data,
                                                                  tar="observations", denorma=True)
            print(step, rmse_next_state)
            wandb_run.summary['rmse_multi_step_' + str(step)] = rmse_next_state
