# Value gradient error for linear policies in LQG

Experiment description on [Overleaf](https://www.overleaf.com/read/cmbgmxxpxqzr).

**Versioning:** [CalVer](https://calver.org) `MM.DD.MICRO`

In [1]:
from __future__ import annotations

import logging
import os
import os.path as osp

import lqsvg
import lqsvg.envs.lqr.utils as lqg_util
import lqsvg.experiment.utils as utils
import lqsvg.torch.named as nt
import pytorch_lightning as pl
import ray
from lqsvg.experiment.data import build_datamodule
from lqsvg.experiment.models import LightningModel
from lqsvg.experiment.worker import make_worker
from ray import tune
from raylab.policy.model_based.lightning import LightningTrainerSpec
from torch import Tensor

import wandb

In [2]:
# Working directory at the time this cell runs; Experiment.setup joins it
# into the `dir` argument passed to wandb.init.
CWD = os.getcwd()

In [3]:
class InputStatistics(pl.callbacks.Callback):
    """Callback that logs the mean and standard deviation of each training batch.

    The batch is unpacked as ``(obs, act, new_obs)`` and the statistics of every
    component are logged through the Lightning module under ``train/...`` keys.
    """

    def on_train_batch_end(
        self,
        trainer: pl.Trainer,
        pl_module: pl.LightningModule,
        outputs: Tensor,
        batch: tuple[Tensor, Tensor, Tensor],
        batch_idx: int,
        dataloader_idx: int,
    ):
        """Log per-batch input statistics after a training batch finishes.

        Args:
            trainer: unused.
            pl_module: module whose ``log`` method receives the statistics.
            outputs: unused.
            batch: triple unpacked as ``(obs, act, new_obs)``.
            batch_idx: unused.
            dataloader_idx: unused.
        """
        # Only `pl_module` and `batch` are needed; drop the rest explicitly.
        del trainer, outputs, batch_idx, dataloader_idx
        obs, act, new_obs = batch

        # (mean key, std key, tensor) for every batch component, in log order.
        components = (
            ("train/obs-mean", "train/obs-std", obs),
            ("train/act-mean", "train/act-std", act),
            ("train/new_obs-mean", "train/new_obs-std", new_obs),
        )
        for mean_key, std_key, tensor in components:
            pl_module.log(mean_key, tensor.mean())
            pl_module.log(std_key, tensor.std())

In [4]:
class Experiment(tune.Trainable):
    """Tune trainable that fits a ``LightningModel`` and reports its test metrics.

    ``setup`` opens a W&B run and builds the worker, model, datamodule,
    Lightning trainer and W&B artifact. ``step`` performs the whole
    fit-and-test cycle in a single call and marks the trial as done.
    """

    def setup(self, config: dict):
        """Open the W&B run for this trial and build all training components.

        Args:
            config: trial configuration; forwarded to ``wandb.init`` and read
                back afterwards through :attr:`hparams`.
        """
        wandb_kwargs = dict(
            dir=osp.join(CWD, ""),
            name="SVG Prediction",
            config=config,
            project="LQG-SVG",
            entity="angelovtt",
            tags=[utils.calver()],
            reinit=True,
            mode="online",
            save_code=True,
        )
        self.run = wandb.init(**wandb_kwargs)

        # Each builder relies on attributes set by the previous ones
        # (e.g. the model and datamodule need the worker), so order matters.
        builders = (
            self.make_worker,
            self.make_model,
            self.make_datamodule,
            self.make_lightning_trainer,
            self.make_artifact,
        )
        for build in builders:
            build()

        # NOTE(review): this runs after the trainer has been constructed, so
        # any info messages emitted during construction are presumably not
        # suppressed in this process -- confirm whether that is intended.
        utils.suppress_lightning_info_logging()

    @property
    def hparams(self):
        """Configuration object of the active W&B run (``self.run.config``)."""
        return self.run.config

    def make_worker(self):
        """Create ``self.worker`` from the env and policy configs in ``hparams``."""
        with nt.suppress_named_tensor_warning():
            hparams = self.hparams
            self.worker = make_worker(
                env_config=hparams.env_config,
                policy_config=hparams.policy,
                log_level=logging.WARNING,
            )

    def make_model(self):
        """Wrap the worker's policy and env in a ``LightningModel``.

        The learning rate and number of Monte Carlo samples are copied from the
        run's hyperparameters onto the model's own ``hparams``.
        """
        worker = self.worker
        policy = worker.get_policy()
        env = worker.env
        self.model = LightningModel(policy, env)

        run_hparams = self.hparams
        self.model.hparams.learning_rate = run_hparams.learning_rate
        self.model.hparams.mc_samples = run_hparams.mc_samples

    def make_datamodule(self):
        """Build the datamodule and collect its trajectories up front."""
        total_trajs = self.hparams.total_trajs
        self.datamodule = datamodule = build_datamodule(
            self.worker, total_trajs=total_trajs
        )
        datamodule.collect_trajectories(prog=False)

    def make_lightning_trainer(self):
        """Create ``self.trainer`` with W&B logging, early stopping and checkpoints.

        Early stopping and checkpointing both monitor
        ``LightningModel.early_stop_on``; checkpoints are written to the
        ``checkpoints`` folder inside the W&B run directory.
        """
        hparams = self.hparams
        run_dir = self.run.dir
        monitored = LightningModel.early_stop_on

        wandb_logger = pl.loggers.WandbLogger(
            save_dir=run_dir, log_model=False, experiment=self.run
        )
        stopper = pl.callbacks.EarlyStopping(
            monitor=monitored,
            min_delta=float(hparams.improvement_delta),
            patience=int(hparams.patience),
            mode="min",
            strict=True,
        )
        checkpointer = pl.callbacks.ModelCheckpoint(
            dirpath=osp.join(run_dir, "checkpoints"),
            monitor=monitored,
            save_top_k=-1,
            period=10,
            save_last=True,
        )

        self.trainer = pl.Trainer(
            default_root_dir=run_dir,
            logger=wandb_logger,
            num_sanity_val_steps=2,
            callbacks=[stopper, checkpointer, InputStatistics()],
            max_epochs=hparams.max_epochs,
            progress_bar_refresh_rate=0,  # no progress bar during model training
            weights_summary=None,  # no model summary printed before training
        )

    def make_artifact(self):
        """Create the W&B model artifact, named after the env's dimensions."""
        env = self.worker.env
        artifact_name = f"svg_prediction-lqg{env.n_state}.{env.n_ctrl}.{env.horizon}"
        self.artifact = wandb.Artifact(artifact_name, type="model")

    def step(self) -> dict:
        """Fit the model, test it, upload the checkpoints and finish the trial.

        Returns:
            The first test-results dict from the trainer, together with
            Tune's ``DONE`` flag set to ``True``.
        """
        self.log_env_info()

        trainer, model, datamodule = self.trainer, self.model, self.datamodule
        with utils.suppress_dataloader_warning():
            trainer.fit(model, datamodule=datamodule)

            test_outputs = trainer.test(model, datamodule=datamodule)
            results = test_outputs[0]
            self.run.summary.update(results)

        # Attach the checkpoint directory to the artifact and log it to the run.
        checkpoint_dir = trainer.checkpoint_callback.dirpath
        self.artifact.add_dir(checkpoint_dir)
        self.run.log_artifact(self.artifact)

        report = {tune.result.DONE: True}
        report.update(results)
        return report

    def log_env_info(self):
        """Record stability, controllability and eigenvalues in the run summary."""
        env_dynamics = self.worker.env.dynamics
        eigenvalues = lqg_util.stationary_eigvals(env_dynamics)
        checks = {
            "stability": lqg_util.isstable(eigvals=eigenvalues),
            "controllability": lqg_util.iscontrollable(env_dynamics),
        }
        summary = self.run.summary
        summary.update(checks)
        summary.update({"Fs_eigvals": wandb.Histogram(eigenvalues)})

    def cleanup(self):
        """Finish the W&B run opened in ``setup``."""
        self.run.finish()

In [5]:
# Start Ray with warning-level logging, run lqsvg's registration hook, and
# apply the Lightning info-log suppression in this (driver) process.
ray.init(logging_level=logging.WARNING)
lqsvg.register_all()
utils.suppress_lightning_info_logging()

# Trial configuration read by Experiment through its `hparams` property:
#   env_config, policy                    -> make_worker
#   learning_rate, mc_samples             -> make_model
#   total_trajs                           -> make_datamodule
#   improvement_delta, patience, max_epochs -> make_lightning_trainer
config = dict(
    env_config={
        "n_state": 2,
        "n_ctrl": 2,
        "horizon": 100,
        "stationary": True,
        "Fs_eigval_range": (0.0, 1.0),
        "num_envs": 100,
    },
    policy=dict(initialization="xavier_uniform"),
    learning_rate=1e-3,
    mc_samples=32,
    total_trajs=1000,
    improvement_delta=0.0,
    patience=3,
    max_epochs=200,
)

# Launch four samples of the same configuration; results go under ./results.
analysis = tune.run(
    Experiment,
    config=config,
    num_samples=4,
    local_dir="./results",
)

Trial name,status,loc
Experiment_6dbf9_00000,RUNNING,


[2m[36m(pid=13612)[0m wandb: Currently logged in as: angelovtt (use `wandb login --relogin` to force relogin)
[2m[36m(pid=13615)[0m wandb: Currently logged in as: angelovtt (use `wandb login --relogin` to force relogin)
[2m[36m(pid=13613)[0m wandb: Currently logged in as: angelovtt (use `wandb login --relogin` to force relogin)
[2m[36m(pid=13614)[0m wandb: Currently logged in as: angelovtt (use `wandb login --relogin` to force relogin)
[2m[36m(pid=13612)[0m wandb: Tracking run with wandb version 0.10.23
[2m[36m(pid=13612)[0m wandb: Syncing run SVG Prediction
[2m[36m(pid=13612)[0m wandb: ⭐️ View project at https://wandb.ai/angelovtt/LQG-SVG
[2m[36m(pid=13612)[0m wandb: 🚀 View run at https://wandb.ai/angelovtt/LQG-SVG/runs/1nrdaeun
[2m[36m(pid=13612)[0m wandb: Run data is saved locally in /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-1nrdaeun
[2m[36m(pid=13612)[0m wandb: Run `wandb offline` to turn off syncing

[2m[36m(pid=13612)[0m 
[2m[36m(pid=13614)[0m 
[2m[36m(pid=13615)[0m 
[2m[36m(pid=13613)[0m 


[2m[36m(pid=13612)[0m GPU available: False, used: False
[2m[36m(pid=13612)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=13612)[0m 2021-03-24 14:08:10,027	INFO trainable.py:100 -- Trainable.setup took 27.533 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=13613)[0m GPU available: False, used: False
[2m[36m(pid=13613)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=13614)[0m GPU available: False, used: False
[2m[36m(pid=13614)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=13614)[0m 2021-03-24 14:08:10,069	INFO trainable.py:100 -- Trainable.setup took 27.575 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=13615)[0m GPU available: False, used: False
[2m[36m(pid=13615)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=13615)[0m 2021-03-24 14:08:10,150	INFO trai

[2m[36m(pid=13612)[0m --------------------------------------------------------------------------------
[2m[36m(pid=13612)[0m DATALOADER:0 TEST RESULTS
[2m[36m(pid=13612)[0m {'test/analytic_cossim': tensor(0.1866),
[2m[36m(pid=13612)[0m  'test/analytic_diff': tensor(17.0537),
[2m[36m(pid=13612)[0m  'test/analytic_svg_norm': tensor(9697.0586),
[2m[36m(pid=13612)[0m  'test/analytic_value': tensor(-5062.4160),
[2m[36m(pid=13612)[0m  'test/loss': tensor(351.4277),
[2m[36m(pid=13612)[0m  'test/monte_carlo_cossim': tensor(0.1868),
[2m[36m(pid=13612)[0m  'test/monte_carlo_diff': tensor(-132.6631),
[2m[36m(pid=13612)[0m  'test/monte_carlo_svg_norm': tensor(10097.2988),
[2m[36m(pid=13612)[0m  'test/monte_carlo_value': tensor(-5212.1328),
[2m[36m(pid=13612)[0m  'true_svg_norm': tensor(1673.1217),
[2m[36m(pid=13612)[0m  'true_value': tensor(-5079.4697)}
[2m[36m(pid=13612)[0m --------------------------------------------------------------------------------
R

Trial name,status,loc,iter,total time (s),test/loss,true_value,true_svg_norm
Experiment_6dbf9_00000,RUNNING,,,,,,
Experiment_6dbf9_00001,RUNNING,,,,,,
Experiment_6dbf9_00002,RUNNING,,,,,,
Experiment_6dbf9_00003,TERMINATED,,1.0,240.959,,-5079.47,1673.12


[2m[36m(pid=13615)[0m --------------------------------------------------------------------------------
[2m[36m(pid=13615)[0m DATALOADER:0 TEST RESULTS
[2m[36m(pid=13615)[0m {
[2m[36m(pid=13615)[0m 'test/analytic_cossim': 
[2m[36m(pid=13615)[0m tensor(0.2268),
[2m[36m(pid=13615)[0m  'test/analytic_diff': tensor(-8.5337),
[2m[36m(pid=13615)[0m  'test/analytic_svg_norm': 
[2m[36m(pid=13615)[0m tensor(668.1313),
[2m[36m(pid=13615)[0m  
[2m[36m(pid=13615)[0m 'test/analytic_value': tensor(-681.2368),
[2m[36m(pid=13615)[0m  
[2m[36m(pid=13615)[0m 'test/loss': 
[2m[36m(pid=13615)[0m tensor(235.5539),
[2m[36m(pid=13615)[0m  'test/monte_carlo_cossim': 
[2m[36m(pid=13615)[0m tensor(0.2239),
[2m[36m(pid=13615)[0m  'test/monte_carlo_diff'
[2m[36m(pid=13615)[0m : tensor(-13.2927)
[2m[36m(pid=13615)[0m ,
[2m[36m(pid=13615)[0m  'test/monte_carlo_svg_norm'
[2m[36m(pid=13615)[0m : 
[2m[36m(pid=13615)[0m tensor(663.5935)
[2m[36m(pid=13615)

[2m[36m(pid=13612)[0m wandb: Waiting for W&B process to finish, PID 13656
[2m[36m(pid=13612)[0m wandb: Program ended successfully.
[2m[36m(pid=13615)[0m wandb: Adding directory to artifact (/Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-26rc648x/files/checkpoints)... Done. 0.0s


Result for Experiment_6dbf9_00000:
  date: 2021-03-24_14-12-11
  done: true
  experiment_id: 4aa7abfb04564424a93d53be5e661326
  hostname: Angelos-MBP
  iterations_since_restore: 1
  node_ip: 192.168.15.8
  pid: 13615
  test/analytic_cossim: 0.22680942714214325
  test/analytic_diff: -8.53369140625
  test/analytic_svg_norm: 668.1312866210938
  test/analytic_value: -681.2367553710938
  test/loss: 235.55386352539062
  test/monte_carlo_cossim: 0.22388145327568054
  test/monte_carlo_diff: -13.292724609375
  test/monte_carlo_svg_norm: 663.593505859375
  test/monte_carlo_value: -685.9957885742188
  time_since_restore: 241.2482888698578
  time_this_iter_s: 241.2482888698578
  time_total_s: 241.2482888698578
  timestamp: 1616605931
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 6dbf9_00000
  true_svg_norm: 505.98089599609375
  true_value: -672.7030639648438
  


[2m[36m(pid=13615)[0m wandb: Waiting for W&B process to finish, PID 13655
[2m[36m(pid=13615)[0m wandb: Program ended successfully.
[2m[36m(pid=13613)[0m wandb: Adding directory to artifact (/Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-3pepdaao/files/checkpoints)... 
[2m[36m(pid=13613)[0m Done. 0.0s
[2m[36m(pid=13614)[0m wandb: Adding directory to artifact (/Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-2uceg1h7/files/checkpoints)... 
[2m[36m(pid=13614)[0m Done. 0.1s


[2m[36m(pid=13613)[0m --------------------------------------------------------------------------------
[2m[36m(pid=13613)[0m DATALOADER:0 TEST RESULTS
[2m[36m(pid=13613)[0m {'test/analytic_cossim': tensor(0.6133),
[2m[36m(pid=13613)[0m  'test/analytic_diff': tensor(3.2177),
[2m[36m(pid=13613)[0m  'test/analytic_svg_norm': tensor(222.3044),
[2m[36m(pid=13613)[0m  'test/analytic_value': tensor(-647.3267),
[2m[36m(pid=13613)[0m  'test/loss': tensor(301.6771),
[2m[36m(pid=13613)[0m  'test/monte_carlo_cossim': tensor(0.5788),
[2m[36m(pid=13613)[0m  'test/monte_carlo_diff': tensor(18.7900),
[2m[36m(pid=13613)[0m  'test/monte_carlo_svg_norm': tensor(217.4593),
[2m[36m(pid=13613)[0m  'test/monte_carlo_value': tensor(-631.7544),
[2m[36m(pid=13613)[0m  'true_svg_norm': tensor(565.4276),
[2m[36m(pid=13613)[0m  'true_value': tensor(-650.5444)}
[2m[36m(pid=13613)[0m --------------------------------------------------------------------------------
[2m[36m(p

[2m[36m(pid=13613)[0m wandb: Waiting for W&B process to finish, PID 13654
[2m[36m(pid=13613)[0m wandb: Program ended successfully.
[2m[36m(pid=13614)[0m wandb: Waiting for W&B process to finish, PID 13653
[2m[36m(pid=13614)[0m wandb: Program ended successfully.


Trial name,status,loc,iter,total time (s),test/loss,true_value,true_svg_norm
Experiment_6dbf9_00000,TERMINATED,,1,241.248,,-672.703,505.981
Experiment_6dbf9_00001,TERMINATED,,1,242.229,,-11567.3,26996.0
Experiment_6dbf9_00002,TERMINATED,,1,242.059,,-650.544,565.428
Experiment_6dbf9_00003,TERMINATED,,1,240.959,,-5079.47,1673.12


[2m[36m(pid=13612)[0m wandb: - 1.19MB of 1.19MB uploaded (0.00MB deduped)
[2m[36m(pid=13615)[0m wandb: - 1.19MB of 1.19MB uploaded (0.00MB deduped)
[2m[36m(pid=13613)[0m wandb: ERROR Error while calling W&B API: Error 1062: Duplicate entry '140944-22' for key 'unique_artifact_collection_membership_version' (<Response [409]>)
[2m[36m(pid=13614)[0m wandb: - 1.19MB of 1.19MB uploaded (0.00MB deduped)
wandb:                                                                                
[2m[36m(pid=13615)[0m wandb: \ 2.19MB of 2.24MB uploaded (0.00MB deduped)
[2m[36m(pid=13613)[0m wandb: - 1.19MB of 1.19MB uploaded (0.00MB deduped)
[2m[36m(pid=13612)[0m wandb: Find user logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-1nrdaeun/logs/debug.log
[2m[36m(pid=13612)[0m wandb: Find internal logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-

[2m[36m(pid=13612)[0m 


wandb:                                                                                
[2m[36m(pid=13614)[0m wandb: Find user logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-2uceg1h7/logs/debug.log
[2m[36m(pid=13614)[0m wandb: Find internal logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-2uceg1h7/logs/debug-internal.log
[2m[36m(pid=13614)[0m wandb: Run summary:
[2m[36m(pid=13614)[0m wandb:                   stability True
[2m[36m(pid=13614)[0m wandb:             controllability True
[2m[36m(pid=13614)[0m wandb:                    val/loss 442.57678
[2m[36m(pid=13614)[0m wandb:       val/monte_carlo_value -3661.51758
[2m[36m(pid=13614)[0m wandb:    val/monte_carlo_svg_norm 5190.45068
[2m[36m(pid=13614)[0m wandb:          val/analytic_value -3404.2644
[2m[36m(pid=13614)[0m wandb:       val/analytic_svg_norm 4690.47559
[2m

[2m[36m(pid=13614)[0m 


[2m[36m(pid=13615)[0m wandb: Find user logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-26rc648x/logs/debug.log
[2m[36m(pid=13615)[0m wandb: Find internal logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-26rc648x/logs/debug-internal.log
[2m[36m(pid=13615)[0m wandb: Run summary:
[2m[36m(pid=13615)[0m wandb:                   stability True
[2m[36m(pid=13615)[0m wandb:             controllability True
[2m[36m(pid=13615)[0m wandb:                    val/loss 236.32231
[2m[36m(pid=13615)[0m wandb:       val/monte_carlo_value -691.99103
[2m[36m(pid=13615)[0m wandb:    val/monte_carlo_svg_norm 685.31439
[2m[36m(pid=13615)[0m wandb:          val/analytic_value -681.23676
[2m[36m(pid=13615)[0m wandb:       val/analytic_svg_norm 668.13129
[2m[36m(pid=13615)[0m wandb:        val/monte_carlo_diff -19.28796
[2m[36m(pid=13615)[0m

[2m[36m(pid=13615)[0m 


wandb:                                                                                
[2m[36m(pid=13613)[0m wandb: Find user logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-3pepdaao/logs/debug.log
[2m[36m(pid=13613)[0m wandb: Find internal logs for this run at: /Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/wandb/run-20210324_140743-3pepdaao/logs/debug-internal.log
[2m[36m(pid=13613)[0m wandb: Run summary:
[2m[36m(pid=13613)[0m wandb:                   stability True
[2m[36m(pid=13613)[0m wandb:             controllability True
[2m[36m(pid=13613)[0m wandb:                    val/loss 303.50375
[2m[36m(pid=13613)[0m wandb:       val/monte_carlo_value -599.3916
[2m[36m(pid=13613)[0m wandb:    val/monte_carlo_svg_norm 186.22202
[2m[36m(pid=13613)[0m wandb:          val/analytic_value -647.32672
[2m[36m(pid=13613)[0m wandb:       val/analytic_svg_norm 222.30438
[2m[36

In [6]:
# Shut down the Ray runtime started by ray.init() in the experiment cell.
ray.shutdown()