# Value gradient error for linear policies in LQG

Experiment description on [Overleaf](https://www.overleaf.com/read/cmbgmxxpxqzr).

**Versioning:** [CalVer](https://calver.org) `MM.DD.MICRO`

In [1]:
from __future__ import annotations

import logging
import os.path as osp

import lqsvg
import lqsvg.experiment.utils as utils
import lqsvg.torch.named as nt
import pytorch_lightning as pl
import ray
from lqsvg.experiment.data import build_datamodule
from lqsvg.experiment.models import LightningModel
from lqsvg.experiment.worker import make_worker
from ray import tune
from raylab.policy.model_based.lightning import LightningTrainerSpec
from torch import Tensor

import wandb

In [2]:
class InputStatistics(pl.callbacks.Callback):
    """Lightning callback logging mean/std of every tensor in a training batch."""

    def on_train_batch_end(
        self,
        trainer: pl.Trainer,
        pl_module: pl.LightningModule,
        outputs: Tensor,
        batch: tuple[Tensor, Tensor, Tensor],
        batch_idx: int,
        dataloader_idx: int,
    ):
        """Log the mean and standard deviation of each batch component.

        Args:
            trainer: Unused.
            pl_module: Module whose ``log`` method receives the statistics.
            outputs: Unused.
            batch: Triple unpacked as ``(obs, act, new_obs)``.
            batch_idx: Unused.
            dataloader_idx: Unused.
        """
        # Only the module and the batch are needed by this hook.
        del trainer, outputs, batch_idx, dataloader_idx

        # Strict 3-way unpacking: a batch of any other length raises here.
        obs, act, new_obs = batch
        labelled = (("obs", obs), ("act", act), ("new_obs", new_obs))
        for label, tensor in labelled:
            pl_module.log(f"train/{label}-mean", tensor.mean())
            pl_module.log(f"train/{label}-std", tensor.std())

In [3]:
class Experiment(tune.Trainable):
    """Tune trainable that fits and tests a ``LightningModel`` in one W&B run.

    ``setup`` opens the W&B run and builds the worker, model, datamodule,
    Lightning trainer and W&B artifact. ``step`` runs fit and test once, logs
    the test results and checkpoints, and reports the trial as done.
    ``cleanup`` finishes the W&B run.
    """

    def setup(self, config: dict):
        """Start the W&B run and build every component used by ``step``.

        Args:
            config: Trial configuration; passed to ``wandb.init`` as the run
                config and read back through ``hparams``.
        """
        wandb_options = dict(
            name="SVG Prediction",
            config=config,
            project="LQG-SVG",
            entity="angelovtt",
            tags=[utils.calver()],
            reinit=True,
            mode="online",
            save_code=True,
        )
        self.run = wandb.init(**wandb_options)

        # Order matters: later builders read attributes set by earlier ones
        # (e.g. the model and datamodule both need ``self.worker``).
        builders = (
            self.make_worker,
            self.make_model,
            self.make_datamodule,
            self.make_lightning_trainer,
            self.make_artifact,
        )
        for build in builders:
            build()
        utils.suppress_lightning_info_logging()

    @property
    def hparams(self):
        """Configuration object of the active W&B run (``self.run.config``)."""
        run = self.run
        return run.config

    def make_worker(self):
        """Create ``self.worker`` from the ``env_config`` hyperparameter."""
        with nt.suppress_named_tensor_warning():
            env_config = self.hparams.env_config
            self.worker = make_worker(
                env_config=env_config, log_level=logging.WARNING
            )

    def make_model(self):
        """Create ``self.model`` from the worker's policy and environment.

        The ``learning_rate`` and ``mc_samples`` hyperparameters of the run are
        copied onto the model's own ``hparams``.
        """
        worker = self.worker
        self.model = LightningModel(worker.get_policy(), worker.env)
        for key in ("learning_rate", "mc_samples"):
            setattr(self.model.hparams, key, getattr(self.hparams, key))

    def make_datamodule(self):
        """Create ``self.datamodule`` and collect its trajectories."""
        total_trajs = self.hparams.total_trajs
        self.datamodule = build_datamodule(self.worker, total_trajs=total_trajs)
        self.datamodule.collect_trajectories(prog=False)

    def make_lightning_trainer(self):
        """Create ``self.trainer`` with W&B logging and the training callbacks.

        Callbacks, in order: early stopping and checkpointing (both monitoring
        ``LightningModel.early_stop_on``) followed by ``InputStatistics``.
        """
        run_dir = self.run.dir
        monitored = LightningModel.early_stop_on

        wandb_logger = pl.loggers.WandbLogger(
            save_dir=run_dir, log_model=False, experiment=self.run
        )

        stopper = pl.callbacks.EarlyStopping(
            monitor=monitored,
            min_delta=float(self.hparams.improvement_delta),
            patience=int(self.hparams.patience),
            mode="min",
            strict=True,
        )
        checkpointer = pl.callbacks.ModelCheckpoint(
            dirpath=osp.join(run_dir, "checkpoints"),
            monitor=monitored,
            save_top_k=-1,
            period=10,
            save_last=True,
        )

        callbacks = [stopper, checkpointer, InputStatistics()]
        self.trainer = pl.Trainer(
            default_root_dir=run_dir,
            logger=wandb_logger,
            num_sanity_val_steps=2,
            callbacks=callbacks,
            max_epochs=self.hparams.max_epochs,
            progress_bar_refresh_rate=0,  # don't show progress bar for model training
            weights_summary=None,  # don't print summary before training
        )

    def make_artifact(self):
        """Create ``self.artifact``, a W&B model artifact named after the env.

        The name embeds the environment's ``n_state``, ``n_ctrl`` and
        ``horizon`` attributes.
        """
        env = self.worker.env
        artifact_name = f"svg_prediction-lqg{env.n_state}.{env.n_ctrl}.{env.horizon}"
        self.artifact = wandb.Artifact(artifact_name, type="model")

    def step(self) -> dict:
        """Fit and test the model once, then upload results and checkpoints.

        Returns:
            The first entry of the test results, preceded by
            ``tune.result.DONE: True`` so the trial is reported as done.
        """
        trainer = self.trainer
        with utils.suppress_dataloader_warning():
            trainer.fit(self.model, datamodule=self.datamodule)

            test_outputs = trainer.test(self.model, datamodule=self.datamodule)
            results = test_outputs[0]
            self.run.log(results)

            checkpoint_dir = self.trainer.checkpoint_callback.dirpath
            self.artifact.add_dir(checkpoint_dir)
            self.run.log_artifact(self.artifact)

        report = {tune.result.DONE: True}
        report.update(results)
        return report

    def cleanup(self):
        """Finish the W&B run opened in ``setup``."""
        self.run.finish()

In [4]:
# Start Ray with warning-level logging and run lqsvg's registration hook
# before handing the experiment to Tune.
ray.init(logging_level=logging.WARNING)
lqsvg.register_all()

# Hyperparameters read by `Experiment` through its W&B run config.
config = dict(
    env_config={"n_state": 2, "n_ctrl": 2, "horizon": 100, "num_envs": 100},
    learning_rate=1e-3,
    mc_samples=32,
    total_trajs=1000,
    improvement_delta=0.0,
    patience=3,
    max_epochs=200,
)

# `num_samples=2` runs the same configuration as two separate trials.
analysis = tune.run(Experiment, config=config, num_samples=2)

Trial name,status,loc
Experiment_661c5_00000,RUNNING,


[2m[36m(pid=5915)[0m wandb: Currently logged in as: angelovtt (use `wandb login --relogin` to force relogin)
[2m[36m(pid=5916)[0m wandb: Currently logged in as: angelovtt (use `wandb login --relogin` to force relogin)
[2m[36m(pid=5915)[0m wandb: Tracking run with wandb version 0.10.23
[2m[36m(pid=5915)[0m wandb: Syncing run SVG Prediction
[2m[36m(pid=5915)[0m wandb: ⭐️ View project at https://wandb.ai/angelovtt/LQG-SVG
[2m[36m(pid=5915)[0m wandb: 🚀 View run at https://wandb.ai/angelovtt/LQG-SVG/runs/nn06qnr9
[2m[36m(pid=5915)[0m wandb: Run data is saved locally in /Users/angelolovatto/ray_results/Experiment_2021-03-24_11-15-34/Experiment_661c5_00000_0_2021-03-24_11-15-34/wandb/run-20210324_111542-nn06qnr9
[2m[36m(pid=5915)[0m wandb: Run `wandb offline` to turn off syncing.
[2m[36m(pid=5916)[0m wandb: Tracking run with wandb version 0.10.23
[2m[36m(pid=5916)[0m wandb: Syncing run SVG Prediction
[2m[36m(pid=5916)[0m wandb: ⭐️ View project at https://wandb

[2m[36m(pid=5915)[0m 
[2m[36m(pid=5916)[0m 


[2m[36m(pid=5915)[0m GPU available: False, used: False
[2m[36m(pid=5915)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=5915)[0m 2021-03-24 11:15:55,366	INFO trainable.py:100 -- Trainable.setup took 14.351 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=5916)[0m GPU available: False, used: False
[2m[36m(pid=5916)[0m TPU available: None, using: 0 TPU cores
[2m[36m(pid=5916)[0m 2021-03-24 11:15:55,458	INFO trainable.py:100 -- Trainable.setup took 14.443 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=5915)[0m wandb: Adding directory to artifact (/Users/angelolovatto/ray_results/Experiment_2021-03-24_11-15-34/Experiment_661c5_00000_0_2021-03-24_11-15-34/wandb/run-20210324_111542-nn06qnr9/files/checkpoints)... 
[2m[36m(pid=5915)[0m Done. 0.1s


[2m[36m(pid=5915)[0m --------------------------------------------------------------------------------
[2m[36m(pid=5915)[0m DATALOADER:0 TEST RESULTS
[2m[36m(pid=5915)[0m {'test/analytic_cossim': tensor(0.3583),
[2m[36m(pid=5915)[0m  'test/analytic_diff': tensor(17.5688),
[2m[36m(pid=5915)[0m  'test/analytic_svg_norm': tensor(105.1212),
[2m[36m(pid=5915)[0m  'test/analytic_value': tensor(-331.5576),
[2m[36m(pid=5915)[0m  'test/loss': tensor(296.2312),
[2m[36m(pid=5915)[0m  'test/monte_carlo_cossim': tensor(0.3334),
[2m[36m(pid=5915)[0m  'test/monte_carlo_diff': tensor(32.5031),
[2m[36m(pid=5915)[0m  'test/monte_carlo_svg_norm': tensor(108.0275),
[2m[36m(pid=5915)[0m  'test/monte_carlo_value': tensor(-316.6234),
[2m[36m(pid=5915)[0m  'true_svg_norm': tensor(320.8534),
[2m[36m(pid=5915)[0m  'true_value': tensor(-349.1265)}
[2m[36m(pid=5915)[0m --------------------------------------------------------------------------------
Result for Experiment_66

Trial name,status,loc,iter,total time (s),test/loss,true_value,true_svg_norm
Experiment_661c5_00001,RUNNING,,,,,,
Experiment_661c5_00000,TERMINATED,,1.0,152.663,,-349.126,320.853


[2m[36m(pid=5915)[0m wandb: Waiting for W&B process to finish, PID 5944
[2m[36m(pid=5915)[0m wandb: Program ended successfully.
[2m[36m(pid=5916)[0m wandb: Adding directory to artifact (/Users/angelolovatto/ray_results/Experiment_2021-03-24_11-15-34/Experiment_661c5_00001_1_2021-03-24_11-15-34/wandb/run-20210324_111542-1mw7mitt/files/checkpoints)... 
[2m[36m(pid=5916)[0m Done. 0.0s


[2m[36m(pid=5916)[0m --------------------------------------------------------------------------------
[2m[36m(pid=5916)[0m DATALOADER:0 TEST RESULTS
[2m[36m(pid=5916)[0m {'test/analytic_cossim': tensor(0.3002),
[2m[36m(pid=5916)[0m  'test/analytic_diff': tensor(43.6211),
[2m[36m(pid=5916)[0m  'test/analytic_svg_norm': tensor(212.9159),
[2m[36m(pid=5916)[0m  'test/analytic_value': tensor(-489.2738),
[2m[36m(pid=5916)[0m  'test/loss': tensor(297.5763),
[2m[36m(pid=5916)[0m  'test/monte_carlo_cossim': tensor(0.2982),
[2m[36m(pid=5916)[0m  'test/monte_carlo_diff': tensor(31.6685),
[2m[36m(pid=5916)[0m  'test/monte_carlo_svg_norm': tensor(229.4241),
[2m[36m(pid=5916)[0m  'test/monte_carlo_value': tensor(-501.2264),
[2m[36m(pid=5916)[0m  'true_svg_norm': tensor(731.5237),
[2m[36m(pid=5916)[0m  'true_value': tensor(-532.8949)}
[2m[36m(pid=5916)[0m --------------------------------------------------------------------------------
Result for Experiment_66

Trial name,status,loc,iter,total time (s),test/loss,true_value,true_svg_norm
Experiment_661c5_00000,TERMINATED,,1,152.663,,-349.126,320.853
Experiment_661c5_00001,TERMINATED,,1,153.889,,-532.895,731.524


[2m[36m(pid=5916)[0m wandb: Waiting for W&B process to finish, PID 5942
[2m[36m(pid=5916)[0m wandb: Program ended successfully.
[2m[36m(pid=5915)[0m wandb: - 1.16MB of 1.16MB uploaded (0.00MB deduped)
[2m[36m(pid=5916)[0m wandb: - 1.16MB of 1.16MB uploaded (0.00MB deduped)
wandb:                                                                                
[2m[36m(pid=5915)[0m wandb: Find user logs for this run at: /Users/angelolovatto/ray_results/Experiment_2021-03-24_11-15-34/Experiment_661c5_00000_0_2021-03-24_11-15-34/wandb/run-20210324_111542-nn06qnr9/logs/debug.log
[2m[36m(pid=5915)[0m wandb: Find internal logs for this run at: /Users/angelolovatto/ray_results/Experiment_2021-03-24_11-15-34/Experiment_661c5_00000_0_2021-03-24_11-15-34/wandb/run-20210324_111542-nn06qnr9/logs/debug-internal.log
[2m[36m(pid=5915)[0m wandb: Run summary:
[2m[36m(pid=5915)[0m wandb:                    val/loss 297.48938
[2m[36m(pid=5915)[0m wandb:       val/monte_carlo_valu

[2m[36m(pid=5915)[0m 


[2m[36m(pid=5916)[0m wandb: Find user logs for this run at: /Users/angelolovatto/ray_results/Experiment_2021-03-24_11-15-34/Experiment_661c5_00001_1_2021-03-24_11-15-34/wandb/run-20210324_111542-1mw7mitt/logs/debug.log
[2m[36m(pid=5916)[0m wandb: Find internal logs for this run at: /Users/angelolovatto/ray_results/Experiment_2021-03-24_11-15-34/Experiment_661c5_00001_1_2021-03-24_11-15-34/wandb/run-20210324_111542-1mw7mitt/logs/debug-internal.log
[2m[36m(pid=5916)[0m wandb: Run summary:
[2m[36m(pid=5916)[0m wandb:                    val/loss 300.83276
[2m[36m(pid=5916)[0m wandb:       val/monte_carlo_value -472.69324
[2m[36m(pid=5916)[0m wandb:    val/monte_carlo_svg_norm 199.18625
[2m[36m(pid=5916)[0m wandb:          val/analytic_value -489.27383
[2m[36m(pid=5916)[0m wandb:       val/analytic_svg_norm 212.91592
[2m[36m(pid=5916)[0m wandb:        val/monte_carlo_diff 60.20166
[2m[36m(pid=5916)[0m wandb:           val/analytic_diff 43.62106
[2m[36m(pid=59

In [5]:
# Shut down the Ray runtime that was started with `ray.init` earlier in this notebook.
ray.shutdown()