# Value gradient error for linear policies in LQG

Experiment description on [Overleaf](https://www.overleaf.com/read/cmbgmxxpxqzr).

**Versioning:** [CalVer](https://calver.org) `MM.DD.MICRO`

In [1]:
from __future__ import annotations
import os.path as osp
from datetime import date

import lqsvg.torch.named as nt
import pytorch_lightning as pl
import torch
import wandb
from raylab.policy.model_based.lightning import LightningTrainerSpec
from torch import Tensor

from data import build_datamodule
from models import LightningModel
from policy import make_worker
from utils import suppress_dataloader_warning

In [2]:
def make_lightning_trainer(run) -> pl.Trainer:
    """Assemble the Lightning trainer for one experiment run.

    The trainer logs through a ``WandbLogger`` bound to ``run`` and carries two
    callbacks that both monitor ``LightningModel.early_stop_on``: an
    ``EarlyStopping`` callback (mode ``"min"``) and a ``ModelCheckpoint``
    callback writing into ``<run.dir>/checkpoints``.

    Args:
        run: The experiment run object. Only ``run.dir`` is read here; the
            object itself is handed to the logger as its ``experiment``.
            Presumably the value returned by ``wandb.init`` -- confirm at the
            call site.

    Returns:
        A ``pl.Trainer`` rooted at ``run.dir``.
    """
    # Training budget and early-stopping settings for this experiment.
    spec = LightningTrainerSpec(max_epochs=20, patience=3, improvement_delta=0.0)
    monitored_metric = LightningModel.early_stop_on
    checkpoint_dir = osp.join(run.dir, "checkpoints")

    wandb_logger = pl.loggers.WandbLogger(
        save_dir=run.dir, log_model=False, experiment=run
    )
    callbacks = [
        pl.callbacks.EarlyStopping(
            monitor=monitored_metric,
            min_delta=spec.improvement_delta,
            patience=spec.patience,
            mode="min",
            strict=True,
        ),
        pl.callbacks.ModelCheckpoint(
            dirpath=checkpoint_dir,
            monitor=monitored_metric,
            save_top_k=-1,
            period=10,
            save_last=True,
        ),
    ]
    return pl.Trainer(
        default_root_dir=run.dir,
        logger=wandb_logger,
        num_sanity_val_steps=2,
        callbacks=callbacks,
        max_epochs=spec.max_epochs,
        max_steps=spec.max_steps,
    )

In [3]:
def calver(micro: int = 0, *, today: date | None = None) -> str:
    """Return a CalVer ``MM.DD.MICRO`` version string.

    Month and day are written without zero padding, e.g. ``"3.16.0"`` for
    March 16 with ``micro=0``.

    Args:
        micro: Value of the trailing MICRO component. Defaults to ``0``, which
            reproduces the original hard-coded behaviour.
        today: Date to build the version from. Defaults to ``date.today()``;
            pass an explicit date to get a reproducible string.

    Returns:
        The version string ``"<month>.<day>.<micro>"``.
    """
    if today is None:
        today = date.today()
    return f"{today.month}.{today.day}.{micro}"

print("CalVer:", calver())

CalVer: 3.16.0


In [4]:
# Start the W&B run for this experiment. The current CalVer string is attached
# as the run's only tag.
run = wandb.init(
    name="SVG Prediction",
    project="LQG-SVG",
    entity="angelovtt",
    tags=[calver()],
    reinit=True,
    mode="online",
    save_code=True,
)

# Build the worker, model, data module and trainer with lqsvg's named-tensor
# warning suppressed.
with nt.suppress_named_tensor_warning():
    # Environment settings; also merged into the model hyperparameters below.
    env_config = dict(n_state=2, n_ctrl=2, horizon=100, num_envs=100)
    worker = make_worker(env_config)

    # The model is built from the worker's policy and environment.
    model = LightningModel(worker.get_policy(), worker.env)
    # Record the learning rate and environment settings as hyperparameters,
    # then copy the full set into the run config.
    model.hparams.learning_rate = 1e-3
    model.hparams.update(env_config)
    run.config.update(model.hparams)

    # NOTE(review): presumably this collects 5000 trajectories with the worker
    # (the recorded output shows a "Collecting ... /5000 traj" bar) -- confirm
    # in `data.build_datamodule`.
    datamodule = build_datamodule(worker, total_trajs=5000)

    trainer = make_lightning_trainer(run)

[34m[1mwandb[0m: Currently logged in as: [33mangelovtt[0m (use `wandb login --relogin` to force relogin)


Collecting:   0%|          | 0/5000 [00:00<?, ?traj/s]

GPU available: False, used: False
TPU available: None, using: 0 TPU cores


In [5]:
# Train, test and upload checkpoints inside the run's context manager, with the
# dataloader warning suppressed.
# NOTE(review): presumably leaving the `with run` block finishes the W&B run --
# confirm against the wandb documentation.
with run, suppress_dataloader_warning():
    # Artifact name encodes the MDP's state size, control size and horizon.
    artifact = wandb.Artifact(f"svg_prediction-lqg{model.mdp.n_state}.{model.mdp.n_ctrl}.{model.mdp.horizon}", type="model")

    trainer.fit(model, datamodule=datamodule)

    # Log the first entry of the test results to the run.
    results = trainer.test(model, datamodule=datamodule)
    run.log(results[0])
    
    # Attach the checkpoint callback's directory to the artifact and log it.
    artifact.add_dir(trainer.checkpoint_callback.dirpath)
    run.log_artifact(artifact)


  | Name            | Type           | Params
---------------------------------------------------
0 | actor           | TVLinearPolicy | 600   
1 | model           | LQGModule      | 3.6 K 
2 | mdp             | LQGModule      | 3.6 K 
3 | monte_carlo_svg | MonteCarloSVG  | 4.2 K 
4 | analytic_svg    | AnalyticSVG    | 4.2 K 
---------------------------------------------------
7.8 K     Trainable params
0         Non-trainable params
7.8 K     Total params


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Saving latest checkpoint...


Testing: 0it [00:00, ?it/s]

[34m[1mwandb[0m: Adding directory to artifact (/Users/angelolovatto/Repositories/personal/LQG-SVG/experiments/wandb/run-20210316_154047-16y05sct/files/checkpoints)... Done. 0.0s


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/analytic_cossim': tensor(0.1677),
 'test/analytic_diff': tensor(103.3631),
 'test/analytic_svg_norm': tensor(117.8333),
 'test/analytic_value': tensor(-483.5612),
 'test/loss': tensor(345.0986),
 'test/monte_carlo_cossim': tensor(0.1735),
 'test/monte_carlo_diff': tensor(112.0403),
 'test/monte_carlo_svg_norm': tensor(116.0092),
 'test/monte_carlo_value': tensor(-474.8840),
 'true_svg_norm': tensor(645.4822),
 'true_value': tensor(-586.9243)}
--------------------------------------------------------------------------------


VBox(children=(Label(value=' 2.45MB of 2.45MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train/loss,345.19943
epoch,19.0
_runtime,95.0
_timestamp,1615920142.0
_step,2199.0
val/loss,345.465
val/monte_carlo_value,-485.14645
val/monte_carlo_svg_norm,116.50011
val/analytic_value,-483.56125
val/analytic_svg_norm,117.83326


0,1
train/loss,█▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
_step,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄█
val/loss,█▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val/monte_carlo_value,██▇▇▇▆▆▅▅▄▄▄▃▃▃▂▁▁▂▁
val/monte_carlo_svg_norm,▁▁▁▂▂▂▂▃▃▄▄▄▅▆▆▆▇█▇█
val/analytic_value,██▇▇▆▆▆▅▅▄▄▄▃▃▃▂▂▂▁▁
val/analytic_svg_norm,▁▁▁▂▂▂▂▃▃▃▄▄▅▅▆▆▇▇██
