<a href="https://colab.research.google.com/github/NNehmer/stc-alberta/blob/main/STC_Alberta_Agent_V1_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip -q install --upgrade torch

In [2]:
import torch, sys
print("Torch:", torch.__version__, "Python:", sys.version.split()[0])

Torch: 2.8.0+cu126 Python: 3.12.11


In [3]:
"""
STC-Alberta Agent V2.0 (Improved)

Key improvements:
- Cleaner dual-optimizer without grad freeze dance
- Hyperparameter dataclass for clarity
- Better logging with projector rank tracking
- All three ablations (A1, A2, A3)
- Automatic correlation analysis
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from collections import deque
import random
from typing import Dict, Tuple, List, Optional
from dataclasses import dataclass

def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)


# ============================================================================
# Configuration
# ============================================================================

@dataclass
class STCConfig:
    """Hyperparameters in one place"""
    # Architecture
    obs_dim: int = 6
    action_dim: int = 4
    latent_dim: int = 64
    intent_rank: int = 16

    # Training
    num_episodes: int = 200
    batch_size: int = 64
    buffer_size: int = 10000
    lr_main: float = 3e-4
    lr_proj: float = 1e-4
    gamma: float = 0.99

    # Exploration
    epsilon_start: float = 0.5
    epsilon_end: float = 0.05
    epsilon_decay: float = 0.992

    # Loss weights (final values)
    lambda_kappa: float = 0.05
    lambda_comm: float = 0.01
    lambda_cons: float = 1.0
    entropy_coef: float = 3e-4

    # Scheduling
    warmup_episodes: int = 30
    ramp_episodes: int = 80

    # Projector update
    proj_update_every: int = 4
    adv_weight_kappa: bool = True
    adv_temp: float = 0.5

    # Stability
    value_tau: float = 0.005
    grad_clip: float = 1.0

    # Environment
    max_steps: int = 200
    progress_bonus: float = 1.0  # Enable reward shaping


# ============================================================================
# Spectral Components
# ============================================================================

class SpectralProjector(nn.Module):
    def __init__(self, latent_dim: int, intent_rank: int):
        super().__init__()
        self.latent_dim = latent_dim
        self.intent_rank = intent_rank
        self.basis = nn.Parameter(torch.randn(latent_dim, intent_rank) * 0.1)

    def forward(self, z: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        Q, _ = torch.linalg.qr(self.basis)
        Pi_S = Q @ Q.T
        z_S = z @ Pi_S
        return z_S, Pi_S

    def get_effective_rank(self) -> float:
        """Compute effective rank via singular values"""
        with torch.no_grad():
            Q, _ = torch.linalg.qr(self.basis)
            Pi_S = Q @ Q.T
            s = torch.linalg.svdvals(Pi_S)
            s_norm = s / s.sum()
            entropy = -(s_norm * torch.log(s_norm + 1e-10)).sum()
            return torch.exp(entropy).item()


def coherence(z: torch.Tensor, z_S: torch.Tensor) -> torch.Tensor:
    norm_z = torch.norm(z, dim=-1, keepdim=True) + 1e-8
    norm_z_S = torch.norm(z_S, dim=-1, keepdim=True)
    return (norm_z_S / norm_z) ** 2


class ValueOperator(nn.Module):
    def __init__(self, latent_dim: int):
        super().__init__()
        self.W = nn.Parameter(torch.randn(latent_dim, latent_dim) * 0.01)

    def forward(self, z_S: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        V = 0.5 * (self.W + self.W.T)
        v = torch.einsum('bi,ij,bj->b', z_S, V, z_S)
        return v.unsqueeze(-1), V


# ============================================================================
# Agent
# ============================================================================

class STCAlbertaAgent(nn.Module):
    def __init__(self, config: STCConfig):
        super().__init__()
        self.config = config

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(config.obs_dim, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Linear(128, config.latent_dim)
        )

        # Spectral core
        self.projector = SpectralProjector(config.latent_dim, config.intent_rank)
        self.value_op = ValueOperator(config.latent_dim)

        # EMA target
        self.value_target = ValueOperator(config.latent_dim)
        self.value_target.load_state_dict(self.value_op.state_dict())
        for p in self.value_target.parameters():
            p.requires_grad = False

        # Policy
        self.policy = nn.Sequential(
            nn.Linear(config.latent_dim, 128),
            nn.ReLU(),
            nn.Linear(128, config.action_dim)
        )

        # World model
        self.transition = nn.Sequential(
            nn.Linear(config.latent_dim + config.action_dim, 128),
            nn.ReLU(),
            nn.Linear(128, config.latent_dim)
        )

        self.reward_head = nn.Sequential(
            nn.Linear(config.latent_dim + config.action_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, obs: torch.Tensor, use_projection: bool = True) -> Dict[str, torch.Tensor]:
        z = self.encoder(obs)
        z_S, Pi_S = self.projector(z)

        # For policy: use z_S only if projection enabled
        policy_input = z_S if use_projection else z

        kappa = coherence(z, z_S)
        value, V = self.value_op(z_S)
        logits = self.policy(policy_input)

        return {
            'latent': z,
            'latent_S': z_S,
            'projector': Pi_S,
            'value_matrix': V,
            'kappa': kappa,
            'value': value,
            'logits': logits
        }

    def forward_proj_only(self, obs: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Forward pass computing only spectral quantities (for separate proj update)"""
        with torch.no_grad():
            z = self.encoder(obs)

        z_S, Pi_S = self.projector(z)
        kappa = coherence(z, z_S)
        _, V = self.value_op(z_S)

        return {
            'latent': z,
            'latent_S': z_S,
            'projector': Pi_S,
            'value_matrix': V,
            'kappa': kappa
        }

    @torch.no_grad()
    def update_value_target(self):
        tau = self.config.value_tau
        for p, pt in zip(self.value_op.parameters(), self.value_target.parameters()):
            pt.data.mul_(1 - tau).add_(tau * p.data)

    def imagine(self, z_S: torch.Tensor, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        if actions.dim() == 1:
            a_oh = F.one_hot(actions.long(), self.config.action_dim).float()
        else:
            a_oh = actions.float()
        za = torch.cat([z_S, a_oh], dim=-1)
        z_S_next = self.transition(za)
        reward = self.reward_head(za)
        return z_S_next, reward

    def select_action(self, obs: torch.Tensor, epsilon: float = 0.0, use_projection: bool = True) -> int:
        with torch.no_grad():
            out = self.forward(obs.unsqueeze(0), use_projection=use_projection)
            if random.random() < epsilon:
                return random.randint(0, self.config.action_dim - 1)
            return out['logits'].argmax(dim=-1).item()


# ============================================================================
# Loss Functions
# ============================================================================

def compute_main_loss(
    agent: STCAlbertaAgent,
    obs: torch.Tensor,
    actions: torch.Tensor,
    rewards: torch.Tensor,
    next_obs: torch.Tensor,
    dones: torch.Tensor,
    lambda_cons: float,
    entropy_coef: float,
    use_projection: bool = True
) -> Dict[str, torch.Tensor]:
    """Main loss: RL + world model consistency"""

    out = agent.forward(obs, use_projection=use_projection)
    out_next = agent.forward(next_obs, use_projection=use_projection)

    z_S = out['latent_S']
    value = out['value']
    logits = out['logits']

    # Policy loss with advantage normalization
    dist = torch.distributions.Categorical(logits=logits)
    log_prob = dist.log_prob(actions)

    with torch.no_grad():
        v_target, _ = agent.value_target(out_next['latent_S'])
        td_target = rewards.unsqueeze(-1) + agent.config.gamma * v_target * (1 - dones.unsqueeze(-1))

    td_error = td_target - value
    adv = td_error.squeeze(-1).detach()
    adv_norm = (adv - adv.mean()) / (adv.std() + 1e-8)

    policy_loss = -(log_prob * adv_norm).mean()
    value_loss = F.mse_loss(value, td_target)
    entropy = dist.entropy().mean()

    L_RL = policy_loss + value_loss - entropy_coef * entropy

    # World model consistency
    z_S_next_pred, reward_pred = agent.imagine(z_S, actions)
    with torch.no_grad():
        v_next_pred, _ = agent.value_target(z_S_next_pred)

    q_pred = reward_pred + agent.config.gamma * v_next_pred * (1 - dones.unsqueeze(-1))
    q_target = rewards.unsqueeze(-1) + agent.config.gamma * v_target * (1 - dones.unsqueeze(-1))
    L_cons = F.mse_loss(q_pred, q_target)

    total = L_RL + lambda_cons * L_cons

    return {
        'total_loss': total,
        'L_RL': L_RL,
        'L_cons': L_cons,
        'policy_loss': policy_loss,
        'value_loss': value_loss,
        'entropy': entropy,
        'td_error': td_error.abs().mean(),
        'adv_mean': adv.mean(),
        'adv_pos_frac': (adv > 0).float().mean()
    }


def compute_proj_loss(
    agent: STCAlbertaAgent,
    obs: torch.Tensor,
    actions: torch.Tensor,
    rewards: torch.Tensor,
    next_obs: torch.Tensor,
    dones: torch.Tensor,
    lambda_kappa: float,
    lambda_comm: float,
    adv_weight: bool = False,
    adv_temp: float = 0.5
) -> Dict[str, torch.Tensor]:
    """Projector loss: coherence + commutator"""

    out = agent.forward_proj_only(obs)

    z_S = out['latent_S']
    Pi_S = out['projector']
    V = out['value_matrix']
    kappa = out['kappa']

    # Coherence loss (optionally advantage-weighted)
    if adv_weight:
        # Compute advantage from frozen main network
        with torch.no_grad():
            out_full = agent.forward(obs, use_projection=True)
            out_next = agent.forward(next_obs, use_projection=True)
            v_target, _ = agent.value_target(out_next['latent_S'])
            td_target = rewards.unsqueeze(-1) + agent.config.gamma * v_target * (1 - dones.unsqueeze(-1))
            adv = (td_target - out_full['value']).squeeze(-1)

        w = torch.sigmoid(adv / adv_temp)
        w = w / (w.mean() + 1e-8)
        L_rel = -(w * kappa.squeeze(-1)).mean()
    else:
        L_rel = -kappa.mean()

    # Commutator penalty
    comm = V @ Pi_S - Pi_S @ V
    L_comm = (torch.norm(comm, p='fro') ** 2) / V.numel()

    total = lambda_kappa * L_rel + lambda_comm * L_comm

    return {
        'total_loss': total,
        'L_rel': L_rel,
        'L_comm': L_comm,
        'kappa': kappa.mean()
    }


# ============================================================================
# Environment
# ============================================================================

class SimpleControlEnv:
    def __init__(self, max_steps: int = 200, progress_bonus: float = 0.0):
        self.state_dim = 6
        self.action_dim = 4
        self.max_steps = max_steps
        self.progress_bonus = progress_bonus
        self.reset()

    def reset(self) -> np.ndarray:
        self.pos = np.random.uniform(-1, 1, 2)
        self.vel = np.zeros(2)
        self.goal = np.random.uniform(-1, 1, 2)
        self.steps = 0
        self.prev_dist = float(np.linalg.norm(self.pos - self.goal))
        return self._get_obs()

    def _get_obs(self) -> np.ndarray:
        return np.concatenate([self.pos, self.goal, self.vel])

    def step(self, action: int):
        acc_map = {0: np.array([0, 0.1]), 1: np.array([0, -0.1]),
                   2: np.array([-0.1, 0]), 3: np.array([0.1, 0])}
        acc = acc_map[action]

        self.vel = np.clip(0.9 * self.vel + acc, -0.5, 0.5)
        self.pos = np.clip(self.pos + self.vel, -1, 1)

        dist = float(np.linalg.norm(self.pos - self.goal))
        progress = self.prev_dist - dist
        reward = -dist + self.progress_bonus * progress
        self.prev_dist = dist

        self.steps += 1
        done = (dist < 0.1) or (self.steps >= self.max_steps)

        return self._get_obs(), reward, done, {'distance': dist}


# ============================================================================
# Replay Buffer
# ============================================================================

class ReplayBuffer:
    def __init__(self, capacity: int = 10000):
        self.buffer = deque(maxlen=capacity)

    def push(self, obs, action, reward, next_obs, done):
        self.buffer.append((obs, action, reward, next_obs, done))

    def sample(self, batch_size: int) -> Dict[str, torch.Tensor]:
        batch = random.sample(self.buffer, batch_size)
        obs, actions, rewards, next_obs, dones = zip(*batch)
        return {
            'obs': torch.FloatTensor(np.array(obs)),
            'actions': torch.LongTensor(actions),
            'rewards': torch.FloatTensor(rewards),
            'next_obs': torch.FloatTensor(np.array(next_obs)),
            'dones': torch.FloatTensor(dones)
        }

    def __len__(self):
        return len(self.buffer)


# ============================================================================
# Training
# ============================================================================

def train_stc_alberta(config: STCConfig, print_every: int = 10):
    env = SimpleControlEnv(max_steps=config.max_steps, progress_bonus=config.progress_bonus)
    agent = STCAlbertaAgent(config)

    # Separate optimizers
    main_params = [p for n, p in agent.named_parameters()
                   if not n.startswith('projector.')]
    proj_params = list(agent.projector.parameters())

    optimizer_main = torch.optim.Adam(main_params, lr=config.lr_main)
    optimizer_proj = torch.optim.Adam(proj_params, lr=config.lr_proj)

    buffer = ReplayBuffer(capacity=config.buffer_size)
    epsilon = config.epsilon_start

    ep_rewards, ep_kappas, ep_dists = [], [], []
    comm_norms = []

    print("=" * 70)
    print("STC-Alberta Agent V2.0 Training")
    print("=" * 70)
    print(f"Config: {config.num_episodes} episodes, rank {config.intent_rank}/{config.latent_dim}")
    print("=" * 70)
    print()

    for episode in range(config.num_episodes):
        # Ramp schedule
        if episode < config.warmup_episodes:
            ramp = 0.0
        else:
            ramp = min(1.0, (episode - config.warmup_episodes) / max(1, config.ramp_episodes))

        lambda_kappa_eff = config.lambda_kappa * ramp
        lambda_comm_eff = config.lambda_comm * ramp

        # Collect episode
        obs = env.reset()
        ep_reward = 0.0
        ep_kappa_list = []
        done = False

        while not done:
            action = agent.select_action(torch.FloatTensor(obs), epsilon, use_projection=True)
            next_obs, reward, done, info = env.step(action)
            buffer.push(obs, action, reward, next_obs, float(done))

            with torch.no_grad():
                out = agent.forward(torch.FloatTensor(obs).unsqueeze(0), use_projection=True)
                ep_kappa_list.append(out['kappa'].item())

            obs = next_obs
            ep_reward += reward

        ep_rewards.append(ep_reward)
        ep_kappas.append(float(np.mean(ep_kappa_list)))
        ep_dists.append(info['distance'])

        # Training updates
        if len(buffer) >= config.batch_size:
            batch = buffer.sample(config.batch_size)

            # Main update
            losses_main = compute_main_loss(
                agent, **batch,
                lambda_cons=config.lambda_cons,
                entropy_coef=config.entropy_coef,
                use_projection=True
            )

            optimizer_main.zero_grad()
            losses_main['total_loss'].backward()
            torch.nn.utils.clip_grad_norm_(main_params, config.grad_clip)
            optimizer_main.step()

            # Projector update (less frequent)
            if ramp > 0 and (episode + 1) % config.proj_update_every == 0:
                losses_proj = compute_proj_loss(
                    agent, **batch,
                    lambda_kappa=lambda_kappa_eff,
                    lambda_comm=lambda_comm_eff,
                    adv_weight=config.adv_weight_kappa,
                    adv_temp=config.adv_temp
                )

                optimizer_proj.zero_grad()
                losses_proj['total_loss'].backward()
                torch.nn.utils.clip_grad_norm_(proj_params, config.grad_clip * 0.5)
                optimizer_proj.step()

                comm_norms.append(losses_proj['L_comm'].item())

            agent.update_value_target()

        epsilon = max(config.epsilon_end, epsilon * config.epsilon_decay)

        # Logging
        if (episode + 1) % print_every == 0:
            avg_r = float(np.mean(ep_rewards[-print_every:]))
            avg_k = float(np.mean(ep_kappas[-print_every:]))
            avg_d = float(np.mean(ep_dists[-print_every:]))
            eff_rank = agent.projector.get_effective_rank()

            print(f"Ep {episode+1:4d} | R: {avg_r:6.2f} | κ: {avg_k:.3f} | "
                  f"D: {avg_d:.3f} | ε: {epsilon:.3f} | ramp: {ramp:.2f} | eff_rank: {eff_rank:.1f}")

            if len(buffer) >= config.batch_size:
                print(f"          | L_RL: {losses_main['L_RL'].item():.4f} | "
                      f"L_cons: {losses_main['L_cons'].item():.4f} | "
                      f"H: {losses_main['entropy'].item():.3f}")
                if 'losses_proj' in locals():
                    print(f"          | L_rel: {losses_proj['L_rel'].item():.4f} | "
                          f"L_comm: {losses_proj['L_comm'].item():.6f}")

    # Final analysis
    k_tail = np.array(ep_kappas[-50:])
    r_tail = np.array(ep_rewards[-50:])
    corr = np.corrcoef(k_tail, r_tail)[0, 1] if len(k_tail) > 1 else 0.0

    print("\n" + "=" * 70)
    print("Training Complete")
    print("=" * 70)
    print(f"Final κ (last 50): {np.mean(k_tail):.3f} ± {np.std(k_tail):.3f}")
    print(f"Final reward (last 50): {np.mean(r_tail):.2f} ± {np.std(r_tail):.2f}")
    print(f"Final distance (last 50): {np.mean(ep_dists[-50:]):.3f}")
    print(f"Corr(κ, Return): {corr:.3f}")
    if comm_norms:
        print(f"Final commutator norm: {np.mean(comm_norms[-10:]):.6f}")
    print("=" * 70)

    return agent, ep_rewards, ep_kappas, ep_dists


# ============================================================================
# Ablations
# ============================================================================

def run_ablation_a1(config: STCConfig, num_episodes: int = 100) -> float:
    """A1: No projector (full latent space)"""
    print("\n" + "=" * 70)
    print("ABLATION A1: No Projector")
    print("=" * 70)

    env = SimpleControlEnv(progress_bonus=config.progress_bonus)
    agent = STCAlbertaAgent(config)
    optimizer = torch.optim.Adam(agent.parameters(), lr=config.lr_main)
    buffer = ReplayBuffer()

    ep_rewards = []
    epsilon = 0.1

    for episode in range(num_episodes):
        obs = env.reset()
        ep_reward = 0.0
        done = False

        while not done:
            action = agent.select_action(torch.FloatTensor(obs), epsilon, use_projection=False)
            next_obs, reward, done, _ = env.step(action)
            buffer.push(obs, action, reward, next_obs, float(done))
            obs = next_obs
            ep_reward += reward

        ep_rewards.append(ep_reward)

        if len(buffer) >= config.batch_size:
            batch = buffer.sample(config.batch_size)
            losses = compute_main_loss(agent, **batch, lambda_cons=config.lambda_cons,
                                      entropy_coef=config.entropy_coef, use_projection=False)
            optimizer.zero_grad()
            losses['total_loss'].backward()
            optimizer.step()
            agent.update_value_target()

    result = float(np.mean(ep_rewards[-30:]))
    print(f"A1 Result (last 30): {result:.2f}")
    return result


def run_ablation_a2(config: STCConfig, num_episodes: int = 100) -> float:
    """A2: Random fixed projector"""
    print("\n" + "=" * 70)
    print("ABLATION A2: Random Fixed Projector")
    print("=" * 70)

    env = SimpleControlEnv(progress_bonus=config.progress_bonus)
    agent = STCAlbertaAgent(config)

    # Freeze projector with random initialization
    for p in agent.projector.parameters():
        p.requires_grad = False

    optimizer = torch.optim.Adam([p for p in agent.parameters() if p.requires_grad],
                                 lr=config.lr_main)
    buffer = ReplayBuffer()

    ep_rewards = []
    epsilon = 0.1

    for episode in range(num_episodes):
        obs = env.reset()
        ep_reward = 0.0
        done = False

        while not done:
            action = agent.select_action(torch.FloatTensor(obs), epsilon, use_projection=True)
            next_obs, reward, done, _ = env.step(action)
            buffer.push(obs, action, reward, next_obs, float(done))
            obs = next_obs
            ep_reward += reward

        ep_rewards.append(ep_reward)

        if len(buffer) >= config.batch_size:
            batch = buffer.sample(config.batch_size)
            losses = compute_main_loss(agent, **batch, lambda_cons=config.lambda_cons,
                                      entropy_coef=config.entropy_coef, use_projection=True)
            optimizer.zero_grad()
            losses['total_loss'].backward()
            optimizer.step()
            agent.update_value_target()

    result = float(np.mean(ep_rewards[-30:]))
    print(f"A2 Result (last 30): {result:.2f}")
    return result


def run_ablation_a3(config: STCConfig, num_episodes: int = 100) -> float:
    """A3: Learned projector but no commutator penalty"""
    print("\n" + "=" * 70)
    print("ABLATION A3: No Commutator Penalty")
    print("=" * 70)

    config_a3 = STCConfig()
    config_a3.__dict__.update(config.__dict__)
    config_a3.lambda_comm = 0.0  # Disable commutator
    config_a3.num_episodes = num_episodes

    _, _, _, _ = train_stc_alberta(config_a3, print_every=20)

    # Return is printed in train function
    return 0.0


# ============================================================================
# Main
# ============================================================================

if __name__ == "__main__":
    config = STCConfig()

    # Main training
    agent, rewards, kappas, dists = train_stc_alberta(config, print_every=10)

    # Run ablations
    a1_result = run_ablation_a1(config, num_episodes=100)
    a2_result = run_ablation_a2(config, num_episodes=100)

    print("\n" + "=" * 70)
    print("FINAL COMPARISON")
    print("=" * 70)
    print(f"STC-Alberta (full):        {np.mean(rewards[-50:]):.2f}")
    print(f"A1 (no projector):         {a1_result:.2f}")
    print(f"A2 (random projector):     {a2_result:.2f}")
    print(f"Improvement over A1:       {np.mean(rewards[-50:]) - a1_result:.2f}")
    print(f"Improvement over A2:       {np.mean(rewards[-50:]) - a2_result:.2f}")
    print("=" * 70)


STC-Alberta Agent V2.0 Training
Config: 200 episodes, rank 16/64

Ep   10 | R: -326.91 | κ: 0.269 | D: 1.468 | ε: 0.461 | ramp: 0.00 | eff_rank: 16.0
          | L_RL: 1.8580 | L_cons: 2.3328 | H: 1.384
Ep   20 | R: -276.90 | κ: 0.400 | D: 1.456 | ε: 0.426 | ramp: 0.00 | eff_rank: 16.0
          | L_RL: 0.3901 | L_cons: 1.5106 | H: 1.381
Ep   30 | R: -219.47 | κ: 0.514 | D: 0.992 | ε: 0.393 | ramp: 0.00 | eff_rank: 16.0
          | L_RL: 0.1250 | L_cons: 0.9646 | H: 1.377
Ep   40 | R: -252.18 | κ: 0.525 | D: 1.138 | ε: 0.363 | ramp: 0.11 | eff_rank: 16.0
          | L_RL: 0.1573 | L_cons: 0.5367 | H: 1.376
          | L_rel: -0.5779 | L_comm: 0.000027
Ep   50 | R: -184.64 | κ: 0.598 | D: 0.735 | ε: 0.335 | ramp: 0.24 | eff_rank: 16.0
          | L_RL: 0.1053 | L_cons: 0.4001 | H: 1.374
          | L_rel: -0.6093 | L_comm: 0.000027
Ep   60 | R: -284.37 | κ: 0.625 | D: 1.404 | ε: 0.309 | ramp: 0.36 | eff_rank: 16.0
          | L_RL: 0.1423 | L_cons: 0.3001 | H: 1.368
          | L_rel: -