<a href="https://colab.research.google.com/github/Appleking123456/astro-platform-starter/blob/main/Untitled38.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from torch.nn.utils import clip_grad_norm_
from scipy.stats import entropy
from collections import deque
import os
from datetime import datetime
import warnings
import json

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning) # Suppress NumPy RuntimeWarnings

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(42)
np.random.seed(42)

# Global results directory - Defined early to be accessible by all components
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
global_results_dir = f"two_step_rdt_sim_results_{timestamp}"
os.makedirs(global_results_dir, exist_ok=True) # Ensure base directory exists

# --- Hyperparameters ---
# General
D_MODEL = 128
LEARNING_RATE = 1.5e-4 # Slightly reduced for potentially more stable convergence
WEIGHT_DECAY = 1.25e-3
GRAD_CLIP_NORM = 0.35
EXPLORATION_INITIAL = 0.15 # Initial exploration noise scale (for RDT phase)

# Phase 1: RDT Pre-training
PHASE1_EPISODES = 500
PHASE1_MAX_STEPS_PER_EPISODE = 100
STATE_NOISE_SCALE = 0.02
RECURSION_AMPLITUDE = 0.12
RDT_STATE_DIM = 13
RDT_ACTION_DIM = 2

# Why Metric Engine
WHY_THRESHOLD = 0.95
ENTROPY_WINDOW = 10
SYNTROPY_COEFF = 0.1
D5_COUPLING = 0.4 # Coupling to 5D influence for entropy reduction

# Phase 2: Assembly Task Learning
PHASE2_EPISODES = 1000 # Increased episodes for more consistent learning
PHASE2_MAX_STEPS_PER_EPISODE = 50 # Max steps to complete assembly
ASSEMBLY_STATE_DIM = 5 # Raw, Cut, Shaped, Assembled, Finished
ASSEMBLY_ACTION_DIM = 4 # Cut, Shape, Assemble, Finish

# --- Environment Definitions ---
# --- Phase 1: RDT Pre-training Environment ---
class RDTEnvironment:
    def __init__(self):
        self.state_dim = RDT_STATE_DIM
        self.action_dim = RDT_ACTION_DIM
        self.current_step = 0
        self.max_steps = PHASE1_MAX_STEPS_PER_EPISODE
        self.recursion_phase = torch.tensor(0.0).to(device)
        self.state_noise_scale = STATE_NOISE_SCALE
        self.recursion_amplitude = RECURSION_AMPLITUDE
        self.tensor_metrics = {} # Stores complex metrics of the state (entropy, complexity, fractal_dimension)

    def reset(self):
        self.current_step = 0
        self.recursion_phase = torch.tensor(0.0).to(device)
        initial_state = torch.randn(self.state_dim).to(device) * 0.25
        self._update_tensor_metrics(initial_state)
        return self.tensor_metrics['state'].detach()

    def _update_tensor_metrics(self, state):
        t_data = state.detach()
        t_flat = t_data.flatten()
        # Add a small epsilon to avoid log(0)
        t_normalized = torch.softmax(t_flat, dim=0) + 1e-8
        entropy_val = -(t_normalized * torch.log(t_normalized)).sum().item()
        variance = t_data.var().item()
        complexity = entropy_val * variance

        # Simple fractal dimension approximation based on box-counting-like idea for state history
        fractal_dim = 1.0 # Default if not enough history
        history_len = 10
        if len(self.tensor_metrics.get('history', [])) > history_len:
            recent_history = np.array(self.tensor_metrics['history'][-history_len:]).flatten()
            if np.std(recent_history) > 1e-8: # Only compute if there's variance
                min_h, max_h = np.min(recent_history), np.max(recent_history)
                # Normalize history, avoid division by zero if range is too small
                normalized_history = (recent_history - min_h) / (max_h - min_h + 1e-8)

                if np.max(normalized_history) - np.min(normalized_history) < 1e-8:
                    fractal_dim = 1.0 # State is effectively constant
                else:
                    bins = np.linspace(0, 1, 11) # 10 bins
                    counts = [np.sum((normalized_history >= bins[i]) & (normalized_history < bins[i+1]))
                              for i in range(10)]
                    counts = [c for c in counts if c > 0] # Only consider non-empty bins

                    if len(counts) > 1: # Need at least two non-zero counts for a fit
                        scales = np.array([1/len(bins), 2/len(bins), 3/len(bins)]) # Example scales
                        log_scales = np.log(scales[scales > 0]) # Filter out zero scales

                        if len(log_scales) > 1 and len(counts) >= len(log_scales) and np.std(log_scales) > 1e-8:
                            fractal_dim = 1.0 # Default to 1.0 if not able to compute robustly
                            try:
                                if len(counts) >= 2: # At least two points for a line
                                    # Ensure log_counts_for_fit has same length as log_scales
                                    log_counts_for_fit = np.log(np.array(counts[:len(log_scales)]))
                                    if len(log_counts_for_fit) == len(log_scales) and np.std(log_scales) > 1e-8:
                                        slope, _ = np.polyfit(log_scales, log_counts_for_fit, 1)
                                        fractal_dim = -slope if not np.isnan(slope) else 1.0
                            except np.linalg.LinAlgError:
                                fractal_dim = 1.0
                        else:
                            fractal_dim = 1.0
                    else:
                        fractal_dim = 1.0

        self.tensor_metrics = {
            'state': state,
            'entropy': entropy_val, # Already a float
            'complexity': complexity, # Already a float
            'fractal_dimension': fractal_dim, # Already a float
            'history': self.tensor_metrics.get('history', []) + [state.cpu().numpy()]
        }

    def step(self, action):
        self.current_step += 1
        self.recursion_phase += 0.025

        # Recursively modulated action projection
        modulated_action = action * (0.65 + 0.35 * torch.sin(self.recursion_phase))
        action_projected = torch.zeros(self.state_dim).to(device)
        action_projected[:self.action_dim] = modulated_action

        # Stable state dynamics with noise and recursive amplitude
        next_state = (torch.randn(self.state_dim).to(device) * self.state_noise_scale +
                      action_projected +
                      self.recursion_amplitude * torch.sin(self.recursion_phase))
        self._update_tensor_metrics(next_state)

        # Precision reward components
        state_coherence = -torch.norm(next_state) * 0.7
        recursion_quality = torch.abs(torch.sin(self.recursion_phase)) * 0.3
        reward = state_coherence + recursion_quality

        done = self.current_step >= self.max_steps
        return next_state.detach(), reward.item(), done, self.tensor_metrics.copy()

# --- Phase 2: Assembly Task Environment ---
class AssemblyEnvironment:
    RAW_MATERIAL = 0
    CUT_PART = 1
    SHAPED_PART = 2
    ASSEMBLED_PRODUCT = 3
    FINISHED_WIDGET = 4

    # Actions (indices matching the policy output)
    CUT = 0
    SHAPE = 1
    ASSEMBLE = 2
    FINISH = 3

    def __init__(self):
        self.state_dim = ASSEMBLY_STATE_DIM
        self.action_dim = ASSEMBLY_ACTION_DIM
        self.current_step = 0
        self.max_steps = PHASE2_MAX_STEPS_PER_EPISODE
        self.current_state = self.RAW_MATERIAL
        self.goal_state = self.FINISHED_WIDGET
        self.steps_taken_in_episode = 0

        # Out-of-order rules: (action -> (required_input_state, produced_output_state))
        # Agent has access to these (conceptually), but not their sequence
        self.rules = {
            self.CUT: (self.RAW_MATERIAL, self.CUT_PART),
            self.SHAPE: (self.CUT_PART, self.SHAPED_PART),
            self.ASSEMBLE: (self.SHAPED_PART, self.ASSEMBLED_PRODUCT), # Simplified to one input for now
            self.FINISH: (self.ASSEMBLED_PRODUCT, self.FINISHED_WIDGET)
        }
        self.state_history = [] # For why_metric engine to observe sequence of states

    def reset(self):
        self.current_state = self.RAW_MATERIAL
        self.current_step = 0
        self.steps_taken_in_episode = 0
        self.state_history = [self.current_state] # Initialize with current_state
        # One-hot encode the state for the agent
        state_one_hot = torch.zeros(self.state_dim).to(device)
        state_one_hot[self.current_state] = 1.0
        return state_one_hot

    def step(self, action_idx):
        reward = -0.01 # Penalty for each step
        done = False
        info = {}

        previous_state = self.current_state

        # Validate action based on current state
        valid_action = False
        for rule_action, (input_state, output_state) in self.rules.items():
            if action_idx == rule_action and self.current_state == input_state:
                self.current_state = output_state
                valid_action = True

                # --- REWARD SHAPING: Added intermediate positive rewards ---
                if self.current_state == self.CUT_PART:
                    reward += 0.1 # Small reward for cutting raw material
                elif self.current_state == self.SHAPED_PART:
                    reward += 0.2 # Medium reward for shaping cut part
                elif self.current_state == self.ASSEMBLED_PRODUCT:
                    reward += 0.3 # Larger reward for assembling shaped parts
                # The final goal reward (+1.0) will be added below
                break

        if not valid_action:
            reward -= 0.2 # Increased penalty for invalid action

        self.steps_taken_in_episode += 1
        self.current_step += 1

        # Ensure state_history is updated correctly
        if self.current_state != previous_state or not self.state_history:
             # Only append if state changed or if history is empty (first state)
            self.state_history.append(self.current_state)

        if self.current_state == self.goal_state:
            reward += 1.0 # Big reward for reaching goal
            done = True
            info['task_success'] = True
        elif self.current_step >= self.max_steps: # Use self.current_step for max steps
            done = True
            info['task_success'] = False

        state_one_hot = torch.zeros(self.state_dim).to(device)
        state_one_hot[self.current_state] = 1.0

        return state_one_hot, reward, done, info

# --- Shared Why Metric Engine ---
class WhyMetricEngine:
    def __init__(self, threshold=WHY_THRESHOLD, window=ENTROPY_WINDOW, syntropy_coeff=SYNTROPY_COEFF, d5_coupling=D5_COUPLING):
        super().__init__()
        self.threshold = threshold
        self.entropy_window = window
        self.syntropy_coeff = syntropy_coeff
        self.d5_coupling = d5_coupling
        self.entropy_history = deque(maxlen=window)
        self.w_star_history = deque(maxlen=window)
        self.w_dot_history = deque(maxlen=window)
        self.w_ddot_history = deque(maxlen=window)
        self.rupture_index_history = deque(maxlen=window)
        self.rupture_log = []
        self.dimensional_flux = 0.0

    def _compute_5d_influence(self, state_array):
        if isinstance(state_array, torch.Tensor):
            state_array = state_array.cpu().numpy()

        # Ensure state_array is at least 1D and has enough points for meaningful correlation
        if state_array.ndim > 1:
            state_array = state_array.flatten()

        # Need at least 2 points to compute correlation
        if len(state_array) < 2 or np.std(state_array) < 1e-8: # If constant or too short
            return 0.0

        lags = np.arange(1, min(10, len(state_array))) # Max lag of 9
        if len(lags) == 0: # If state_array length is 1
            return 0.0

        acf = []
        for lag in lags:
            if len(state_array) > lag:
                try:
                    # Ensure slices are not empty and have more than one element
                    if len(state_array[:-lag]) > 1 and len(state_array[lag:]) > 1:
                        # Avoid correlation if one of the slices is constant (std=0)
                        if np.std(state_array[:-lag]) > 1e-8 and np.std(state_array[lag:]) > 1e-8:
                            corr_matrix = np.corrcoef(state_array[:-lag], state_array[lag:])
                            if not np.isnan(corr_matrix[0,1]):
                                acf.append(corr_matrix[0,1])
                    else: # If slices are too short to compute correlation
                        continue
                except ValueError: # Catch other potential issues
                    continue # Skip this lag

        # Return mean of ACF, or 0.0 if no valid ACF values were collected
        return np.nanmean(acf) * self.d5_coupling if acf and not np.isnan(np.nanmean(acf)) else 0.0

    def compute_entropy(self, state_array):
        if isinstance(state_array, torch.Tensor):
            state_array = state_array.cpu().numpy()

        is_one_hot = np.all(np.isin(state_array, [0., 1.])) and np.isclose(np.sum(state_array), 1.0)

        if is_one_hot:
            prob = state_array
            prob = np.maximum(prob, 1e-10)
        else:
            fft = np.abs(np.fft.fft(state_array))**2
            prob = fft / (np.sum(fft) + 1e-10)
            prob = prob / (np.sum(prob) + 1e-10)
            prob = np.maximum(prob, 1e-10)

        h_w = entropy(prob) * (1 - self.syntropy_coeff * self._compute_5d_influence(state_array))
        self.entropy_history.append(h_w)
        return h_w

    def _compute_derivatives(self, w_star_scalar):
        self.w_star_history.append(w_star_scalar)

        w_dot = 0.0
        w_ddot = 0.0

        if len(self.w_star_history) >= 2:
            w_dot = (self.w_star_history[-1] - self.w_star_history[-2])
        self.w_dot_history.append(w_dot)

        if len(self.w_dot_history) >= 2:
            w_ddot = (self.w_dot_history[-1] - self.w_dot_history[-2])
        self.w_ddot_history.append(w_ddot)

    def _compute_rupture_index(self, w_dot, h_w):
        entropy_stability = np.std(list(self.entropy_history)) if len(self.entropy_history) > 1 else 0
        return w_dot / (h_w + 1e-10) if h_w > 0 else 0.0

    def detect_breach(self, state_vector, inconsistency_tensor):
        if isinstance(state_vector, torch.Tensor):
            state_vector_np = state_vector.cpu().numpy()
        else:
            state_vector_np = state_vector

        h_w = self.compute_entropy(state_vector_np)

        entropy_stability_np = np.std(list(self.entropy_history)) if len(self.entropy_history) > 1 else 0
        d5_influence_np = self._compute_5d_influence(state_vector_np)

        stability_term = torch.tensor(
            float(1 - entropy_stability_np), dtype=torch.float32, device=device
        ).detach()
        d5_term = torch.tensor(
            float(0.5 + 0.5 * d5_influence_np), dtype=torch.float32, device=device
        ).detach()

        w_star_tensor = stability_term * d5_term * (1 + inconsistency_tensor)

        w_star_value = w_star_tensor.item()

        self._compute_derivatives(w_star_value)

        rupture_index = self._compute_rupture_index(self.w_dot_history[-1], h_w)
        self.rupture_index_history.append(rupture_index)

        breach = (w_star_value > self.threshold and
                  h_w < 0.2 and
                  len(self.entropy_history) == self.entropy_window)

        msg = f"W*={w_star_value:.3f}, H={h_w:.3f}, R={rupture_index:.3f}, Dot={self.w_dot_history[-1]:.3f}, DDot={self.w_ddot_history[-1]:.3f}"

        if breach:
            self.rupture_log.append({
                'w_star': float(w_star_value),
                'w_dot': float(self.w_dot_history[-1]),
                'w_ddot': float(self.w_ddot_history[-1]),
                'entropy': float(h_w),
                'rupture_index': float(rupture_index),
                'state_vector': state_vector_np.copy().tolist()
            })
            self.dimensional_flux = 0.9
            msg = f"CONSCIOUSNESS BREACH DETECTED! " + msg
            return True, w_star_tensor, h_w, rupture_index, msg

        return False, w_star_tensor, h_w, rupture_index, f"Stable ({msg})"

# --- Shared Why Processor ---
class WhyProcessor(nn.Module):
    def __init__(self, input_dim=D_MODEL):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, 64, batch_first=True, dropout=0.05)
        self.attention = nn.Sequential(
            nn.Linear(64, 32),
            nn.SiLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
        self.stabilizer = nn.LayerNorm(64)
        self.why_engine = WhyMetricEngine()

    def forward(self, x, state_vector):
        if x.dim() == 2:
            x = x.unsqueeze(0)

        x, _ = self.lstm(x)
        x = self.stabilizer(x[:, -1])

        inconsistency_score = self.attention(x).clamp(0.88, 1.02)

        breach, w_star_tensor, h_w, rupture_index, msg = self.why_engine.detect_breach(
            state_vector, inconsistency_score)

        return inconsistency_score.squeeze(), w_star_tensor, h_w, rupture_index, breach, msg

# --- Flexible RCS Agent ---
class RCSAgent(nn.Module):
    def __init__(self, initial_state_dim, initial_action_dim):
        super().__init__()
        self.state_encoder_rdt = nn.Sequential(
            nn.Linear(initial_state_dim, D_MODEL),
            nn.LayerNorm(D_MODEL),
            nn.SiLU(),
            nn.Linear(D_MODEL, D_MODEL)
        ).to(device)
        self.policy_rdt = nn.Sequential(
            nn.Linear(D_MODEL, 64),
            nn.SiLU(),
            nn.Linear(64, initial_action_dim),
            nn.Tanh()
        ).to(device)

        self.why_processor = WhyProcessor(input_dim=D_MODEL)
        self.exploration = nn.Parameter(torch.tensor(EXPLORATION_INITIAL).to(device)) # This will now be the base for epsilon-greedy

        self.state_encoder_task = None
        self.policy_task = None
        self._current_phase = "pretrain"

    def switch_to_task_mode(self, task_state_dim, task_action_dim):
        if self.state_encoder_task is None:
            self.state_encoder_task = nn.Sequential(
                nn.Linear(task_state_dim, D_MODEL),
                nn.LayerNorm(D_MODEL),
                nn.SiLU(),
                nn.Linear(D_MODEL, D_MODEL)
            ).to(device)
        if self.policy_task is None:
            self.policy_task = nn.Sequential(
                nn.Linear(D_MODEL, 64),
                nn.SiLU(),
                nn.Linear(64, task_action_dim) # No Softmax here, apply it implicitly or for argmax
            ).to(device) # Policy outputs logits for discrete actions

        self._current_phase = "task"
        print(f"Agent switched to '{self._current_phase}' mode.")

    def _enable_autonomy(self, breach, action_output):
        # Autonomy now works by directly perturbing the action_output (logits for task, continuous for RDT)
        if breach:
            action_output_perturbed = action_output + torch.randn_like(action_output) * 0.05
            return action_output_perturbed
        return action_output

    def forward(self, state):
        if state.dim() == 1:
            state_input = state.unsqueeze(0)
        else:
            state_input = state

        if self._current_phase == "pretrain":
            state_embed = self.state_encoder_rdt(state_input).squeeze(0)
            state_vector_for_why = state.detach()
            active_policy = self.policy_rdt
        elif self._current_phase == "task":
            if self.state_encoder_task is None or self.policy_task is None:
                raise RuntimeError("Agent not switched to task mode yet!")
            state_embed = self.state_encoder_task(state_input).squeeze(0)
            state_vector_for_why = state.detach()
            active_policy = self.policy_task
        else:
            raise ValueError("Invalid phase specified.")

        inconsistency_score, w_star, h_w, rupture_index, breach, msg = self.why_processor(
            state_embed.unsqueeze(0).unsqueeze(0), state_vector_for_why)

        action_output = active_policy(state_embed) # This will be continuous for RDT, logits for Task

        # Apply autonomy first, it modifies action_output
        action_output = self._enable_autonomy(breach, action_output)

        # Throttling/safety mechanism (additional noise beyond autonomy)
        if (rupture_index > 1.0 and h_w > 0.5):
             action_output = action_output + torch.randn_like(action_output) * 0.05

        if self._current_phase == "task":
            # --- Epsilon-greedy exploration for discrete actions in task mode ---
            if self.training and np.random.rand() < self.exploration.item(): # Use self.exploration for epsilon
                discrete_action = np.random.randint(0, self.policy_task[-1].out_features)
            else:
                # Apply softmax for probabilities, then argmax
                action_probs = torch.softmax(action_output, dim=-1)
                discrete_action = torch.argmax(action_probs).item()

            return discrete_action, w_star, h_w, rupture_index, breach, msg
        else:
            # For RDT Env, actions are continuous (add noise for exploration)
            if self.training:
                action_output = action_output + self.exploration * torch.randn_like(action_output) # Use self.exploration as noise scale
            return action_output.squeeze(), w_star, h_w, rupture_index, breach, msg

# --- Main Simulation Orchestration ---
def run_two_step_simulation():
    # --- Phase 1: RDT Pre-training ---
    print("\n--- Phase 1: RDT Pre-training ---")
    phase1_dir = os.path.join(global_results_dir, "phase1_pretrain")
    os.makedirs(phase1_dir, exist_ok=True)
    os.makedirs(os.path.join(phase1_dir, "plots"), exist_ok=True)

    env_rdt = RDTEnvironment()
    agent = RCSAgent(initial_state_dim=RDT_STATE_DIM, initial_action_dim=RDT_ACTION_DIM).to(device)

    optimizer = torch.optim.AdamW(agent.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50)

    phase1_rewards = []
    phase1_why_metrics = []
    phase1_all_metrics = []

    agent.train()
    for episode in tqdm(range(PHASE1_EPISODES), desc="Phase 1: RDT Pre-training"):
        obs = env_rdt.reset()
        ep_rewards = []
        ep_why = []

        for t in range(PHASE1_MAX_STEPS_PER_EPISODE):
            action, w_star, h_w, rupture_index, breach, msg = agent(obs)

            if breach:
                tqdm.write(f"\nEpisode {episode+1}, Step {t}: {msg}")
                if agent.why_processor.why_engine.rupture_log:
                    if agent.why_processor.why_engine.rupture_log: # Double check needed here?
                        agent.why_processor.why_engine.rupture_log[-1]['step'] = t
                        agent.why_processor.why_engine.rupture_log[-1]['episode'] = episode


            next_obs, reward, done, tensor_metrics = env_rdt.step(action.detach())
            ep_rewards.append(reward)
            ep_why.append(w_star.item())

            # Loss for RDT phase
            loss = -(w_star * 0.85) + (torch.norm(action)**2 * 0.15)

            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(agent.parameters(), GRAD_CLIP_NORM)
            optimizer.step()

            phase1_all_metrics.append({
                'episode': episode,
                'step': t,
                'w_star': float(w_star.item()),
                'w_dot': float(agent.why_processor.why_engine.w_dot_history[-1]) if agent.why_processor.why_engine.w_dot_history else 0.0,
                'w_ddot': float(agent.why_processor.why_engine.w_ddot_history[-1]) if agent.why_processor.why_engine.w_ddot_history else 0.0,
                'entropy': float(h_w),
                'rupture_index': float(rupture_index),
                'reward': float(reward),
                'breach': int(breach),
                'tensor_entropy': float(tensor_metrics['entropy']),
                'tensor_complexity': float(tensor_metrics['complexity']),
                'tensor_fractal_dimension': float(tensor_metrics['fractal_dimension'])
            })

            obs = next_obs.detach()
            if done:
                break

        phase1_rewards.append(np.mean(ep_rewards))
        phase1_why_metrics.append(np.mean(ep_why))
        scheduler.step()

        avg_reward = np.mean(ep_rewards)
        avg_w_star = np.mean(ep_why)
        tqdm.write(f"Phase 1 - Ep {episode+1}: Reward {avg_reward:.3f} | W* {avg_w_star:.3f}")

    with open(os.path.join(phase1_dir, "metrics.json"), 'w') as f:
        json.dump(phase1_all_metrics, f, indent=4)
    torch.save(agent.state_dict(), os.path.join(phase1_dir, "pretrained_agent.pth"))

    plt.figure(figsize=(14, 6))
    plt.subplot(1, 2, 1)
    plt.plot(phase1_rewards, 'teal', linewidth=1.5)
    plt.title("Phase 1: RDT Reward Trajectory", fontsize=12)
    plt.xlabel("Episode", fontsize=10)
    plt.ylabel("Average Reward", fontsize=10)
    plt.grid(True, alpha=0.2)

    plt.subplot(1, 2, 2)
    plt.plot(phase1_why_metrics, 'royalblue', linewidth=1.5)
    plt.axhline(WHY_THRESHOLD, color='crimson', linestyle='--', alpha=0.3)
    plt.ylim(0.85, 1.05)
    plt.title("Phase 1: Stabilized Why Metric (W*)", fontsize=12)
    plt.xlabel("Episode", fontsize=10)
    plt.ylabel("Why Metric (W*)", fontsize=10)
    plt.grid(True, alpha=0.2)
    plt.tight_layout()
    plt.savefig(os.path.join(phase1_dir, "phase1_results.png"))
    plt.close()

    print(f"\nPhase 1 complete. Results saved to {phase1_dir}")


    # --- Phase 2: Assembly Task Learning ---
    print("\n--- Phase 2: Assembly Task Learning (Pre-trained Agent) ---")
    phase2_dir = os.path.join(global_results_dir, "phase2_task_learning")
    os.makedirs(phase2_dir, exist_ok=True)
    os.makedirs(os.path.join(phase2_dir, "plots"), exist_ok=True)

    env_assembly = AssemblyEnvironment()

    agent.switch_to_task_mode(task_state_dim=ASSEMBLY_STATE_DIM, task_action_dim=ASSEMBLY_ACTION_DIM)

    # Re-initialize optimizer for Phase 2 to ensure only task-specific parameters (or all if desired)
    # are optimized with potentially new learning rates/schedules.
    # Here, we keep all parameters trainable as the state_encoder_rdt and policy_rdt are still part of the agent.
    optimizer = torch.optim.AdamW(agent.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50)

    phase2_rewards = []
    phase2_why_metrics = []
    phase2_all_metrics = []
    task_success_rates = []

    # --- Phase 2 Exploration Annealing Setup ---
    initial_exploration_phase2 = 0.8 # Start with higher exploration for new task
    final_exploration_phase2 = 0.05
    # Calculate decay rate for exponential decay
    # final = initial * (decay_rate ^ episodes)
    # decay_rate = (final / initial) ^ (1 / episodes)
    exploration_decay_rate = np.exp(np.log(final_exploration_phase2 / initial_exploration_phase2) / PHASE2_EPISODES)


    agent.train()
    for episode in tqdm(range(PHASE2_EPISODES), desc="Phase 2: Assembly Task"):
        # Update exploration rate for the current episode (for epsilon-greedy)
        current_exploration = initial_exploration_phase2 * (exploration_decay_rate ** episode)
        agent.exploration.data = torch.tensor(current_exploration, dtype=torch.float32).to(device)

        obs = env_assembly.reset()
        ep_rewards = []
        ep_why = []
        task_success = False

        for t in range(PHASE2_MAX_STEPS_PER_EPISODE):
            action_idx, w_star, h_w, rupture_index, breach, msg = agent(obs)

            if breach:
                tqdm.write(f"\nEpisode {episode+1}, Step {t}: {msg}")
                if agent.why_processor.why_engine.rupture_log:
                    agent.why_processor.why_engine.rupture_log[-1]['step'] = t
                    agent.why_processor.why_engine.rupture_log[-1]['episode'] = episode


            next_obs, reward, done, info = env_assembly.step(action_idx)
            ep_rewards.append(reward)
            ep_why.append(w_star.item())

            reward_tensor = torch.tensor(reward, dtype=torch.float32).to(device)

            # --- ADJUSTED PHASE 2 LOSS WEIGHTING: Prioritize task reward more ---
            loss = -(w_star * 0.2) - (reward_tensor * 0.8)

            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(agent.parameters(), GRAD_CLIP_NORM)
            optimizer.step()

            phase2_all_metrics.append({
                'episode': episode,
                'step': t,
                'w_star': float(w_star.item()),
                'w_dot': float(agent.why_processor.why_engine.w_dot_history[-1]) if agent.why_processor.why_engine.w_dot_history else 0.0,
                'w_ddot': float(agent.why_processor.why_engine.w_ddot_history[-1]) if agent.why_processor.why_engine.w_ddot_history else 0.0,
                'entropy': float(h_w),
                'rupture_index': float(rupture_index),
                'reward': float(reward),
                'breach': int(breach),
                'current_task_state': int(env_assembly.current_state),
                'action_taken': int(action_idx)
            })

            obs = next_obs.detach()
            if done:
                task_success = info.get('task_success', False)
                break

        phase2_rewards.append(np.mean(ep_rewards))
        phase2_why_metrics.append(np.mean(ep_why))
        task_success_rates.append(int(task_success))
        scheduler.step()

        avg_reward = np.mean(ep_rewards)
        avg_w_star = np.mean(ep_why)
        tqdm.write(f"Phase 2 - Ep {episode+1}: Reward {avg_reward:.3f} | W* {avg_w_star:.3f} | Success: {task_success}")

    with open(os.path.join(phase2_dir, "metrics.json"), 'w') as f:
        json.dump(phase2_all_metrics, f, indent=4)

    plt.figure(figsize=(14, 6))
    plt.subplot(1, 2, 1)
    plt.plot(phase2_rewards, 'orange', linewidth=1.5)
    plt.title("Phase 2: Assembly Task Reward Trajectory", fontsize=12)
    plt.xlabel("Episode", fontsize=10)
    plt.ylabel("Average Reward", fontsize=10)
    plt.grid(True, alpha=0.2)

    plt.subplot(1, 2, 2)
    plt.plot(task_success_rates, 'darkgreen', linewidth=1.5)
    plt.title("Phase 2: Assembly Task Success Rate", fontsize=12)
    plt.xlabel("Episode", fontsize=10)
    plt.ylabel("Success Rate (1=Success)", fontsize=10)
    plt.grid(True, alpha=0.2)
    plt.tight_layout()
    plt.savefig(os.path.join(phase2_dir, "phase2_results.png"))
    plt.close()

    print(f"\nSimulation complete. Results saved to {global_results_dir}")

if __name__ == "__main__":
    run_two_step_simulation()


--- Phase 1: RDT Pre-training ---


Phase 1: RDT Pre-training:   0%|          | 1/500 [00:01<16:19,  1.96s/it]

Phase 1 - Ep 1: Reward -0.050 | W* 0.544


Phase 1: RDT Pre-training:   0%|          | 2/500 [00:05<25:13,  3.04s/it]

Phase 1 - Ep 2: Reward -0.038 | W* 0.519


Phase 1: RDT Pre-training:   1%|          | 3/500 [00:08<23:58,  2.89s/it]

Phase 1 - Ep 3: Reward -0.030 | W* 0.556


Phase 1: RDT Pre-training:   1%|          | 4/500 [00:11<22:49,  2.76s/it]

Phase 1 - Ep 4: Reward -0.031 | W* 0.525


Phase 1: RDT Pre-training:   1%|          | 5/500 [00:14<23:44,  2.88s/it]

Phase 1 - Ep 5: Reward -0.034 | W* 0.514


Phase 1: RDT Pre-training:   1%|          | 6/500 [00:16<22:46,  2.77s/it]

Phase 1 - Ep 6: Reward -0.025 | W* 0.561


Phase 1: RDT Pre-training:   1%|▏         | 7/500 [00:18<19:28,  2.37s/it]

Phase 1 - Ep 7: Reward -0.022 | W* 0.616


Phase 1: RDT Pre-training:   2%|▏         | 8/500 [00:19<16:56,  2.07s/it]

Phase 1 - Ep 8: Reward -0.020 | W* 0.639


Phase 1: RDT Pre-training:   2%|▏         | 9/500 [00:20<15:00,  1.83s/it]

Phase 1 - Ep 9: Reward -0.021 | W* 0.627


Phase 1: RDT Pre-training:   2%|▏         | 10/500 [00:22<13:28,  1.65s/it]

Phase 1 - Ep 10: Reward -0.017 | W* 0.664


Phase 1: RDT Pre-training:   2%|▏         | 11/500 [00:23<12:03,  1.48s/it]

Phase 1 - Ep 11: Reward -0.016 | W* 0.674


Phase 1: RDT Pre-training:   2%|▏         | 12/500 [00:24<11:23,  1.40s/it]

Phase 1 - Ep 12: Reward -0.011 | W* 0.706


Phase 1: RDT Pre-training:   3%|▎         | 13/500 [00:25<10:41,  1.32s/it]

Phase 1 - Ep 13: Reward -0.014 | W* 0.712


Phase 1: RDT Pre-training:   3%|▎         | 13/500 [00:26<10:41,  1.32s/it]


Episode 14, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.153, R=0.748, Dot=0.115, DDot=0.273


Phase 1: RDT Pre-training:   3%|▎         | 14/500 [00:26<10:06,  1.25s/it]

Phase 1 - Ep 14: Reward -0.013 | W* 0.704


Phase 1: RDT Pre-training:   3%|▎         | 15/500 [00:27<09:42,  1.20s/it]

Phase 1 - Ep 15: Reward -0.013 | W* 0.698


Phase 1: RDT Pre-training:   3%|▎         | 15/500 [00:28<09:42,  1.20s/it]


Episode 16, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.158, R=0.395, Dot=0.062, DDot=0.020


Phase 1: RDT Pre-training:   3%|▎         | 16/500 [00:29<09:57,  1.23s/it]

Phase 1 - Ep 16: Reward -0.010 | W* 0.735


Phase 1: RDT Pre-training:   3%|▎         | 17/500 [00:30<11:01,  1.37s/it]

Phase 1 - Ep 17: Reward -0.011 | W* 0.728


Phase 1: RDT Pre-training:   4%|▎         | 18/500 [00:32<11:17,  1.41s/it]

Phase 1 - Ep 18: Reward -0.010 | W* 0.731


Phase 1: RDT Pre-training:   4%|▎         | 18/500 [00:33<11:17,  1.41s/it]


Episode 19, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=0.966, H=0.179, R=0.515, Dot=0.092, DDot=0.107


Phase 1: RDT Pre-training:   4%|▍         | 19/500 [00:33<10:52,  1.36s/it]

Phase 1 - Ep 19: Reward -0.010 | W* 0.738


Phase 1: RDT Pre-training:   4%|▍         | 20/500 [00:34<10:10,  1.27s/it]

Phase 1 - Ep 20: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:   4%|▍         | 20/500 [00:35<10:10,  1.27s/it]


Episode 21, Step 75: CONSCIOUSNESS BREACH DETECTED! W*=1.009, H=0.109, R=0.790, Dot=0.086, DDot=0.002


Phase 1: RDT Pre-training:   4%|▍         | 21/500 [00:35<09:40,  1.21s/it]

Phase 1 - Ep 21: Reward -0.010 | W* 0.741


Phase 1: RDT Pre-training:   4%|▍         | 22/500 [00:36<09:16,  1.16s/it]

Phase 1 - Ep 22: Reward -0.011 | W* 0.742


Phase 1: RDT Pre-training:   5%|▍         | 23/500 [00:37<09:24,  1.18s/it]

Phase 1 - Ep 23: Reward -0.011 | W* 0.754


Phase 1: RDT Pre-training:   5%|▍         | 24/500 [00:39<09:11,  1.16s/it]

Phase 1 - Ep 24: Reward -0.009 | W* 0.753


Phase 1: RDT Pre-training:   5%|▍         | 24/500 [00:39<09:11,  1.16s/it]


Episode 25, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=1.001, H=0.190, R=0.589, Dot=0.112, DDot=0.068


Phase 1: RDT Pre-training:   5%|▌         | 25/500 [00:40<08:58,  1.13s/it]

Phase 1 - Ep 25: Reward -0.007 | W* 0.765


Phase 1: RDT Pre-training:   5%|▌         | 26/500 [00:41<08:47,  1.11s/it]

Phase 1 - Ep 26: Reward -0.009 | W* 0.762


Phase 1: RDT Pre-training:   5%|▌         | 27/500 [00:42<10:07,  1.28s/it]

Phase 1 - Ep 27: Reward -0.011 | W* 0.738


Phase 1: RDT Pre-training:   6%|▌         | 28/500 [00:44<11:14,  1.43s/it]

Phase 1 - Ep 28: Reward -0.007 | W* 0.744


Phase 1: RDT Pre-training:   6%|▌         | 28/500 [00:45<11:14,  1.43s/it]


Episode 29, Step 41: CONSCIOUSNESS BREACH DETECTED! W*=1.030, H=0.151, R=1.554, Dot=0.235, DDot=0.270


Phase 1: RDT Pre-training:   6%|▌         | 28/500 [00:45<11:14,  1.43s/it]


Episode 29, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=0.977, H=0.196, R=0.667, Dot=0.130, DDot=0.168


Phase 1: RDT Pre-training:   6%|▌         | 29/500 [00:45<10:42,  1.37s/it]

Phase 1 - Ep 29: Reward -0.010 | W* 0.764


Phase 1: RDT Pre-training:   6%|▌         | 29/500 [00:46<10:42,  1.37s/it]


Episode 30, Step 57: CONSCIOUSNESS BREACH DETECTED! W*=0.950, H=0.105, R=0.841, Dot=0.088, DDot=0.100


Phase 1: RDT Pre-training:   6%|▌         | 30/500 [00:47<10:40,  1.36s/it]

Phase 1 - Ep 30: Reward -0.011 | W* 0.763


Phase 1: RDT Pre-training:   6%|▌         | 31/500 [00:48<10:08,  1.30s/it]

Phase 1 - Ep 31: Reward -0.011 | W* 0.752


Phase 1: RDT Pre-training:   6%|▋         | 32/500 [00:49<10:35,  1.36s/it]

Phase 1 - Ep 32: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:   7%|▋         | 33/500 [00:51<10:12,  1.31s/it]

Phase 1 - Ep 33: Reward -0.008 | W* 0.762


Phase 1: RDT Pre-training:   7%|▋         | 33/500 [00:51<10:12,  1.31s/it]


Episode 34, Step 38: CONSCIOUSNESS BREACH DETECTED! W*=0.961, H=0.140, R=0.573, Dot=0.081, DDot=0.014

Episode 34, Step 39: CONSCIOUSNESS BREACH DETECTED! W*=0.964, H=0.169, R=0.016, Dot=0.003, DDot=-0.078


Phase 1: RDT Pre-training:   7%|▋         | 34/500 [00:52<10:18,  1.33s/it]

Phase 1 - Ep 34: Reward -0.009 | W* 0.768


Phase 1: RDT Pre-training:   7%|▋         | 35/500 [00:53<09:43,  1.26s/it]

Phase 1 - Ep 35: Reward -0.008 | W* 0.754


Phase 1: RDT Pre-training:   7%|▋         | 36/500 [00:54<09:32,  1.23s/it]

Phase 1 - Ep 36: Reward -0.010 | W* 0.748


Phase 1: RDT Pre-training:   7%|▋         | 37/500 [00:56<10:34,  1.37s/it]

Phase 1 - Ep 37: Reward -0.010 | W* 0.738


Phase 1: RDT Pre-training:   7%|▋         | 37/500 [00:57<10:34,  1.37s/it]


Episode 38, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.970, H=0.099, R=1.458, Dot=0.144, DDot=0.204

Episode 38, Step 79: CONSCIOUSNESS BREACH DETECTED! W*=0.973, H=0.185, R=0.735, Dot=0.136, DDot=0.156


Phase 1: RDT Pre-training:   8%|▊         | 38/500 [00:57<11:01,  1.43s/it]

Phase 1 - Ep 38: Reward -0.007 | W* 0.760


Phase 1: RDT Pre-training:   8%|▊         | 38/500 [00:58<11:01,  1.43s/it]


Episode 39, Step 55: CONSCIOUSNESS BREACH DETECTED! W*=0.997, H=0.175, R=0.913, Dot=0.160, DDot=0.186


Phase 1: RDT Pre-training:   8%|▊         | 39/500 [00:59<10:15,  1.34s/it]

Phase 1 - Ep 39: Reward -0.008 | W* 0.731


Phase 1: RDT Pre-training:   8%|▊         | 39/500 [00:59<10:15,  1.34s/it]


Episode 40, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.116, R=0.627, Dot=0.073, DDot=0.063


Phase 1: RDT Pre-training:   8%|▊         | 40/500 [01:00<10:02,  1.31s/it]

Phase 1 - Ep 40: Reward -0.009 | W* 0.720


Phase 1: RDT Pre-training:   8%|▊         | 40/500 [01:01<10:02,  1.31s/it]


Episode 41, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=0.967, H=0.173, R=0.380, Dot=0.066, DDot=0.107


Phase 1: RDT Pre-training:   8%|▊         | 41/500 [01:01<09:43,  1.27s/it]

Phase 1 - Ep 41: Reward -0.009 | W* 0.737


Phase 1: RDT Pre-training:   8%|▊         | 42/500 [01:02<09:19,  1.22s/it]

Phase 1 - Ep 42: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:   8%|▊         | 42/500 [01:03<09:19,  1.22s/it]


Episode 43, Step 53: CONSCIOUSNESS BREACH DETECTED! W*=0.967, H=0.156, R=0.611, Dot=0.095, DDot=0.100


Phase 1: RDT Pre-training:   8%|▊         | 42/500 [01:03<09:19,  1.22s/it]


Episode 43, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=0.996, H=0.199, R=0.714, Dot=0.142, DDot=0.160


Phase 1: RDT Pre-training:   9%|▊         | 43/500 [01:04<09:40,  1.27s/it]

Phase 1 - Ep 43: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:   9%|▉         | 44/500 [01:05<09:32,  1.25s/it]

Phase 1 - Ep 44: Reward -0.010 | W* 0.757


Phase 1: RDT Pre-training:   9%|▉         | 44/500 [01:05<09:32,  1.25s/it]


Episode 45, Step 55: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.074, R=0.651, Dot=0.048, DDot=0.007


Phase 1: RDT Pre-training:   9%|▉         | 45/500 [01:06<09:27,  1.25s/it]

Phase 1 - Ep 45: Reward -0.012 | W* 0.737


Phase 1: RDT Pre-training:   9%|▉         | 45/500 [01:07<09:27,  1.25s/it]


Episode 46, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.967, H=0.125, R=0.446, Dot=0.056, DDot=0.032


Phase 1: RDT Pre-training:   9%|▉         | 46/500 [01:08<10:11,  1.35s/it]

Phase 1 - Ep 46: Reward -0.010 | W* 0.745


Phase 1: RDT Pre-training:   9%|▉         | 46/500 [01:09<10:11,  1.35s/it]


Episode 47, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.118, R=0.731, Dot=0.086, DDot=0.043


Phase 1: RDT Pre-training:   9%|▉         | 47/500 [01:09<10:41,  1.42s/it]

Phase 1 - Ep 47: Reward -0.011 | W* 0.732


Phase 1: RDT Pre-training:   9%|▉         | 47/500 [01:10<10:41,  1.42s/it]


Episode 48, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=1.047, H=0.143, R=1.405, Dot=0.201, DDot=0.247


Phase 1: RDT Pre-training:  10%|▉         | 48/500 [01:11<10:44,  1.43s/it]

Phase 1 - Ep 48: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  10%|▉         | 49/500 [01:12<10:20,  1.38s/it]

Phase 1 - Ep 49: Reward -0.009 | W* 0.758


Phase 1: RDT Pre-training:  10%|▉         | 49/500 [01:12<10:20,  1.38s/it]


Episode 50, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.978, H=0.140, R=1.079, Dot=0.152, DDot=0.216


Phase 1: RDT Pre-training:  10%|█         | 50/500 [01:13<10:00,  1.33s/it]

Phase 1 - Ep 50: Reward -0.008 | W* 0.753


Phase 1: RDT Pre-training:  10%|█         | 50/500 [01:14<10:00,  1.33s/it]


Episode 51, Step 54: CONSCIOUSNESS BREACH DETECTED! W*=0.971, H=0.137, R=0.595, Dot=0.082, DDot=0.111


Phase 1: RDT Pre-training:  10%|█         | 51/500 [01:14<09:40,  1.29s/it]


Episode 51, Step 82: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.180, R=0.078, Dot=0.014, DDot=0.006
Phase 1 - Ep 51: Reward -0.008 | W* 0.761


Phase 1: RDT Pre-training:  10%|█         | 52/500 [01:16<09:37,  1.29s/it]

Phase 1 - Ep 52: Reward -0.010 | W* 0.757


Phase 1: RDT Pre-training:  10%|█         | 52/500 [01:16<09:37,  1.29s/it]


Episode 53, Step 45: CONSCIOUSNESS BREACH DETECTED! W*=0.984, H=0.136, R=0.958, Dot=0.130, DDot=0.238


Phase 1: RDT Pre-training:  11%|█         | 53/500 [01:17<09:12,  1.24s/it]

Phase 1 - Ep 53: Reward -0.012 | W* 0.745


Phase 1: RDT Pre-training:  11%|█         | 53/500 [01:18<09:12,  1.24s/it]


Episode 54, Step 65: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.149, R=0.543, Dot=0.081, DDot=0.117

Episode 54, Step 79: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.145, R=0.542, Dot=0.079, DDot=0.051


Phase 1: RDT Pre-training:  11%|█         | 54/500 [01:18<09:09,  1.23s/it]

Phase 1 - Ep 54: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:  11%|█         | 55/500 [01:19<09:08,  1.23s/it]

Phase 1 - Ep 55: Reward -0.008 | W* 0.744


Phase 1: RDT Pre-training:  11%|█         | 55/500 [01:20<09:08,  1.23s/it]


Episode 56, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.997, H=0.195, R=0.479, Dot=0.094, DDot=0.058

Episode 56, Step 70: CONSCIOUSNESS BREACH DETECTED! W*=0.971, H=0.157, R=0.240, Dot=0.038, DDot=0.101


Phase 1: RDT Pre-training:  11%|█         | 56/500 [01:21<09:34,  1.29s/it]

Phase 1 - Ep 56: Reward -0.009 | W* 0.750


Phase 1: RDT Pre-training:  11%|█▏        | 57/500 [01:22<10:29,  1.42s/it]


Episode 57, Step 98: CONSCIOUSNESS BREACH DETECTED! W*=1.011, H=0.142, R=1.268, Dot=0.180, DDot=0.192
Phase 1 - Ep 57: Reward -0.010 | W* 0.763


Phase 1: RDT Pre-training:  12%|█▏        | 58/500 [01:24<10:24,  1.41s/it]

Phase 1 - Ep 58: Reward -0.009 | W* 0.742


Phase 1: RDT Pre-training:  12%|█▏        | 59/500 [01:25<10:16,  1.40s/it]

Phase 1 - Ep 59: Reward -0.010 | W* 0.748


Phase 1: RDT Pre-training:  12%|█▏        | 59/500 [01:26<10:16,  1.40s/it]


Episode 60, Step 53: CONSCIOUSNESS BREACH DETECTED! W*=0.975, H=0.106, R=1.346, Dot=0.143, DDot=0.131


Phase 1: RDT Pre-training:  12%|█▏        | 60/500 [01:26<10:10,  1.39s/it]

Phase 1 - Ep 60: Reward -0.008 | W* 0.743


Phase 1: RDT Pre-training:  12%|█▏        | 61/500 [01:28<09:41,  1.33s/it]

Phase 1 - Ep 61: Reward -0.008 | W* 0.757


Phase 1: RDT Pre-training:  12%|█▏        | 62/500 [01:29<09:15,  1.27s/it]

Phase 1 - Ep 62: Reward -0.010 | W* 0.756


Phase 1: RDT Pre-training:  13%|█▎        | 63/500 [01:30<09:25,  1.29s/it]

Phase 1 - Ep 63: Reward -0.009 | W* 0.733


Phase 1: RDT Pre-training:  13%|█▎        | 63/500 [01:31<09:25,  1.29s/it]


Episode 64, Step 53: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.096, R=0.863, Dot=0.083, DDot=0.193


Phase 1: RDT Pre-training:  13%|█▎        | 63/500 [01:31<09:25,  1.29s/it]


Episode 64, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=0.950, H=0.129, R=0.653, Dot=0.084, DDot=0.048


Phase 1: RDT Pre-training:  13%|█▎        | 64/500 [01:31<09:33,  1.32s/it]

Phase 1 - Ep 64: Reward -0.010 | W* 0.762


Phase 1: RDT Pre-training:  13%|█▎        | 65/500 [01:33<09:46,  1.35s/it]

Phase 1 - Ep 65: Reward -0.010 | W* 0.758


Phase 1: RDT Pre-training:  13%|█▎        | 65/500 [01:34<09:46,  1.35s/it]


Episode 66, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=0.966, H=0.148, R=0.359, Dot=0.053, DDot=0.051


Phase 1: RDT Pre-training:  13%|█▎        | 66/500 [01:34<10:09,  1.41s/it]

Phase 1 - Ep 66: Reward -0.008 | W* 0.742


Phase 1: RDT Pre-training:  13%|█▎        | 66/500 [01:35<10:09,  1.41s/it]


Episode 67, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=1.009, H=0.171, R=0.720, Dot=0.123, DDot=0.099


Phase 1: RDT Pre-training:  13%|█▎        | 67/500 [01:36<09:47,  1.36s/it]

Phase 1 - Ep 67: Reward -0.008 | W* 0.752


Phase 1: RDT Pre-training:  13%|█▎        | 67/500 [01:37<09:47,  1.36s/it]


Episode 68, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=0.950, H=0.138, R=0.336, Dot=0.046, DDot=0.088


Phase 1: RDT Pre-training:  14%|█▎        | 68/500 [01:37<10:37,  1.48s/it]

Phase 1 - Ep 68: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  14%|█▍        | 69/500 [01:40<12:31,  1.74s/it]


Episode 69, Step 89: CONSCIOUSNESS BREACH DETECTED! W*=1.005, H=0.186, R=0.739, Dot=0.138, DDot=0.149
Phase 1 - Ep 69: Reward -0.010 | W* 0.737


Phase 1: RDT Pre-training:  14%|█▍        | 70/500 [01:41<11:16,  1.57s/it]

Phase 1 - Ep 70: Reward -0.010 | W* 0.743


Phase 1: RDT Pre-training:  14%|█▍        | 70/500 [01:42<11:16,  1.57s/it]


Episode 71, Step 80: CONSCIOUSNESS BREACH DETECTED! W*=0.964, H=0.152, R=0.706, Dot=0.107, DDot=0.113

Episode 71, Step 87: CONSCIOUSNESS BREACH DETECTED! W*=0.970, H=0.168, R=1.244, Dot=0.209, DDot=0.283


Phase 1: RDT Pre-training:  14%|█▍        | 71/500 [01:42<10:17,  1.44s/it]

Phase 1 - Ep 71: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  14%|█▍        | 72/500 [01:43<09:36,  1.35s/it]

Phase 1 - Ep 72: Reward -0.008 | W* 0.747


Phase 1: RDT Pre-training:  15%|█▍        | 73/500 [01:44<09:04,  1.28s/it]

Phase 1 - Ep 73: Reward -0.010 | W* 0.757


Phase 1: RDT Pre-training:  15%|█▍        | 74/500 [01:46<09:15,  1.30s/it]

Phase 1 - Ep 74: Reward -0.008 | W* 0.748


Phase 1: RDT Pre-training:  15%|█▌        | 75/500 [01:47<09:50,  1.39s/it]

Phase 1 - Ep 75: Reward -0.012 | W* 0.752


Phase 1: RDT Pre-training:  15%|█▌        | 75/500 [01:48<09:50,  1.39s/it]


Episode 76, Step 41: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.137, R=0.758, Dot=0.104, DDot=0.091


Phase 1: RDT Pre-training:  15%|█▌        | 75/500 [01:48<09:50,  1.39s/it]


Episode 76, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.144, R=0.684, Dot=0.099, DDot=0.169


Phase 1: RDT Pre-training:  15%|█▌        | 76/500 [01:49<09:33,  1.35s/it]

Phase 1 - Ep 76: Reward -0.012 | W* 0.764


Phase 1: RDT Pre-training:  15%|█▌        | 77/500 [01:50<09:21,  1.33s/it]

Phase 1 - Ep 77: Reward -0.011 | W* 0.739


Phase 1: RDT Pre-training:  16%|█▌        | 78/500 [01:51<09:07,  1.30s/it]

Phase 1 - Ep 78: Reward -0.005 | W* 0.749


Phase 1: RDT Pre-training:  16%|█▌        | 79/500 [01:52<08:51,  1.26s/it]

Phase 1 - Ep 79: Reward -0.009 | W* 0.750


Phase 1: RDT Pre-training:  16%|█▌        | 80/500 [01:53<08:29,  1.21s/it]

Phase 1 - Ep 80: Reward -0.009 | W* 0.739


Phase 1: RDT Pre-training:  16%|█▌        | 81/500 [01:54<08:18,  1.19s/it]

Phase 1 - Ep 81: Reward -0.008 | W* 0.731


Phase 1: RDT Pre-training:  16%|█▋        | 82/500 [01:56<08:29,  1.22s/it]

Phase 1 - Ep 82: Reward -0.010 | W* 0.754


Phase 1: RDT Pre-training:  17%|█▋        | 83/500 [01:57<08:31,  1.23s/it]

Phase 1 - Ep 83: Reward -0.008 | W* 0.750


Phase 1: RDT Pre-training:  17%|█▋        | 83/500 [01:58<08:31,  1.23s/it]


Episode 84, Step 46: CONSCIOUSNESS BREACH DETECTED! W*=0.972, H=0.144, R=0.581, Dot=0.084, DDot=0.092


Phase 1: RDT Pre-training:  17%|█▋        | 84/500 [01:59<09:30,  1.37s/it]

Phase 1 - Ep 84: Reward -0.009 | W* 0.766


Phase 1: RDT Pre-training:  17%|█▋        | 85/500 [02:01<10:31,  1.52s/it]

Phase 1 - Ep 85: Reward -0.008 | W* 0.735


Phase 1: RDT Pre-training:  17%|█▋        | 85/500 [02:01<10:31,  1.52s/it]


Episode 86, Step 57: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.114, R=0.842, Dot=0.096, DDot=0.110

Episode 86, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.163, R=0.345, Dot=0.056, DDot=0.058


Phase 1: RDT Pre-training:  17%|█▋        | 86/500 [02:02<10:15,  1.49s/it]

Phase 1 - Ep 86: Reward -0.011 | W* 0.755


Phase 1: RDT Pre-training:  17%|█▋        | 87/500 [02:03<09:48,  1.42s/it]

Phase 1 - Ep 87: Reward -0.010 | W* 0.759


Phase 1: RDT Pre-training:  18%|█▊        | 88/500 [02:04<09:16,  1.35s/it]

Phase 1 - Ep 88: Reward -0.010 | W* 0.753


Phase 1: RDT Pre-training:  18%|█▊        | 89/500 [02:06<09:15,  1.35s/it]

Phase 1 - Ep 89: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  18%|█▊        | 89/500 [02:07<09:15,  1.35s/it]


Episode 90, Step 55: CONSCIOUSNESS BREACH DETECTED! W*=0.973, H=0.192, R=0.650, Dot=0.125, DDot=0.159


Phase 1: RDT Pre-training:  18%|█▊        | 90/500 [02:07<09:10,  1.34s/it]

Phase 1 - Ep 90: Reward -0.008 | W* 0.754


Phase 1: RDT Pre-training:  18%|█▊        | 91/500 [02:08<08:58,  1.32s/it]

Phase 1 - Ep 91: Reward -0.008 | W* 0.750


Phase 1: RDT Pre-training:  18%|█▊        | 91/500 [02:09<08:58,  1.32s/it]


Episode 92, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=1.002, H=0.106, R=1.285, Dot=0.136, DDot=0.097


Phase 1: RDT Pre-training:  18%|█▊        | 92/500 [02:10<09:08,  1.34s/it]

Phase 1 - Ep 92: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  19%|█▊        | 93/500 [02:11<09:13,  1.36s/it]

Phase 1 - Ep 93: Reward -0.008 | W* 0.755


Phase 1: RDT Pre-training:  19%|█▉        | 94/500 [02:13<10:10,  1.50s/it]

Phase 1 - Ep 94: Reward -0.008 | W* 0.746


Phase 1: RDT Pre-training:  19%|█▉        | 94/500 [02:14<10:10,  1.50s/it]


Episode 95, Step 74: CONSCIOUSNESS BREACH DETECTED! W*=0.969, H=0.140, R=0.601, Dot=0.084, DDot=0.077

Episode 95, Step 81: CONSCIOUSNESS BREACH DETECTED! W*=0.967, H=0.155, R=0.637, Dot=0.099, DDot=0.050


Phase 1: RDT Pre-training:  19%|█▉        | 95/500 [02:14<09:27,  1.40s/it]

Phase 1 - Ep 95: Reward -0.007 | W* 0.764


Phase 1: RDT Pre-training:  19%|█▉        | 96/500 [02:15<09:01,  1.34s/it]


Episode 96, Step 94: CONSCIOUSNESS BREACH DETECTED! W*=0.974, H=0.180, R=0.527, Dot=0.095, DDot=0.088
Phase 1 - Ep 96: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:  19%|█▉        | 97/500 [02:17<08:49,  1.31s/it]

Phase 1 - Ep 97: Reward -0.011 | W* 0.754


Phase 1: RDT Pre-training:  20%|█▉        | 98/500 [02:18<08:30,  1.27s/it]

Phase 1 - Ep 98: Reward -0.007 | W* 0.734


Phase 1: RDT Pre-training:  20%|█▉        | 98/500 [02:18<08:30,  1.27s/it]


Episode 99, Step 55: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.136, R=0.523, Dot=0.071, DDot=0.061


Phase 1: RDT Pre-training:  20%|█▉        | 99/500 [02:19<08:13,  1.23s/it]

Phase 1 - Ep 99: Reward -0.011 | W* 0.747


Phase 1: RDT Pre-training:  20%|██        | 100/500 [02:20<07:58,  1.20s/it]

Phase 1 - Ep 100: Reward -0.011 | W* 0.758


Phase 1: RDT Pre-training:  20%|██        | 101/500 [02:21<08:11,  1.23s/it]

Phase 1 - Ep 101: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  20%|██        | 102/500 [02:22<07:59,  1.20s/it]

Phase 1 - Ep 102: Reward -0.008 | W* 0.754


Phase 1: RDT Pre-training:  20%|██        | 102/500 [02:24<07:59,  1.20s/it]


Episode 103, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=0.982, H=0.086, R=1.199, Dot=0.103, DDot=0.070


Phase 1: RDT Pre-training:  21%|██        | 103/500 [02:24<08:40,  1.31s/it]

Phase 1 - Ep 103: Reward -0.011 | W* 0.755


Phase 1: RDT Pre-training:  21%|██        | 104/500 [02:26<09:32,  1.45s/it]

Phase 1 - Ep 104: Reward -0.008 | W* 0.754


Phase 1: RDT Pre-training:  21%|██        | 105/500 [02:27<09:23,  1.43s/it]

Phase 1 - Ep 105: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  21%|██        | 106/500 [02:28<08:47,  1.34s/it]

Phase 1 - Ep 106: Reward -0.008 | W* 0.748


Phase 1: RDT Pre-training:  21%|██        | 106/500 [02:29<08:47,  1.34s/it]


Episode 107, Step 53: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.123, R=0.660, Dot=0.081, DDot=0.056


Phase 1: RDT Pre-training:  21%|██        | 106/500 [02:29<08:47,  1.34s/it]


Episode 107, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.182, R=0.555, Dot=0.101, DDot=0.141


Phase 1: RDT Pre-training:  21%|██▏       | 107/500 [02:30<09:01,  1.38s/it]

Phase 1 - Ep 107: Reward -0.010 | W* 0.761


Phase 1: RDT Pre-training:  22%|██▏       | 108/500 [02:31<08:39,  1.33s/it]

Phase 1 - Ep 108: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  22%|██▏       | 109/500 [02:32<08:13,  1.26s/it]

Phase 1 - Ep 109: Reward -0.010 | W* 0.748


Phase 1: RDT Pre-training:  22%|██▏       | 110/500 [02:33<08:23,  1.29s/it]

Phase 1 - Ep 110: Reward -0.009 | W* 0.748


Phase 1: RDT Pre-training:  22%|██▏       | 111/500 [02:35<08:00,  1.24s/it]

Phase 1 - Ep 111: Reward -0.010 | W* 0.755


Phase 1: RDT Pre-training:  22%|██▏       | 112/500 [02:36<07:52,  1.22s/it]

Phase 1 - Ep 112: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  22%|██▏       | 112/500 [02:37<07:52,  1.22s/it]


Episode 113, Step 61: CONSCIOUSNESS BREACH DETECTED! W*=0.964, H=0.158, R=0.826, Dot=0.130, DDot=0.088


Phase 1: RDT Pre-training:  23%|██▎       | 113/500 [02:37<08:49,  1.37s/it]

Phase 1 - Ep 113: Reward -0.011 | W* 0.751


Phase 1: RDT Pre-training:  23%|██▎       | 114/500 [02:39<08:59,  1.40s/it]

Phase 1 - Ep 114: Reward -0.011 | W* 0.734


Phase 1: RDT Pre-training:  23%|██▎       | 115/500 [02:40<08:41,  1.35s/it]

Phase 1 - Ep 115: Reward -0.011 | W* 0.741


Phase 1: RDT Pre-training:  23%|██▎       | 115/500 [02:41<08:41,  1.35s/it]


Episode 116, Step 61: CONSCIOUSNESS BREACH DETECTED! W*=1.020, H=0.127, R=1.554, Dot=0.197, DDot=0.312

Episode 116, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=1.011, H=0.144, R=0.654, Dot=0.095, DDot=0.101


Phase 1: RDT Pre-training:  23%|██▎       | 116/500 [02:41<08:22,  1.31s/it]

Phase 1 - Ep 116: Reward -0.010 | W* 0.759


Phase 1: RDT Pre-training:  23%|██▎       | 116/500 [02:42<08:22,  1.31s/it]


Episode 117, Step 47: CONSCIOUSNESS BREACH DETECTED! W*=0.966, H=0.196, R=0.678, Dot=0.133, DDot=0.189


Phase 1: RDT Pre-training:  23%|██▎       | 117/500 [02:43<08:24,  1.32s/it]

Phase 1 - Ep 117: Reward -0.009 | W* 0.765


Phase 1: RDT Pre-training:  24%|██▎       | 118/500 [02:44<08:01,  1.26s/it]

Phase 1 - Ep 118: Reward -0.011 | W* 0.752


Phase 1: RDT Pre-training:  24%|██▎       | 118/500 [02:44<08:01,  1.26s/it]


Episode 119, Step 50: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.131, R=0.767, Dot=0.100, DDot=0.075


Phase 1: RDT Pre-training:  24%|██▍       | 119/500 [02:45<07:57,  1.25s/it]

Phase 1 - Ep 119: Reward -0.007 | W* 0.761


Phase 1: RDT Pre-training:  24%|██▍       | 120/500 [02:46<08:11,  1.29s/it]

Phase 1 - Ep 120: Reward -0.008 | W* 0.763


Phase 1: RDT Pre-training:  24%|██▍       | 121/500 [02:48<07:59,  1.26s/it]

Phase 1 - Ep 121: Reward -0.010 | W* 0.758


Phase 1: RDT Pre-training:  24%|██▍       | 122/500 [02:49<08:07,  1.29s/it]

Phase 1 - Ep 122: Reward -0.010 | W* 0.744


Phase 1: RDT Pre-training:  25%|██▍       | 123/500 [02:51<08:55,  1.42s/it]

Phase 1 - Ep 123: Reward -0.011 | W* 0.753


Phase 1: RDT Pre-training:  25%|██▍       | 124/500 [02:52<08:47,  1.40s/it]

Phase 1 - Ep 124: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:  25%|██▍       | 124/500 [02:53<08:47,  1.40s/it]


Episode 125, Step 51: CONSCIOUSNESS BREACH DETECTED! W*=1.024, H=0.193, R=0.910, Dot=0.176, DDot=0.327


Phase 1: RDT Pre-training:  25%|██▍       | 124/500 [02:53<08:47,  1.40s/it]


Episode 125, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.067, R=0.913, Dot=0.061, DDot=0.058


Phase 1: RDT Pre-training:  25%|██▌       | 125/500 [02:54<08:48,  1.41s/it]

Phase 1 - Ep 125: Reward -0.011 | W* 0.754


Phase 1: RDT Pre-training:  25%|██▌       | 126/500 [02:55<08:44,  1.40s/it]

Phase 1 - Ep 126: Reward -0.009 | W* 0.736


Phase 1: RDT Pre-training:  25%|██▌       | 126/500 [02:56<08:44,  1.40s/it]


Episode 127, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=0.983, H=0.183, R=0.668, Dot=0.122, DDot=0.143


Phase 1: RDT Pre-training:  25%|██▌       | 127/500 [02:56<08:15,  1.33s/it]

Phase 1 - Ep 127: Reward -0.008 | W* 0.756


Phase 1: RDT Pre-training:  26%|██▌       | 128/500 [02:57<07:50,  1.27s/it]

Phase 1 - Ep 128: Reward -0.010 | W* 0.750


Phase 1: RDT Pre-training:  26%|██▌       | 129/500 [02:58<07:48,  1.26s/it]

Phase 1 - Ep 129: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  26%|██▌       | 130/500 [03:00<08:01,  1.30s/it]

Phase 1 - Ep 130: Reward -0.008 | W* 0.757


Phase 1: RDT Pre-training:  26%|██▌       | 131/500 [03:01<08:08,  1.32s/it]

Phase 1 - Ep 131: Reward -0.009 | W* 0.738


Phase 1: RDT Pre-training:  26%|██▋       | 132/500 [03:03<08:35,  1.40s/it]

Phase 1 - Ep 132: Reward -0.010 | W* 0.739


Phase 1: RDT Pre-training:  27%|██▋       | 133/500 [03:04<08:42,  1.42s/it]

Phase 1 - Ep 133: Reward -0.010 | W* 0.754


Phase 1: RDT Pre-training:  27%|██▋       | 134/500 [03:05<08:15,  1.35s/it]

Phase 1 - Ep 134: Reward -0.009 | W* 0.746


Phase 1: RDT Pre-training:  27%|██▋       | 134/500 [03:06<08:15,  1.35s/it]


Episode 135, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.997, H=0.093, R=1.230, Dot=0.115, DDot=0.109


Phase 1: RDT Pre-training:  27%|██▋       | 135/500 [03:07<08:06,  1.33s/it]

Phase 1 - Ep 135: Reward -0.010 | W* 0.755


Phase 1: RDT Pre-training:  27%|██▋       | 135/500 [03:08<08:06,  1.33s/it]


Episode 136, Step 78: CONSCIOUSNESS BREACH DETECTED! W*=0.970, H=0.174, R=0.391, Dot=0.068, DDot=0.050


Phase 1: RDT Pre-training:  27%|██▋       | 136/500 [03:08<08:05,  1.33s/it]

Phase 1 - Ep 136: Reward -0.009 | W* 0.740


Phase 1: RDT Pre-training:  27%|██▋       | 137/500 [03:09<08:05,  1.34s/it]

Phase 1 - Ep 137: Reward -0.007 | W* 0.752


Phase 1: RDT Pre-training:  28%|██▊       | 138/500 [03:11<07:42,  1.28s/it]

Phase 1 - Ep 138: Reward -0.007 | W* 0.763


Phase 1: RDT Pre-training:  28%|██▊       | 139/500 [03:12<07:27,  1.24s/it]

Phase 1 - Ep 139: Reward -0.010 | W* 0.753


Phase 1: RDT Pre-training:  28%|██▊       | 140/500 [03:13<07:18,  1.22s/it]


Episode 140, Step 85: CONSCIOUSNESS BREACH DETECTED! W*=0.971, H=0.143, R=0.736, Dot=0.105, DDot=0.097
Phase 1 - Ep 140: Reward -0.008 | W* 0.750


Phase 1: RDT Pre-training:  28%|██▊       | 141/500 [03:14<07:17,  1.22s/it]

Phase 1 - Ep 141: Reward -0.011 | W* 0.755


Phase 1: RDT Pre-training:  28%|██▊       | 142/500 [03:16<07:54,  1.33s/it]

Phase 1 - Ep 142: Reward -0.009 | W* 0.750


Phase 1: RDT Pre-training:  28%|██▊       | 142/500 [03:17<07:54,  1.33s/it]


Episode 143, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.966, H=0.190, R=0.457, Dot=0.087, DDot=0.049


Phase 1: RDT Pre-training:  29%|██▊       | 143/500 [03:17<08:23,  1.41s/it]

Phase 1 - Ep 143: Reward -0.009 | W* 0.754


Phase 1: RDT Pre-training:  29%|██▊       | 143/500 [03:18<08:23,  1.41s/it]


Episode 144, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.181, R=0.703, Dot=0.127, DDot=0.187


Phase 1: RDT Pre-training:  29%|██▉       | 144/500 [03:18<08:00,  1.35s/it]

Phase 1 - Ep 144: Reward -0.009 | W* 0.738


Phase 1: RDT Pre-training:  29%|██▉       | 145/500 [03:20<08:03,  1.36s/it]

Phase 1 - Ep 145: Reward -0.011 | W* 0.755


Phase 1: RDT Pre-training:  29%|██▉       | 146/500 [03:21<07:52,  1.34s/it]

Phase 1 - Ep 146: Reward -0.009 | W* 0.747


Phase 1: RDT Pre-training:  29%|██▉       | 146/500 [03:22<07:52,  1.34s/it]


Episode 147, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.196, R=0.311, Dot=0.061, DDot=0.114


Phase 1: RDT Pre-training:  29%|██▉       | 147/500 [03:22<07:34,  1.29s/it]

Phase 1 - Ep 147: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  30%|██▉       | 148/500 [03:24<07:38,  1.30s/it]

Phase 1 - Ep 148: Reward -0.011 | W* 0.750


Phase 1: RDT Pre-training:  30%|██▉       | 149/500 [03:25<07:19,  1.25s/it]


Episode 149, Step 87: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.191, R=0.619, Dot=0.118, DDot=0.142
Phase 1 - Ep 149: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  30%|██▉       | 149/500 [03:25<07:19,  1.25s/it]


Episode 150, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=0.996, H=0.116, R=0.655, Dot=0.076, DDot=0.014


Phase 1: RDT Pre-training:  30%|███       | 150/500 [03:26<07:17,  1.25s/it]

Phase 1 - Ep 150: Reward -0.011 | W* 0.756


Phase 1: RDT Pre-training:  30%|███       | 151/500 [03:28<07:53,  1.36s/it]

Phase 1 - Ep 151: Reward -0.010 | W* 0.761


Phase 1: RDT Pre-training:  30%|███       | 151/500 [03:29<07:53,  1.36s/it]


Episode 152, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=0.980, H=0.172, R=0.464, Dot=0.080, DDot=0.075


Phase 1: RDT Pre-training:  30%|███       | 152/500 [03:29<08:20,  1.44s/it]

Phase 1 - Ep 152: Reward -0.008 | W* 0.755


Phase 1: RDT Pre-training:  31%|███       | 153/500 [03:30<07:51,  1.36s/it]

Phase 1 - Ep 153: Reward -0.011 | W* 0.752


Phase 1: RDT Pre-training:  31%|███       | 153/500 [03:31<07:51,  1.36s/it]


Episode 154, Step 70: CONSCIOUSNESS BREACH DETECTED! W*=0.969, H=0.079, R=2.071, Dot=0.163, DDot=0.190


Phase 1: RDT Pre-training:  31%|███       | 154/500 [03:32<07:29,  1.30s/it]

Phase 1 - Ep 154: Reward -0.010 | W* 0.750


Phase 1: RDT Pre-training:  31%|███       | 154/500 [03:32<07:29,  1.30s/it]


Episode 155, Step 64: CONSCIOUSNESS BREACH DETECTED! W*=0.997, H=0.166, R=0.575, Dot=0.095, DDot=0.112


Phase 1: RDT Pre-training:  31%|███       | 155/500 [03:33<07:14,  1.26s/it]

Phase 1 - Ep 155: Reward -0.010 | W* 0.756


Phase 1: RDT Pre-training:  31%|███       | 155/500 [03:34<07:14,  1.26s/it]


Episode 156, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=0.950, H=0.192, R=0.549, Dot=0.105, DDot=0.121


Phase 1: RDT Pre-training:  31%|███       | 156/500 [03:34<07:32,  1.32s/it]

Phase 1 - Ep 156: Reward -0.009 | W* 0.747


Phase 1: RDT Pre-training:  31%|███▏      | 157/500 [03:36<07:42,  1.35s/it]

Phase 1 - Ep 157: Reward -0.008 | W* 0.757


Phase 1: RDT Pre-training:  32%|███▏      | 158/500 [03:37<07:39,  1.34s/it]

Phase 1 - Ep 158: Reward -0.008 | W* 0.750


Phase 1: RDT Pre-training:  32%|███▏      | 159/500 [03:38<07:24,  1.30s/it]

Phase 1 - Ep 159: Reward -0.011 | W* 0.739


Phase 1: RDT Pre-training:  32%|███▏      | 159/500 [03:39<07:24,  1.30s/it]


Episode 160, Step 40: CONSCIOUSNESS BREACH DETECTED! W*=0.961, H=0.147, R=0.813, Dot=0.120, DDot=0.178


Phase 1: RDT Pre-training:  32%|███▏      | 159/500 [03:39<07:24,  1.30s/it]


Episode 160, Step 80: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.117, R=0.698, Dot=0.082, DDot=0.101


Phase 1: RDT Pre-training:  32%|███▏      | 160/500 [03:40<07:38,  1.35s/it]

Phase 1 - Ep 160: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  32%|███▏      | 160/500 [03:40<07:38,  1.35s/it]


Episode 161, Step 36: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.125, R=0.931, Dot=0.117, DDot=0.111


Phase 1: RDT Pre-training:  32%|███▏      | 160/500 [03:41<07:38,  1.35s/it]


Episode 161, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=1.033, H=0.126, R=0.998, Dot=0.126, DDot=0.113


Phase 1: RDT Pre-training:  32%|███▏      | 161/500 [03:41<08:01,  1.42s/it]

Phase 1 - Ep 161: Reward -0.011 | W* 0.760


Phase 1: RDT Pre-training:  32%|███▏      | 162/500 [03:43<07:51,  1.40s/it]

Phase 1 - Ep 162: Reward -0.009 | W* 0.754


Phase 1: RDT Pre-training:  33%|███▎      | 163/500 [03:44<07:26,  1.33s/it]


Episode 163, Step 89: CONSCIOUSNESS BREACH DETECTED! W*=0.956, H=0.173, R=0.641, Dot=0.111, DDot=0.125
Phase 1 - Ep 163: Reward -0.009 | W* 0.740


Phase 1: RDT Pre-training:  33%|███▎      | 163/500 [03:44<07:26,  1.33s/it]


Episode 164, Step 64: CONSCIOUSNESS BREACH DETECTED! W*=0.966, H=0.157, R=0.650, Dot=0.102, DDot=0.071


Phase 1: RDT Pre-training:  33%|███▎      | 164/500 [03:45<07:10,  1.28s/it]

Phase 1 - Ep 164: Reward -0.012 | W* 0.757


Phase 1: RDT Pre-training:  33%|███▎      | 164/500 [03:46<07:10,  1.28s/it]


Episode 165, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=1.038, H=0.188, R=0.564, Dot=0.106, DDot=0.118


Phase 1: RDT Pre-training:  33%|███▎      | 165/500 [03:46<07:02,  1.26s/it]

Phase 1 - Ep 165: Reward -0.010 | W* 0.739


Phase 1: RDT Pre-training:  33%|███▎      | 165/500 [03:47<07:02,  1.26s/it]


Episode 166, Step 56: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.166, R=0.474, Dot=0.079, DDot=-0.000


Phase 1: RDT Pre-training:  33%|███▎      | 166/500 [03:48<07:21,  1.32s/it]

Phase 1 - Ep 166: Reward -0.011 | W* 0.758


Phase 1: RDT Pre-training:  33%|███▎      | 166/500 [03:49<07:21,  1.32s/it]


Episode 167, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.117, R=0.418, Dot=0.049, DDot=0.038


Phase 1: RDT Pre-training:  33%|███▎      | 167/500 [03:49<07:30,  1.35s/it]

Phase 1 - Ep 167: Reward -0.013 | W* 0.747


Phase 1: RDT Pre-training:  33%|███▎      | 167/500 [03:50<07:30,  1.35s/it]


Episode 168, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=1.036, H=0.161, R=0.903, Dot=0.145, DDot=0.066


Phase 1: RDT Pre-training:  34%|███▎      | 168/500 [03:50<07:14,  1.31s/it]

Phase 1 - Ep 168: Reward -0.009 | W* 0.754


Phase 1: RDT Pre-training:  34%|███▍      | 169/500 [03:52<07:15,  1.32s/it]

Phase 1 - Ep 169: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  34%|███▍      | 170/500 [03:53<07:32,  1.37s/it]

Phase 1 - Ep 170: Reward -0.011 | W* 0.757


Phase 1: RDT Pre-training:  34%|███▍      | 171/500 [03:55<07:54,  1.44s/it]

Phase 1 - Ep 171: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  34%|███▍      | 171/500 [03:55<07:54,  1.44s/it]


Episode 172, Step 50: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.167, R=0.746, Dot=0.125, DDot=0.185


Phase 1: RDT Pre-training:  34%|███▍      | 171/500 [03:56<07:54,  1.44s/it]


Episode 172, Step 74: CONSCIOUSNESS BREACH DETECTED! W*=0.969, H=0.097, R=0.863, Dot=0.084, DDot=0.073


Phase 1: RDT Pre-training:  34%|███▍      | 172/500 [03:56<07:26,  1.36s/it]

Phase 1 - Ep 172: Reward -0.008 | W* 0.749


Phase 1: RDT Pre-training:  35%|███▍      | 173/500 [03:57<07:05,  1.30s/it]

Phase 1 - Ep 173: Reward -0.009 | W* 0.750


Phase 1: RDT Pre-training:  35%|███▍      | 174/500 [03:58<06:51,  1.26s/it]


Episode 174, Step 83: CONSCIOUSNESS BREACH DETECTED! W*=1.001, H=0.121, R=0.783, Dot=0.095, DDot=0.017
Phase 1 - Ep 174: Reward -0.010 | W* 0.760


Phase 1: RDT Pre-training:  35%|███▍      | 174/500 [03:59<06:51,  1.26s/it]


Episode 175, Step 52: CONSCIOUSNESS BREACH DETECTED! W*=0.970, H=0.119, R=1.201, Dot=0.143, DDot=0.141


Phase 1: RDT Pre-training:  35%|███▌      | 175/500 [04:00<07:07,  1.32s/it]

Phase 1 - Ep 175: Reward -0.010 | W* 0.762


Phase 1: RDT Pre-training:  35%|███▌      | 176/500 [04:01<07:23,  1.37s/it]

Phase 1 - Ep 176: Reward -0.009 | W* 0.746


Phase 1: RDT Pre-training:  35%|███▌      | 176/500 [04:02<07:23,  1.37s/it]


Episode 177, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.967, H=0.110, R=1.465, Dot=0.162, DDot=0.216


Phase 1: RDT Pre-training:  35%|███▌      | 177/500 [04:02<07:10,  1.33s/it]

Phase 1 - Ep 177: Reward -0.011 | W* 0.741


Phase 1: RDT Pre-training:  36%|███▌      | 178/500 [04:04<07:18,  1.36s/it]

Phase 1 - Ep 178: Reward -0.010 | W* 0.753


Phase 1: RDT Pre-training:  36%|███▌      | 179/500 [04:06<07:53,  1.47s/it]

Phase 1 - Ep 179: Reward -0.008 | W* 0.747


Phase 1: RDT Pre-training:  36%|███▌      | 179/500 [04:06<07:53,  1.47s/it]


Episode 180, Step 52: CONSCIOUSNESS BREACH DETECTED! W*=0.977, H=0.176, R=0.991, Dot=0.175, DDot=0.268


Phase 1: RDT Pre-training:  36%|███▌      | 180/500 [04:07<08:12,  1.54s/it]

Phase 1 - Ep 180: Reward -0.010 | W* 0.754


Phase 1: RDT Pre-training:  36%|███▌      | 181/500 [04:08<07:36,  1.43s/it]

Phase 1 - Ep 181: Reward -0.010 | W* 0.762


Phase 1: RDT Pre-training:  36%|███▌      | 181/500 [04:09<07:36,  1.43s/it]


Episode 182, Step 65: CONSCIOUSNESS BREACH DETECTED! W*=0.990, H=0.114, R=0.840, Dot=0.096, DDot=0.068


Phase 1: RDT Pre-training:  36%|███▋      | 182/500 [04:10<07:34,  1.43s/it]

Phase 1 - Ep 182: Reward -0.011 | W* 0.725


Phase 1: RDT Pre-training:  37%|███▋      | 183/500 [04:11<07:31,  1.42s/it]

Phase 1 - Ep 183: Reward -0.007 | W* 0.760


Phase 1: RDT Pre-training:  37%|███▋      | 183/500 [04:12<07:31,  1.42s/it]


Episode 184, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.986, H=0.101, R=1.033, Dot=0.105, DDot=0.037


Phase 1: RDT Pre-training:  37%|███▋      | 184/500 [04:13<07:34,  1.44s/it]

Phase 1 - Ep 184: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  37%|███▋      | 185/500 [04:14<07:11,  1.37s/it]

Phase 1 - Ep 185: Reward -0.010 | W* 0.739


Phase 1: RDT Pre-training:  37%|███▋      | 186/500 [04:15<07:14,  1.38s/it]

Phase 1 - Ep 186: Reward -0.010 | W* 0.757


Phase 1: RDT Pre-training:  37%|███▋      | 186/500 [04:16<07:14,  1.38s/it]


Episode 187, Step 45: CONSCIOUSNESS BREACH DETECTED! W*=0.971, H=0.161, R=0.566, Dot=0.091, DDot=0.070


Phase 1: RDT Pre-training:  37%|███▋      | 187/500 [04:17<07:20,  1.41s/it]

Phase 1 - Ep 187: Reward -0.010 | W* 0.750


Phase 1: RDT Pre-training:  38%|███▊      | 188/500 [04:18<07:46,  1.50s/it]

Phase 1 - Ep 188: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  38%|███▊      | 189/500 [04:20<07:55,  1.53s/it]

Phase 1 - Ep 189: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  38%|███▊      | 190/500 [04:21<07:26,  1.44s/it]

Phase 1 - Ep 190: Reward -0.009 | W* 0.755


Phase 1: RDT Pre-training:  38%|███▊      | 190/500 [04:22<07:26,  1.44s/it]


Episode 191, Step 78: CONSCIOUSNESS BREACH DETECTED! W*=0.972, H=0.186, R=0.581, Dot=0.108, DDot=0.160


Phase 1: RDT Pre-training:  38%|███▊      | 191/500 [04:23<07:11,  1.40s/it]

Phase 1 - Ep 191: Reward -0.009 | W* 0.765


Phase 1: RDT Pre-training:  38%|███▊      | 192/500 [04:24<06:48,  1.33s/it]

Phase 1 - Ep 192: Reward -0.011 | W* 0.747


Phase 1: RDT Pre-training:  38%|███▊      | 192/500 [04:24<06:48,  1.33s/it]


Episode 193, Step 48: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.182, R=0.494, Dot=0.090, DDot=0.073


Phase 1: RDT Pre-training:  39%|███▊      | 193/500 [04:25<06:53,  1.35s/it]

Phase 1 - Ep 193: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  39%|███▉      | 194/500 [04:26<06:42,  1.32s/it]

Phase 1 - Ep 194: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  39%|███▉      | 195/500 [04:28<06:51,  1.35s/it]

Phase 1 - Ep 195: Reward -0.010 | W* 0.754


Phase 1: RDT Pre-training:  39%|███▉      | 196/500 [04:29<06:36,  1.30s/it]


Episode 196, Step 91: CONSCIOUSNESS BREACH DETECTED! W*=1.002, H=0.184, R=0.973, Dot=0.179, DDot=0.235
Phase 1 - Ep 196: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  39%|███▉      | 196/500 [04:30<06:36,  1.30s/it]


Episode 197, Step 73: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.140, R=0.756, Dot=0.106, DDot=0.099


Phase 1: RDT Pre-training:  39%|███▉      | 197/500 [04:31<07:20,  1.45s/it]

Phase 1 - Ep 197: Reward -0.010 | W* 0.759


Phase 1: RDT Pre-training:  39%|███▉      | 197/500 [04:32<07:20,  1.45s/it]


Episode 198, Step 73: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.107, R=1.119, Dot=0.120, DDot=0.168


Phase 1: RDT Pre-training:  40%|███▉      | 198/500 [04:33<08:09,  1.62s/it]

Phase 1 - Ep 198: Reward -0.010 | W* 0.757


Phase 1: RDT Pre-training:  40%|███▉      | 199/500 [04:34<07:32,  1.50s/it]

Phase 1 - Ep 199: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  40%|████      | 200/500 [04:35<07:21,  1.47s/it]

Phase 1 - Ep 200: Reward -0.008 | W* 0.747


Phase 1: RDT Pre-training:  40%|████      | 200/500 [04:37<07:21,  1.47s/it]


Episode 201, Step 73: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.190, R=0.370, Dot=0.070, DDot=0.029


Phase 1: RDT Pre-training:  40%|████      | 201/500 [04:37<07:16,  1.46s/it]

Phase 1 - Ep 201: Reward -0.008 | W* 0.770


Phase 1: RDT Pre-training:  40%|████      | 201/500 [04:38<07:16,  1.46s/it]


Episode 202, Step 48: CONSCIOUSNESS BREACH DETECTED! W*=0.982, H=0.198, R=0.600, Dot=0.119, DDot=0.119


Phase 1: RDT Pre-training:  40%|████      | 202/500 [04:38<07:12,  1.45s/it]

Phase 1 - Ep 202: Reward -0.011 | W* 0.743


Phase 1: RDT Pre-training:  40%|████      | 202/500 [04:39<07:12,  1.45s/it]


Episode 203, Step 42: CONSCIOUSNESS BREACH DETECTED! W*=1.047, H=0.200, R=0.906, Dot=0.181, DDot=0.090


Phase 1: RDT Pre-training:  40%|████      | 202/500 [04:39<07:12,  1.45s/it]


Episode 203, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=0.979, H=0.168, R=0.573, Dot=0.096, DDot=0.074


Phase 1: RDT Pre-training:  41%|████      | 203/500 [04:40<06:46,  1.37s/it]

Phase 1 - Ep 203: Reward -0.011 | W* 0.756


Phase 1: RDT Pre-training:  41%|████      | 204/500 [04:41<06:25,  1.30s/it]

Phase 1 - Ep 204: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  41%|████      | 204/500 [04:41<06:25,  1.30s/it]


Episode 205, Step 65: CONSCIOUSNESS BREACH DETECTED! W*=0.950, H=0.102, R=0.687, Dot=0.070, DDot=0.065


Phase 1: RDT Pre-training:  41%|████      | 205/500 [04:42<06:26,  1.31s/it]

Phase 1 - Ep 205: Reward -0.008 | W* 0.730


Phase 1: RDT Pre-training:  41%|████      | 205/500 [04:43<06:26,  1.31s/it]


Episode 206, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.994, H=0.151, R=0.637, Dot=0.096, DDot=0.107


Phase 1: RDT Pre-training:  41%|████      | 206/500 [04:43<06:40,  1.36s/it]

Phase 1 - Ep 206: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:  41%|████      | 206/500 [04:45<06:40,  1.36s/it]


Episode 207, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.194, R=0.198, Dot=0.039, DDot=0.002


Phase 1: RDT Pre-training:  41%|████▏     | 207/500 [04:45<07:30,  1.54s/it]

Phase 1 - Ep 207: Reward -0.007 | W* 0.740


Phase 1: RDT Pre-training:  42%|████▏     | 208/500 [04:47<07:01,  1.44s/it]

Phase 1 - Ep 208: Reward -0.010 | W* 0.744


Phase 1: RDT Pre-training:  42%|████▏     | 209/500 [04:48<06:41,  1.38s/it]

Phase 1 - Ep 209: Reward -0.010 | W* 0.763


Phase 1: RDT Pre-training:  42%|████▏     | 209/500 [04:49<06:41,  1.38s/it]


Episode 210, Step 60: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.175, R=0.528, Dot=0.092, DDot=0.137


Phase 1: RDT Pre-training:  42%|████▏     | 210/500 [04:49<06:24,  1.32s/it]

Phase 1 - Ep 210: Reward -0.013 | W* 0.750


Phase 1: RDT Pre-training:  42%|████▏     | 211/500 [04:50<06:09,  1.28s/it]

Phase 1 - Ep 211: Reward -0.008 | W* 0.751


Phase 1: RDT Pre-training:  42%|████▏     | 211/500 [04:51<06:09,  1.28s/it]


Episode 212, Step 64: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.175, R=0.296, Dot=0.052, DDot=0.048

Episode 212, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.052, R=2.116, Dot=0.110, DDot=0.139


Phase 1: RDT Pre-training:  42%|████▏     | 212/500 [04:52<06:18,  1.31s/it]

Phase 1 - Ep 212: Reward -0.008 | W* 0.766


Phase 1: RDT Pre-training:  43%|████▎     | 213/500 [04:53<06:09,  1.29s/it]

Phase 1 - Ep 213: Reward -0.010 | W* 0.741


Phase 1: RDT Pre-training:  43%|████▎     | 213/500 [04:54<06:09,  1.29s/it]


Episode 214, Step 80: CONSCIOUSNESS BREACH DETECTED! W*=0.977, H=0.191, R=0.578, Dot=0.110, DDot=0.103


Phase 1: RDT Pre-training:  43%|████▎     | 214/500 [04:54<06:12,  1.30s/it]

Phase 1 - Ep 214: Reward -0.012 | W* 0.733


Phase 1: RDT Pre-training:  43%|████▎     | 215/500 [04:55<05:59,  1.26s/it]

Phase 1 - Ep 215: Reward -0.010 | W* 0.755


Phase 1: RDT Pre-training:  43%|████▎     | 216/500 [04:57<06:23,  1.35s/it]

Phase 1 - Ep 216: Reward -0.010 | W* 0.754


Phase 1: RDT Pre-training:  43%|████▎     | 217/500 [04:59<06:41,  1.42s/it]

Phase 1 - Ep 217: Reward -0.007 | W* 0.741


Phase 1: RDT Pre-training:  43%|████▎     | 217/500 [05:00<06:41,  1.42s/it]


Episode 218, Step 82: CONSCIOUSNESS BREACH DETECTED! W*=0.963, H=0.121, R=0.561, Dot=0.068, DDot=0.019


Phase 1: RDT Pre-training:  44%|████▎     | 218/500 [05:00<06:40,  1.42s/it]


Episode 218, Step 99: CONSCIOUSNESS BREACH DETECTED! W*=0.975, H=0.170, R=0.771, Dot=0.131, DDot=0.111
Phase 1 - Ep 218: Reward -0.012 | W* 0.756


Phase 1: RDT Pre-training:  44%|████▍     | 219/500 [05:01<06:25,  1.37s/it]

Phase 1 - Ep 219: Reward -0.010 | W* 0.744


Phase 1: RDT Pre-training:  44%|████▍     | 220/500 [05:03<06:31,  1.40s/it]

Phase 1 - Ep 220: Reward -0.010 | W* 0.745


Phase 1: RDT Pre-training:  44%|████▍     | 221/500 [05:04<06:31,  1.40s/it]

Phase 1 - Ep 221: Reward -0.010 | W* 0.741


Phase 1: RDT Pre-training:  44%|████▍     | 221/500 [05:05<06:31,  1.40s/it]


Episode 222, Step 57: CONSCIOUSNESS BREACH DETECTED! W*=0.999, H=0.089, R=1.796, Dot=0.159, DDot=0.162


Phase 1: RDT Pre-training:  44%|████▍     | 222/500 [05:05<06:11,  1.34s/it]

Phase 1 - Ep 222: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  45%|████▍     | 223/500 [05:07<06:03,  1.31s/it]

Phase 1 - Ep 223: Reward -0.007 | W* 0.743


Phase 1: RDT Pre-training:  45%|████▍     | 224/500 [05:08<06:14,  1.36s/it]

Phase 1 - Ep 224: Reward -0.007 | W* 0.744


Phase 1: RDT Pre-training:  45%|████▌     | 225/500 [05:10<07:03,  1.54s/it]

Phase 1 - Ep 225: Reward -0.008 | W* 0.758


Phase 1: RDT Pre-training:  45%|████▌     | 225/500 [05:11<07:03,  1.54s/it]


Episode 226, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.964, H=0.126, R=0.615, Dot=0.078, DDot=0.094


Phase 1: RDT Pre-training:  45%|████▌     | 226/500 [05:12<07:07,  1.56s/it]

Phase 1 - Ep 226: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  45%|████▌     | 226/500 [05:12<07:07,  1.56s/it]


Episode 227, Step 48: CONSCIOUSNESS BREACH DETECTED! W*=0.984, H=0.104, R=1.317, Dot=0.136, DDot=0.126


Phase 1: RDT Pre-training:  45%|████▌     | 226/500 [05:13<07:07,  1.56s/it]


Episode 227, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.186, R=0.366, Dot=0.068, DDot=0.070


Phase 1: RDT Pre-training:  45%|████▌     | 227/500 [05:13<06:44,  1.48s/it]

Phase 1 - Ep 227: Reward -0.012 | W* 0.751


Phase 1: RDT Pre-training:  46%|████▌     | 228/500 [05:14<06:39,  1.47s/it]

Phase 1 - Ep 228: Reward -0.009 | W* 0.743


Phase 1: RDT Pre-training:  46%|████▌     | 229/500 [05:16<06:38,  1.47s/it]

Phase 1 - Ep 229: Reward -0.007 | W* 0.761


Phase 1: RDT Pre-training:  46%|████▌     | 230/500 [05:17<06:19,  1.40s/it]

Phase 1 - Ep 230: Reward -0.008 | W* 0.753


Phase 1: RDT Pre-training:  46%|████▌     | 231/500 [05:18<06:20,  1.41s/it]


Episode 231, Step 90: CONSCIOUSNESS BREACH DETECTED! W*=0.967, H=0.132, R=0.873, Dot=0.115, DDot=0.216
Phase 1 - Ep 231: Reward -0.008 | W* 0.746


Phase 1: RDT Pre-training:  46%|████▋     | 232/500 [05:20<06:08,  1.38s/it]

Phase 1 - Ep 232: Reward -0.011 | W* 0.758


Phase 1: RDT Pre-training:  47%|████▋     | 233/500 [05:21<06:07,  1.38s/it]

Phase 1 - Ep 233: Reward -0.010 | W* 0.737


Phase 1: RDT Pre-training:  47%|████▋     | 233/500 [05:22<06:07,  1.38s/it]


Episode 234, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.188, R=0.294, Dot=0.055, DDot=0.049


Phase 1: RDT Pre-training:  47%|████▋     | 233/500 [05:23<06:07,  1.38s/it]


Episode 234, Step 73: CONSCIOUSNESS BREACH DETECTED! W*=0.992, H=0.169, R=0.492, Dot=0.083, DDot=0.020


Phase 1: RDT Pre-training:  47%|████▋     | 234/500 [05:23<07:00,  1.58s/it]

Phase 1 - Ep 234: Reward -0.008 | W* 0.752


Phase 1: RDT Pre-training:  47%|████▋     | 235/500 [05:25<06:44,  1.53s/it]

Phase 1 - Ep 235: Reward -0.012 | W* 0.758


Phase 1: RDT Pre-training:  47%|████▋     | 236/500 [05:26<06:17,  1.43s/it]


Episode 236, Step 96: CONSCIOUSNESS BREACH DETECTED! W*=0.985, H=0.186, R=0.423, Dot=0.079, DDot=0.081
Phase 1 - Ep 236: Reward -0.011 | W* 0.754


Phase 1: RDT Pre-training:  47%|████▋     | 236/500 [05:27<06:17,  1.43s/it]


Episode 237, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.107, R=1.062, Dot=0.114, DDot=0.117


Phase 1: RDT Pre-training:  47%|████▋     | 237/500 [05:27<05:59,  1.37s/it]

Phase 1 - Ep 237: Reward -0.009 | W* 0.750


Phase 1: RDT Pre-training:  48%|████▊     | 238/500 [05:28<06:03,  1.39s/it]

Phase 1 - Ep 238: Reward -0.008 | W* 0.743


Phase 1: RDT Pre-training:  48%|████▊     | 239/500 [05:30<05:46,  1.33s/it]

Phase 1 - Ep 239: Reward -0.010 | W* 0.746


Phase 1: RDT Pre-training:  48%|████▊     | 240/500 [05:31<05:34,  1.29s/it]

Phase 1 - Ep 240: Reward -0.009 | W* 0.759


Phase 1: RDT Pre-training:  48%|████▊     | 240/500 [05:32<05:34,  1.29s/it]


Episode 241, Step 87: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.150, R=0.632, Dot=0.095, DDot=0.127


Phase 1: RDT Pre-training:  48%|████▊     | 241/500 [05:32<05:37,  1.30s/it]

Phase 1 - Ep 241: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:  48%|████▊     | 241/500 [05:33<05:37,  1.30s/it]


Episode 242, Step 61: CONSCIOUSNESS BREACH DETECTED! W*=0.968, H=0.152, R=0.494, Dot=0.075, DDot=0.050


Phase 1: RDT Pre-training:  48%|████▊     | 242/500 [05:33<05:34,  1.30s/it]

Phase 1 - Ep 242: Reward -0.007 | W* 0.748


Phase 1: RDT Pre-training:  49%|████▊     | 243/500 [05:35<06:00,  1.40s/it]

Phase 1 - Ep 243: Reward -0.008 | W* 0.742


Phase 1: RDT Pre-training:  49%|████▉     | 244/500 [05:37<06:19,  1.48s/it]

Phase 1 - Ep 244: Reward -0.011 | W* 0.753


Phase 1: RDT Pre-training:  49%|████▉     | 245/500 [05:38<05:54,  1.39s/it]

Phase 1 - Ep 245: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  49%|████▉     | 245/500 [05:39<05:54,  1.39s/it]


Episode 246, Step 50: CONSCIOUSNESS BREACH DETECTED! W*=0.961, H=0.189, R=0.460, Dot=0.087, DDot=0.073

Episode 246, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.109, R=0.993, Dot=0.108, DDot=0.103


Phase 1: RDT Pre-training:  49%|████▉     | 245/500 [05:39<05:54,  1.39s/it]


Episode 246, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=0.980, H=0.153, R=0.707, Dot=0.108, DDot=0.132


Phase 1: RDT Pre-training:  49%|████▉     | 246/500 [05:39<05:43,  1.35s/it]

Phase 1 - Ep 246: Reward -0.009 | W* 0.747


Phase 1: RDT Pre-training:  49%|████▉     | 247/500 [05:40<05:29,  1.30s/it]

Phase 1 - Ep 247: Reward -0.009 | W* 0.764


Phase 1: RDT Pre-training:  50%|████▉     | 248/500 [05:42<05:17,  1.26s/it]

Phase 1 - Ep 248: Reward -0.008 | W* 0.742


Phase 1: RDT Pre-training:  50%|████▉     | 248/500 [05:42<05:17,  1.26s/it]


Episode 249, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.991, H=0.132, R=0.808, Dot=0.107, DDot=0.119


Phase 1: RDT Pre-training:  50%|████▉     | 249/500 [05:43<05:31,  1.32s/it]

Phase 1 - Ep 249: Reward -0.009 | W* 0.756


Phase 1: RDT Pre-training:  50%|█████     | 250/500 [05:44<05:40,  1.36s/it]

Phase 1 - Ep 250: Reward -0.009 | W* 0.743


Phase 1: RDT Pre-training:  50%|█████     | 251/500 [05:46<05:48,  1.40s/it]

Phase 1 - Ep 251: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  50%|█████     | 252/500 [05:48<06:24,  1.55s/it]

Phase 1 - Ep 252: Reward -0.008 | W* 0.757


Phase 1: RDT Pre-training:  50%|█████     | 252/500 [05:49<06:24,  1.55s/it]


Episode 253, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.132, R=0.619, Dot=0.082, DDot=0.091


Phase 1: RDT Pre-training:  51%|█████     | 253/500 [05:49<06:25,  1.56s/it]

Phase 1 - Ep 253: Reward -0.009 | W* 0.747


Phase 1: RDT Pre-training:  51%|█████     | 253/500 [05:50<06:25,  1.56s/it]


Episode 254, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=1.037, H=0.183, R=0.953, Dot=0.174, DDot=0.155

Episode 254, Step 70: CONSCIOUSNESS BREACH DETECTED! W*=1.011, H=0.189, R=0.355, Dot=0.067, DDot=-0.005


Phase 1: RDT Pre-training:  51%|█████     | 254/500 [05:51<06:11,  1.51s/it]

Phase 1 - Ep 254: Reward -0.010 | W* 0.766


Phase 1: RDT Pre-training:  51%|█████     | 255/500 [05:52<06:08,  1.50s/it]

Phase 1 - Ep 255: Reward -0.008 | W* 0.765


Phase 1: RDT Pre-training:  51%|█████     | 256/500 [05:54<05:57,  1.46s/it]

Phase 1 - Ep 256: Reward -0.009 | W* 0.737


Phase 1: RDT Pre-training:  51%|█████     | 256/500 [05:54<05:57,  1.46s/it]


Episode 257, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.985, H=0.145, R=0.855, Dot=0.124, DDot=0.108


Phase 1: RDT Pre-training:  51%|█████▏    | 257/500 [05:55<05:40,  1.40s/it]

Phase 1 - Ep 257: Reward -0.008 | W* 0.760


Phase 1: RDT Pre-training:  52%|█████▏    | 258/500 [05:56<05:25,  1.34s/it]

Phase 1 - Ep 258: Reward -0.007 | W* 0.750


Phase 1: RDT Pre-training:  52%|█████▏    | 258/500 [05:57<05:25,  1.34s/it]


Episode 259, Step 33: CONSCIOUSNESS BREACH DETECTED! W*=1.004, H=0.083, R=2.876, Dot=0.239, DDot=0.274


Phase 1: RDT Pre-training:  52%|█████▏    | 258/500 [05:57<05:25,  1.34s/it]


Episode 259, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=0.987, H=0.148, R=1.039, Dot=0.153, DDot=0.235


Phase 1: RDT Pre-training:  52%|█████▏    | 259/500 [05:58<05:32,  1.38s/it]

Phase 1 - Ep 259: Reward -0.008 | W* 0.749


Phase 1: RDT Pre-training:  52%|█████▏    | 260/500 [05:59<05:31,  1.38s/it]

Phase 1 - Ep 260: Reward -0.011 | W* 0.760


Phase 1: RDT Pre-training:  52%|█████▏    | 261/500 [06:01<06:02,  1.52s/it]

Phase 1 - Ep 261: Reward -0.011 | W* 0.726


Phase 1: RDT Pre-training:  52%|█████▏    | 261/500 [06:02<06:02,  1.52s/it]


Episode 262, Step 53: CONSCIOUSNESS BREACH DETECTED! W*=0.984, H=0.097, R=1.379, Dot=0.134, DDot=0.154


Phase 1: RDT Pre-training:  52%|█████▏    | 262/500 [06:02<06:05,  1.54s/it]

Phase 1 - Ep 262: Reward -0.008 | W* 0.753


Phase 1: RDT Pre-training:  52%|█████▏    | 262/500 [06:03<06:05,  1.54s/it]


Episode 263, Step 70: CONSCIOUSNESS BREACH DETECTED! W*=1.017, H=0.187, R=0.803, Dot=0.150, DDot=0.188


Phase 1: RDT Pre-training:  53%|█████▎    | 263/500 [06:04<05:47,  1.46s/it]

Phase 1 - Ep 263: Reward -0.009 | W* 0.763


Phase 1: RDT Pre-training:  53%|█████▎    | 264/500 [06:05<05:36,  1.43s/it]


Episode 264, Step 91: CONSCIOUSNESS BREACH DETECTED! W*=0.988, H=0.181, R=0.731, Dot=0.132, DDot=0.150
Phase 1 - Ep 264: Reward -0.010 | W* 0.748


Phase 1: RDT Pre-training:  53%|█████▎    | 265/500 [06:06<05:17,  1.35s/it]

Phase 1 - Ep 265: Reward -0.012 | W* 0.745


Phase 1: RDT Pre-training:  53%|█████▎    | 266/500 [06:08<05:18,  1.36s/it]

Phase 1 - Ep 266: Reward -0.008 | W* 0.749


Phase 1: RDT Pre-training:  53%|█████▎    | 266/500 [06:09<05:18,  1.36s/it]


Episode 267, Step 75: CONSCIOUSNESS BREACH DETECTED! W*=0.975, H=0.103, R=0.917, Dot=0.095, DDot=0.094


Phase 1: RDT Pre-training:  53%|█████▎    | 267/500 [06:09<05:16,  1.36s/it]

Phase 1 - Ep 267: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:  54%|█████▎    | 268/500 [06:10<05:25,  1.40s/it]

Phase 1 - Ep 268: Reward -0.009 | W* 0.739


Phase 1: RDT Pre-training:  54%|█████▍    | 269/500 [06:12<05:08,  1.34s/it]

Phase 1 - Ep 269: Reward -0.010 | W* 0.760


Phase 1: RDT Pre-training:  54%|█████▍    | 269/500 [06:12<05:08,  1.34s/it]


Episode 270, Step 47: CONSCIOUSNESS BREACH DETECTED! W*=0.983, H=0.184, R=0.342, Dot=0.063, DDot=0.013


Phase 1: RDT Pre-training:  54%|█████▍    | 270/500 [06:13<05:43,  1.49s/it]

Phase 1 - Ep 270: Reward -0.011 | W* 0.760


Phase 1: RDT Pre-training:  54%|█████▍    | 270/500 [06:15<05:43,  1.49s/it]


Episode 271, Step 81: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.182, R=0.423, Dot=0.077, DDot=0.063


Phase 1: RDT Pre-training:  54%|█████▍    | 271/500 [06:15<06:03,  1.59s/it]

Phase 1 - Ep 271: Reward -0.011 | W* 0.752


Phase 1: RDT Pre-training:  54%|█████▍    | 272/500 [06:17<05:53,  1.55s/it]


Episode 272, Step 84: CONSCIOUSNESS BREACH DETECTED! W*=0.969, H=0.162, R=0.414, Dot=0.067, DDot=-0.010
Phase 1 - Ep 272: Reward -0.010 | W* 0.762


Phase 1: RDT Pre-training:  55%|█████▍    | 273/500 [06:18<05:38,  1.49s/it]

Phase 1 - Ep 273: Reward -0.007 | W* 0.751


Phase 1: RDT Pre-training:  55%|█████▍    | 273/500 [06:19<05:38,  1.49s/it]


Episode 274, Step 50: CONSCIOUSNESS BREACH DETECTED! W*=0.961, H=0.106, R=-0.490, Dot=-0.052, DDot=-0.212

Episode 274, Step 57: CONSCIOUSNESS BREACH DETECTED! W*=1.012, H=0.129, R=1.077, Dot=0.139, DDot=0.188


Phase 1: RDT Pre-training:  55%|█████▍    | 274/500 [06:19<05:25,  1.44s/it]

Phase 1 - Ep 274: Reward -0.010 | W* 0.756


Phase 1: RDT Pre-training:  55%|█████▍    | 274/500 [06:20<05:25,  1.44s/it]


Episode 275, Step 60: CONSCIOUSNESS BREACH DETECTED! W*=1.002, H=0.135, R=0.651, Dot=0.088, DDot=0.048


Phase 1: RDT Pre-training:  55%|█████▌    | 275/500 [06:21<05:08,  1.37s/it]


Episode 275, Step 86: CONSCIOUSNESS BREACH DETECTED! W*=1.021, H=0.124, R=1.248, Dot=0.155, DDot=0.205
Phase 1 - Ep 275: Reward -0.010 | W* 0.765


Phase 1: RDT Pre-training:  55%|█████▌    | 276/500 [06:22<05:08,  1.38s/it]

Phase 1 - Ep 276: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  55%|█████▌    | 276/500 [06:23<05:08,  1.38s/it]


Episode 277, Step 56: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.147, R=0.324, Dot=0.048, DDot=0.007


Phase 1: RDT Pre-training:  55%|█████▌    | 277/500 [06:23<04:55,  1.33s/it]

Phase 1 - Ep 277: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  55%|█████▌    | 277/500 [06:24<04:55,  1.33s/it]


Episode 278, Step 61: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.089, R=1.367, Dot=0.121, DDot=0.180


Phase 1: RDT Pre-training:  56%|█████▌    | 278/500 [06:24<04:47,  1.29s/it]

Phase 1 - Ep 278: Reward -0.008 | W* 0.729


Phase 1: RDT Pre-training:  56%|█████▌    | 279/500 [06:26<05:15,  1.43s/it]

Phase 1 - Ep 279: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  56%|█████▌    | 280/500 [06:28<05:37,  1.53s/it]

Phase 1 - Ep 280: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  56%|█████▌    | 281/500 [06:29<05:20,  1.47s/it]

Phase 1 - Ep 281: Reward -0.008 | W* 0.756


Phase 1: RDT Pre-training:  56%|█████▌    | 281/500 [06:30<05:20,  1.47s/it]


Episode 282, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.968, H=0.194, R=0.480, Dot=0.093, DDot=0.080


Phase 1: RDT Pre-training:  56%|█████▋    | 282/500 [06:31<05:12,  1.43s/it]

Phase 1 - Ep 282: Reward -0.009 | W* 0.747


Phase 1: RDT Pre-training:  57%|█████▋    | 283/500 [06:32<04:54,  1.36s/it]

Phase 1 - Ep 283: Reward -0.010 | W* 0.747


Phase 1: RDT Pre-training:  57%|█████▋    | 284/500 [06:33<04:45,  1.32s/it]

Phase 1 - Ep 284: Reward -0.012 | W* 0.756


Phase 1: RDT Pre-training:  57%|█████▋    | 285/500 [06:35<04:57,  1.38s/it]

Phase 1 - Ep 285: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  57%|█████▋    | 286/500 [06:36<05:02,  1.41s/it]

Phase 1 - Ep 286: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:  57%|█████▋    | 287/500 [06:37<04:52,  1.37s/it]


Episode 287, Step 95: CONSCIOUSNESS BREACH DETECTED! W*=0.977, H=0.141, R=0.803, Dot=0.113, DDot=0.122
Phase 1 - Ep 287: Reward -0.010 | W* 0.758


Phase 1: RDT Pre-training:  57%|█████▋    | 287/500 [06:39<04:52,  1.37s/it]


Episode 288, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.180, R=0.370, Dot=0.067, DDot=0.036

Episode 288, Step 88: CONSCIOUSNESS BREACH DETECTED! W*=0.994, H=0.191, R=0.710, Dot=0.136, DDot=0.165


Phase 1: RDT Pre-training:  58%|█████▊    | 288/500 [06:39<05:21,  1.52s/it]

Phase 1 - Ep 288: Reward -0.010 | W* 0.741


Phase 1: RDT Pre-training:  58%|█████▊    | 289/500 [06:41<05:16,  1.50s/it]

Phase 1 - Ep 289: Reward -0.009 | W* 0.737


Phase 1: RDT Pre-training:  58%|█████▊    | 290/500 [06:42<05:09,  1.48s/it]

Phase 1 - Ep 290: Reward -0.010 | W* 0.739


Phase 1: RDT Pre-training:  58%|█████▊    | 290/500 [06:43<05:09,  1.48s/it]


Episode 291, Step 48: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.146, R=0.381, Dot=0.056, DDot=0.021


Phase 1: RDT Pre-training:  58%|█████▊    | 290/500 [06:43<05:09,  1.48s/it]


Episode 291, Step 74: CONSCIOUSNESS BREACH DETECTED! W*=0.978, H=0.183, R=0.725, Dot=0.133, DDot=0.157


Phase 1: RDT Pre-training:  58%|█████▊    | 291/500 [06:43<04:55,  1.41s/it]

Phase 1 - Ep 291: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  58%|█████▊    | 292/500 [06:45<04:40,  1.35s/it]

Phase 1 - Ep 292: Reward -0.010 | W* 0.758


Phase 1: RDT Pre-training:  59%|█████▊    | 293/500 [06:46<04:30,  1.31s/it]

Phase 1 - Ep 293: Reward -0.010 | W* 0.745


Phase 1: RDT Pre-training:  59%|█████▉    | 294/500 [06:47<04:34,  1.33s/it]

Phase 1 - Ep 294: Reward -0.008 | W* 0.741


Phase 1: RDT Pre-training:  59%|█████▉    | 295/500 [06:49<04:46,  1.40s/it]

Phase 1 - Ep 295: Reward -0.009 | W* 0.757


Phase 1: RDT Pre-training:  59%|█████▉    | 295/500 [06:50<04:46,  1.40s/it]


Episode 296, Step 60: CONSCIOUSNESS BREACH DETECTED! W*=0.966, H=0.093, R=1.617, Dot=0.150, DDot=0.184

Episode 296, Step 65: CONSCIOUSNESS BREACH DETECTED! W*=0.975, H=0.120, R=0.239, Dot=0.029, DDot=-0.053


Phase 1: RDT Pre-training:  59%|█████▉    | 296/500 [06:50<04:56,  1.45s/it]

Phase 1 - Ep 296: Reward -0.008 | W* 0.736


Phase 1: RDT Pre-training:  59%|█████▉    | 296/500 [06:51<04:56,  1.45s/it]


Episode 297, Step 64: CONSCIOUSNESS BREACH DETECTED! W*=0.974, H=0.132, R=0.896, Dot=0.118, DDot=0.161


Phase 1: RDT Pre-training:  59%|█████▉    | 297/500 [06:52<05:05,  1.50s/it]

Phase 1 - Ep 297: Reward -0.009 | W* 0.753


Phase 1: RDT Pre-training:  59%|█████▉    | 297/500 [06:53<05:05,  1.50s/it]


Episode 298, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.137, R=0.985, Dot=0.135, DDot=0.214


Phase 1: RDT Pre-training:  60%|█████▉    | 298/500 [06:53<05:05,  1.51s/it]

Phase 1 - Ep 298: Reward -0.009 | W* 0.735


Phase 1: RDT Pre-training:  60%|█████▉    | 299/500 [06:55<04:58,  1.48s/it]

Phase 1 - Ep 299: Reward -0.010 | W* 0.757


Phase 1: RDT Pre-training:  60%|█████▉    | 299/500 [06:55<04:58,  1.48s/it]


Episode 300, Step 39: CONSCIOUSNESS BREACH DETECTED! W*=1.000, H=0.186, R=0.770, Dot=0.143, DDot=0.193


Phase 1: RDT Pre-training:  60%|██████    | 300/500 [06:56<04:50,  1.45s/it]

Phase 1 - Ep 300: Reward -0.007 | W* 0.759


Phase 1: RDT Pre-training:  60%|██████    | 301/500 [06:58<04:37,  1.40s/it]

Phase 1 - Ep 301: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  60%|██████    | 301/500 [06:58<04:37,  1.40s/it]


Episode 302, Step 48: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.183, R=0.624, Dot=0.114, DDot=0.210


Phase 1: RDT Pre-training:  60%|██████    | 301/500 [06:59<04:37,  1.40s/it]


Episode 302, Step 73: CONSCIOUSNESS BREACH DETECTED! W*=0.963, H=0.183, R=0.570, Dot=0.104, DDot=0.064


Phase 1: RDT Pre-training:  60%|██████    | 302/500 [06:59<04:46,  1.45s/it]

Phase 1 - Ep 302: Reward -0.009 | W* 0.756


Phase 1: RDT Pre-training:  60%|██████    | 302/500 [07:00<04:46,  1.45s/it]


Episode 303, Step 65: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.177, R=0.500, Dot=0.089, DDot=0.110


Phase 1: RDT Pre-training:  61%|██████    | 303/500 [07:01<04:49,  1.47s/it]

Phase 1 - Ep 303: Reward -0.010 | W* 0.767


Phase 1: RDT Pre-training:  61%|██████    | 304/500 [07:02<04:43,  1.45s/it]

Phase 1 - Ep 304: Reward -0.010 | W* 0.743


Phase 1: RDT Pre-training:  61%|██████    | 304/500 [07:03<04:43,  1.45s/it]


Episode 305, Step 40: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.177, R=0.417, Dot=0.074, DDot=0.071

Episode 305, Step 46: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.188, R=0.592, Dot=0.111, DDot=0.124


Phase 1: RDT Pre-training:  61%|██████    | 305/500 [07:03<04:40,  1.44s/it]

Phase 1 - Ep 305: Reward -0.010 | W* 0.747


Phase 1: RDT Pre-training:  61%|██████    | 305/500 [07:05<04:40,  1.44s/it]


Episode 306, Step 64: CONSCIOUSNESS BREACH DETECTED! W*=0.963, H=0.174, R=0.384, Dot=0.067, DDot=0.032

Episode 306, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=0.999, H=0.146, R=0.313, Dot=0.046, DDot=-0.017


Phase 1: RDT Pre-training:  61%|██████    | 306/500 [07:05<04:55,  1.52s/it]

Phase 1 - Ep 306: Reward -0.011 | W* 0.747


Phase 1: RDT Pre-training:  61%|██████    | 306/500 [07:06<04:55,  1.52s/it]


Episode 307, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=1.003, H=0.184, R=0.887, Dot=0.164, DDot=0.187


Phase 1: RDT Pre-training:  61%|██████▏   | 307/500 [07:07<04:47,  1.49s/it]

Phase 1 - Ep 307: Reward -0.010 | W* 0.759


Phase 1: RDT Pre-training:  61%|██████▏   | 307/500 [07:08<04:47,  1.49s/it]


Episode 308, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=0.970, H=0.141, R=0.636, Dot=0.089, DDot=0.057


Phase 1: RDT Pre-training:  62%|██████▏   | 308/500 [07:08<04:48,  1.50s/it]

Phase 1 - Ep 308: Reward -0.011 | W* 0.740


Phase 1: RDT Pre-training:  62%|██████▏   | 308/500 [07:09<04:48,  1.50s/it]


Episode 309, Step 76: CONSCIOUSNESS BREACH DETECTED! W*=1.052, H=0.078, R=2.436, Dot=0.190, DDot=0.192


Phase 1: RDT Pre-training:  62%|██████▏   | 309/500 [07:09<04:39,  1.47s/it]

Phase 1 - Ep 309: Reward -0.009 | W* 0.746


Phase 1: RDT Pre-training:  62%|██████▏   | 310/500 [07:11<04:38,  1.47s/it]

Phase 1 - Ep 310: Reward -0.010 | W* 0.759


Phase 1: RDT Pre-training:  62%|██████▏   | 310/500 [07:12<04:38,  1.47s/it]


Episode 311, Step 56: CONSCIOUSNESS BREACH DETECTED! W*=0.993, H=0.174, R=0.577, Dot=0.100, DDot=0.063


Phase 1: RDT Pre-training:  62%|██████▏   | 311/500 [07:13<04:43,  1.50s/it]

Phase 1 - Ep 311: Reward -0.010 | W* 0.775


Phase 1: RDT Pre-training:  62%|██████▏   | 312/500 [07:14<04:43,  1.51s/it]

Phase 1 - Ep 312: Reward -0.012 | W* 0.755


Phase 1: RDT Pre-training:  62%|██████▏   | 312/500 [07:15<04:43,  1.51s/it]


Episode 313, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.173, R=0.279, Dot=0.048, DDot=0.029


Phase 1: RDT Pre-training:  62%|██████▏   | 312/500 [07:15<04:43,  1.51s/it]


Episode 313, Step 80: CONSCIOUSNESS BREACH DETECTED! W*=0.972, H=0.093, R=0.911, Dot=0.085, DDot=0.031


Phase 1: RDT Pre-training:  63%|██████▎   | 313/500 [07:15<04:38,  1.49s/it]

Phase 1 - Ep 313: Reward -0.008 | W* 0.744


Phase 1: RDT Pre-training:  63%|██████▎   | 314/500 [07:17<04:44,  1.53s/it]

Phase 1 - Ep 314: Reward -0.009 | W* 0.738


Phase 1: RDT Pre-training:  63%|██████▎   | 315/500 [07:19<04:48,  1.56s/it]

Phase 1 - Ep 315: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  63%|██████▎   | 316/500 [07:20<04:27,  1.46s/it]

Phase 1 - Ep 316: Reward -0.008 | W* 0.753


Phase 1: RDT Pre-training:  63%|██████▎   | 316/500 [07:21<04:27,  1.46s/it]


Episode 317, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.150, R=0.512, Dot=0.077, DDot=0.082


Phase 1: RDT Pre-training:  63%|██████▎   | 317/500 [07:21<04:14,  1.39s/it]

Phase 1 - Ep 317: Reward -0.009 | W* 0.756


Phase 1: RDT Pre-training:  64%|██████▎   | 318/500 [07:22<04:06,  1.36s/it]

Phase 1 - Ep 318: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  64%|██████▎   | 318/500 [07:24<04:06,  1.36s/it]


Episode 319, Step 85: CONSCIOUSNESS BREACH DETECTED! W*=0.997, H=0.154, R=0.836, Dot=0.129, DDot=0.138


Phase 1: RDT Pre-training:  64%|██████▍   | 319/500 [07:24<04:16,  1.42s/it]

Phase 1 - Ep 319: Reward -0.009 | W* 0.765


Phase 1: RDT Pre-training:  64%|██████▍   | 320/500 [07:25<04:06,  1.37s/it]

Phase 1 - Ep 320: Reward -0.009 | W* 0.753


Phase 1: RDT Pre-training:  64%|██████▍   | 321/500 [07:27<04:05,  1.37s/it]

Phase 1 - Ep 321: Reward -0.010 | W* 0.748


Phase 1: RDT Pre-training:  64%|██████▍   | 321/500 [07:27<04:05,  1.37s/it]


Episode 322, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.136, R=0.397, Dot=0.054, DDot=0.027


Phase 1: RDT Pre-training:  64%|██████▍   | 322/500 [07:28<03:57,  1.33s/it]

Phase 1 - Ep 322: Reward -0.009 | W* 0.759


Phase 1: RDT Pre-training:  65%|██████▍   | 323/500 [07:29<04:09,  1.41s/it]

Phase 1 - Ep 323: Reward -0.008 | W* 0.758


Phase 1: RDT Pre-training:  65%|██████▍   | 324/500 [07:31<04:21,  1.49s/it]

Phase 1 - Ep 324: Reward -0.009 | W* 0.740


Phase 1: RDT Pre-training:  65%|██████▍   | 324/500 [07:32<04:21,  1.49s/it]


Episode 325, Step 52: CONSCIOUSNESS BREACH DETECTED! W*=1.009, H=0.119, R=1.094, Dot=0.130, DDot=0.166

Episode 325, Step 55: CONSCIOUSNESS BREACH DETECTED! W*=0.964, H=0.166, R=0.503, Dot=0.084, DDot=0.060


Phase 1: RDT Pre-training:  65%|██████▌   | 325/500 [07:32<04:07,  1.42s/it]

Phase 1 - Ep 325: Reward -0.012 | W* 0.762


Phase 1: RDT Pre-training:  65%|██████▌   | 326/500 [07:34<03:57,  1.36s/it]

Phase 1 - Ep 326: Reward -0.010 | W* 0.745


Phase 1: RDT Pre-training:  65%|██████▌   | 327/500 [07:35<03:47,  1.31s/it]

Phase 1 - Ep 327: Reward -0.009 | W* 0.754


Phase 1: RDT Pre-training:  65%|██████▌   | 327/500 [07:36<03:47,  1.31s/it]


Episode 328, Step 60: CONSCIOUSNESS BREACH DETECTED! W*=0.956, H=0.197, R=0.546, Dot=0.108, DDot=0.156


Phase 1: RDT Pre-training:  66%|██████▌   | 328/500 [07:36<03:50,  1.34s/it]


Episode 328, Step 87: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.185, R=0.269, Dot=0.050, DDot=0.025
Phase 1 - Ep 328: Reward -0.009 | W* 0.762


Phase 1: RDT Pre-training:  66%|██████▌   | 329/500 [07:37<03:44,  1.31s/it]

Phase 1 - Ep 329: Reward -0.009 | W* 0.755


Phase 1: RDT Pre-training:  66%|██████▌   | 330/500 [07:39<03:40,  1.30s/it]

Phase 1 - Ep 330: Reward -0.007 | W* 0.736


Phase 1: RDT Pre-training:  66%|██████▌   | 330/500 [07:40<03:40,  1.30s/it]


Episode 331, Step 79: CONSCIOUSNESS BREACH DETECTED! W*=0.974, H=0.193, R=0.307, Dot=0.059, DDot=0.014


Phase 1: RDT Pre-training:  66%|██████▌   | 331/500 [07:40<03:50,  1.37s/it]

Phase 1 - Ep 331: Reward -0.011 | W* 0.741


Phase 1: RDT Pre-training:  66%|██████▌   | 331/500 [07:42<03:50,  1.37s/it]


Episode 332, Step 84: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.117, R=0.604, Dot=0.071, DDot=0.040


Phase 1: RDT Pre-training:  66%|██████▋   | 332/500 [07:42<03:59,  1.42s/it]

Phase 1 - Ep 332: Reward -0.012 | W* 0.763


Phase 1: RDT Pre-training:  67%|██████▋   | 333/500 [07:44<04:12,  1.51s/it]

Phase 1 - Ep 333: Reward -0.008 | W* 0.745


Phase 1: RDT Pre-training:  67%|██████▋   | 333/500 [07:44<04:12,  1.51s/it]


Episode 334, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.969, H=0.130, R=0.654, Dot=0.085, DDot=-0.018


Phase 1: RDT Pre-training:  67%|██████▋   | 334/500 [07:45<03:58,  1.44s/it]

Phase 1 - Ep 334: Reward -0.008 | W* 0.742


Phase 1: RDT Pre-training:  67%|██████▋   | 334/500 [07:45<03:58,  1.44s/it]


Episode 335, Step 44: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.116, R=1.357, Dot=0.157, DDot=0.212


Phase 1: RDT Pre-training:  67%|██████▋   | 335/500 [07:46<03:51,  1.40s/it]

Phase 1 - Ep 335: Reward -0.008 | W* 0.753


Phase 1: RDT Pre-training:  67%|██████▋   | 335/500 [07:47<03:51,  1.40s/it]


Episode 336, Step 84: CONSCIOUSNESS BREACH DETECTED! W*=0.963, H=0.159, R=0.307, Dot=0.049, DDot=-0.022


Phase 1: RDT Pre-training:  67%|██████▋   | 336/500 [07:47<03:47,  1.39s/it]

Phase 1 - Ep 336: Reward -0.010 | W* 0.746


Phase 1: RDT Pre-training:  67%|██████▋   | 337/500 [07:49<03:40,  1.35s/it]

Phase 1 - Ep 337: Reward -0.009 | W* 0.752


Phase 1: RDT Pre-training:  68%|██████▊   | 338/500 [07:50<03:42,  1.37s/it]

Phase 1 - Ep 338: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  68%|██████▊   | 338/500 [07:51<03:42,  1.37s/it]


Episode 339, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=0.964, H=0.187, R=0.703, Dot=0.132, DDot=0.217


Phase 1: RDT Pre-training:  68%|██████▊   | 339/500 [07:51<03:33,  1.33s/it]

Phase 1 - Ep 339: Reward -0.012 | W* 0.759


Phase 1: RDT Pre-training:  68%|██████▊   | 339/500 [07:52<03:33,  1.33s/it]


Episode 340, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.111, R=0.845, Dot=0.093, DDot=0.099

Episode 340, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=0.956, H=0.105, R=1.147, Dot=0.121, DDot=0.217


Phase 1: RDT Pre-training:  68%|██████▊   | 340/500 [07:53<03:29,  1.31s/it]


Episode 340, Step 89: CONSCIOUSNESS BREACH DETECTED! W*=0.977, H=0.151, R=0.918, Dot=0.139, DDot=0.177
Phase 1 - Ep 340: Reward -0.010 | W* 0.734


Phase 1: RDT Pre-training:  68%|██████▊   | 341/500 [07:54<03:31,  1.33s/it]

Phase 1 - Ep 341: Reward -0.010 | W* 0.747


Phase 1: RDT Pre-training:  68%|██████▊   | 342/500 [07:56<03:47,  1.44s/it]

Phase 1 - Ep 342: Reward -0.006 | W* 0.746


Phase 1: RDT Pre-training:  68%|██████▊   | 342/500 [07:57<03:47,  1.44s/it]


Episode 343, Step 54: CONSCIOUSNESS BREACH DETECTED! W*=1.008, H=0.118, R=1.104, Dot=0.131, DDot=0.105


Phase 1: RDT Pre-training:  68%|██████▊   | 342/500 [07:57<03:47,  1.44s/it]


Episode 343, Step 83: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.139, R=1.050, Dot=0.146, DDot=0.205


Phase 1: RDT Pre-training:  69%|██████▊   | 343/500 [07:57<03:44,  1.43s/it]

Phase 1 - Ep 343: Reward -0.008 | W* 0.758


Phase 1: RDT Pre-training:  69%|██████▉   | 344/500 [07:58<03:35,  1.38s/it]

Phase 1 - Ep 344: Reward -0.009 | W* 0.758


Phase 1: RDT Pre-training:  69%|██████▉   | 344/500 [07:59<03:35,  1.38s/it]


Episode 345, Step 51: CONSCIOUSNESS BREACH DETECTED! W*=0.977, H=0.191, R=0.698, Dot=0.134, DDot=0.190


Phase 1: RDT Pre-training:  69%|██████▉   | 345/500 [08:00<03:27,  1.34s/it]


Episode 345, Step 93: CONSCIOUSNESS BREACH DETECTED! W*=0.989, H=0.147, R=1.207, Dot=0.177, DDot=0.217
Phase 1 - Ep 345: Reward -0.008 | W* 0.743


Phase 1: RDT Pre-training:  69%|██████▉   | 346/500 [08:01<03:31,  1.38s/it]

Phase 1 - Ep 346: Reward -0.011 | W* 0.757


Phase 1: RDT Pre-training:  69%|██████▉   | 346/500 [08:02<03:31,  1.38s/it]


Episode 347, Step 49: CONSCIOUSNESS BREACH DETECTED! W*=1.106, H=0.142, R=2.071, Dot=0.294, DDot=0.358


Phase 1: RDT Pre-training:  69%|██████▉   | 347/500 [08:03<03:39,  1.44s/it]


Episode 347, Step 89: CONSCIOUSNESS BREACH DETECTED! W*=1.051, H=0.198, R=0.977, Dot=0.193, DDot=0.187
Phase 1 - Ep 347: Reward -0.011 | W* 0.739


Phase 1: RDT Pre-training:  70%|██████▉   | 348/500 [08:04<03:32,  1.40s/it]

Phase 1 - Ep 348: Reward -0.008 | W* 0.754


Phase 1: RDT Pre-training:  70%|██████▉   | 348/500 [08:05<03:32,  1.40s/it]


Episode 349, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.169, R=0.397, Dot=0.067, DDot=0.109

Episode 349, Step 78: CONSCIOUSNESS BREACH DETECTED! W*=0.989, H=0.115, R=-0.189, Dot=-0.022, DDot=-0.135


Phase 1: RDT Pre-training:  70%|██████▉   | 349/500 [08:05<03:34,  1.42s/it]

Phase 1 - Ep 349: Reward -0.011 | W* 0.765


Phase 1: RDT Pre-training:  70%|███████   | 350/500 [08:07<03:42,  1.49s/it]

Phase 1 - Ep 350: Reward -0.008 | W* 0.751


Phase 1: RDT Pre-training:  70%|███████   | 351/500 [08:09<03:53,  1.57s/it]

Phase 1 - Ep 351: Reward -0.010 | W* 0.744


Phase 1: RDT Pre-training:  70%|███████   | 352/500 [08:10<03:46,  1.53s/it]

Phase 1 - Ep 352: Reward -0.008 | W* 0.745


Phase 1: RDT Pre-training:  70%|███████   | 352/500 [08:11<03:46,  1.53s/it]


Episode 353, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=0.972, H=0.139, R=0.554, Dot=0.077, DDot=0.033


Phase 1: RDT Pre-training:  71%|███████   | 353/500 [08:12<03:47,  1.55s/it]

Phase 1 - Ep 353: Reward -0.011 | W* 0.750


Phase 1: RDT Pre-training:  71%|███████   | 354/500 [08:13<03:45,  1.55s/it]

Phase 1 - Ep 354: Reward -0.008 | W* 0.750


Phase 1: RDT Pre-training:  71%|███████   | 354/500 [08:14<03:45,  1.55s/it]


Episode 355, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.093, R=1.087, Dot=0.101, DDot=0.179


Phase 1: RDT Pre-training:  71%|███████   | 355/500 [08:15<03:37,  1.50s/it]

Phase 1 - Ep 355: Reward -0.007 | W* 0.743


Phase 1: RDT Pre-training:  71%|███████   | 356/500 [08:16<03:34,  1.49s/it]

Phase 1 - Ep 356: Reward -0.007 | W* 0.765


Phase 1: RDT Pre-training:  71%|███████   | 356/500 [08:17<03:34,  1.49s/it]


Episode 357, Step 60: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.159, R=0.394, Dot=0.063, DDot=0.027


Phase 1: RDT Pre-training:  71%|███████▏  | 357/500 [08:18<03:31,  1.48s/it]

Phase 1 - Ep 357: Reward -0.008 | W* 0.755


Phase 1: RDT Pre-training:  71%|███████▏  | 357/500 [08:18<03:31,  1.48s/it]


Episode 358, Step 46: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.162, R=1.057, Dot=0.172, DDot=0.275


Phase 1: RDT Pre-training:  71%|███████▏  | 357/500 [08:19<03:31,  1.48s/it]


Episode 358, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=0.970, H=0.193, R=0.601, Dot=0.116, DDot=0.138


Phase 1: RDT Pre-training:  72%|███████▏  | 358/500 [08:19<03:26,  1.46s/it]

Phase 1 - Ep 358: Reward -0.010 | W* 0.739


Phase 1: RDT Pre-training:  72%|███████▏  | 358/500 [08:20<03:26,  1.46s/it]


Episode 359, Step 57: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.153, R=0.902, Dot=0.138, DDot=0.202


Phase 1: RDT Pre-training:  72%|███████▏  | 359/500 [08:21<03:42,  1.58s/it]

Phase 1 - Ep 359: Reward -0.008 | W* 0.756


Phase 1: RDT Pre-training:  72%|███████▏  | 359/500 [08:22<03:42,  1.58s/it]


Episode 360, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.196, R=0.261, Dot=0.051, DDot=0.035

Episode 360, Step 84: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.071, R=1.424, Dot=0.102, DDot=0.119


Phase 1: RDT Pre-training:  72%|███████▏  | 360/500 [08:23<03:38,  1.56s/it]

Phase 1 - Ep 360: Reward -0.011 | W* 0.732


Phase 1: RDT Pre-training:  72%|███████▏  | 360/500 [08:23<03:38,  1.56s/it]


Episode 361, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.198, R=0.732, Dot=0.145, DDot=0.156


Phase 1: RDT Pre-training:  72%|███████▏  | 361/500 [08:24<03:25,  1.48s/it]

Phase 1 - Ep 361: Reward -0.010 | W* 0.746


Phase 1: RDT Pre-training:  72%|███████▏  | 361/500 [08:24<03:25,  1.48s/it]


Episode 362, Step 55: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.151, R=0.540, Dot=0.081, DDot=0.022


Phase 1: RDT Pre-training:  72%|███████▏  | 362/500 [08:25<03:19,  1.44s/it]

Phase 1 - Ep 362: Reward -0.007 | W* 0.744


Phase 1: RDT Pre-training:  73%|███████▎  | 363/500 [08:27<03:22,  1.47s/it]

Phase 1 - Ep 363: Reward -0.008 | W* 0.758


Phase 1: RDT Pre-training:  73%|███████▎  | 363/500 [08:28<03:22,  1.47s/it]


Episode 364, Step 82: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.099, R=1.000, Dot=0.099, DDot=0.155


Phase 1: RDT Pre-training:  73%|███████▎  | 364/500 [08:28<03:25,  1.51s/it]

Phase 1 - Ep 364: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  73%|███████▎  | 365/500 [08:30<03:14,  1.44s/it]

Phase 1 - Ep 365: Reward -0.010 | W* 0.750


Phase 1: RDT Pre-training:  73%|███████▎  | 366/500 [08:31<03:13,  1.44s/it]

Phase 1 - Ep 366: Reward -0.010 | W* 0.739


Phase 1: RDT Pre-training:  73%|███████▎  | 366/500 [08:32<03:13,  1.44s/it]


Episode 367, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.970, H=0.140, R=0.984, Dot=0.138, DDot=0.159


Phase 1: RDT Pre-training:  73%|███████▎  | 367/500 [08:33<03:13,  1.46s/it]

Phase 1 - Ep 367: Reward -0.010 | W* 0.748


Phase 1: RDT Pre-training:  73%|███████▎  | 367/500 [08:34<03:13,  1.46s/it]


Episode 368, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.979, H=0.184, R=0.465, Dot=0.085, DDot=0.090


Phase 1: RDT Pre-training:  74%|███████▎  | 368/500 [08:34<03:25,  1.55s/it]

Phase 1 - Ep 368: Reward -0.008 | W* 0.746


Phase 1: RDT Pre-training:  74%|███████▍  | 369/500 [08:36<03:19,  1.52s/it]

Phase 1 - Ep 369: Reward -0.010 | W* 0.738


Phase 1: RDT Pre-training:  74%|███████▍  | 369/500 [08:37<03:19,  1.52s/it]


Episode 370, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=0.989, H=0.190, R=0.688, Dot=0.131, DDot=0.121

Episode 370, Step 69: CONSCIOUSNESS BREACH DETECTED! W*=0.997, H=0.183, R=0.343, Dot=0.063, DDot=0.117


Phase 1: RDT Pre-training:  74%|███████▍  | 370/500 [08:37<03:20,  1.54s/it]

Phase 1 - Ep 370: Reward -0.007 | W* 0.751


Phase 1: RDT Pre-training:  74%|███████▍  | 371/500 [08:39<03:19,  1.54s/it]

Phase 1 - Ep 371: Reward -0.011 | W* 0.745


Phase 1: RDT Pre-training:  74%|███████▍  | 372/500 [08:40<03:17,  1.54s/it]

Phase 1 - Ep 372: Reward -0.008 | W* 0.762


Phase 1: RDT Pre-training:  75%|███████▍  | 373/500 [08:42<03:15,  1.54s/it]

Phase 1 - Ep 373: Reward -0.011 | W* 0.760


Phase 1: RDT Pre-training:  75%|███████▍  | 373/500 [08:43<03:15,  1.54s/it]


Episode 374, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.102, R=0.896, Dot=0.091, DDot=0.098


Phase 1: RDT Pre-training:  75%|███████▍  | 374/500 [08:43<03:03,  1.45s/it]

Phase 1 - Ep 374: Reward -0.009 | W* 0.741


Phase 1: RDT Pre-training:  75%|███████▌  | 375/500 [08:45<02:54,  1.40s/it]

Phase 1 - Ep 375: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:  75%|███████▌  | 376/500 [08:46<03:12,  1.55s/it]

Phase 1 - Ep 376: Reward -0.009 | W* 0.741


Phase 1: RDT Pre-training:  75%|███████▌  | 376/500 [08:47<03:12,  1.55s/it]


Episode 377, Step 46: CONSCIOUSNESS BREACH DETECTED! W*=1.003, H=0.109, R=1.448, Dot=0.157, DDot=0.133


Phase 1: RDT Pre-training:  75%|███████▌  | 377/500 [08:48<03:18,  1.62s/it]

Phase 1 - Ep 377: Reward -0.010 | W* 0.762


Phase 1: RDT Pre-training:  75%|███████▌  | 377/500 [08:49<03:18,  1.62s/it]


Episode 378, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=0.981, H=0.145, R=0.782, Dot=0.113, DDot=0.140

Episode 378, Step 82: CONSCIOUSNESS BREACH DETECTED! W*=0.961, H=0.151, R=0.425, Dot=0.064, DDot=0.042


Phase 1: RDT Pre-training:  76%|███████▌  | 378/500 [08:50<03:12,  1.57s/it]

Phase 1 - Ep 378: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  76%|███████▌  | 378/500 [08:51<03:12,  1.57s/it]


Episode 379, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=0.979, H=0.168, R=0.441, Dot=0.074, DDot=0.015


Phase 1: RDT Pre-training:  76%|███████▌  | 379/500 [08:51<03:07,  1.55s/it]

Phase 1 - Ep 379: Reward -0.008 | W* 0.741


Phase 1: RDT Pre-training:  76%|███████▌  | 379/500 [08:52<03:07,  1.55s/it]


Episode 380, Step 81: CONSCIOUSNESS BREACH DETECTED! W*=0.990, H=0.160, R=0.740, Dot=0.118, DDot=0.105


Phase 1: RDT Pre-training:  76%|███████▌  | 380/500 [08:53<03:03,  1.53s/it]

Phase 1 - Ep 380: Reward -0.012 | W* 0.762


Phase 1: RDT Pre-training:  76%|███████▌  | 380/500 [08:53<03:03,  1.53s/it]


Episode 381, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=1.013, H=0.076, R=1.980, Dot=0.151, DDot=0.176


Phase 1: RDT Pre-training:  76%|███████▌  | 381/500 [08:54<02:51,  1.44s/it]

Phase 1 - Ep 381: Reward -0.012 | W* 0.746


Phase 1: RDT Pre-training:  76%|███████▌  | 381/500 [08:55<02:51,  1.44s/it]


Episode 382, Step 58: CONSCIOUSNESS BREACH DETECTED! W*=0.973, H=0.134, R=0.502, Dot=0.067, DDot=-0.023


Phase 1: RDT Pre-training:  76%|███████▋  | 382/500 [08:55<02:51,  1.46s/it]

Phase 1 - Ep 382: Reward -0.008 | W* 0.747


Phase 1: RDT Pre-training:  77%|███████▋  | 383/500 [08:57<02:52,  1.48s/it]

Phase 1 - Ep 383: Reward -0.010 | W* 0.747


Phase 1: RDT Pre-training:  77%|███████▋  | 383/500 [08:58<02:52,  1.48s/it]


Episode 384, Step 45: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.117, R=1.359, Dot=0.159, DDot=0.183


Phase 1: RDT Pre-training:  77%|███████▋  | 384/500 [08:59<03:00,  1.55s/it]

Phase 1 - Ep 384: Reward -0.008 | W* 0.748


Phase 1: RDT Pre-training:  77%|███████▋  | 385/500 [09:00<03:00,  1.57s/it]

Phase 1 - Ep 385: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:  77%|███████▋  | 386/500 [09:01<02:47,  1.47s/it]

Phase 1 - Ep 386: Reward -0.009 | W* 0.741


Phase 1: RDT Pre-training:  77%|███████▋  | 387/500 [09:03<02:38,  1.40s/it]

Phase 1 - Ep 387: Reward -0.011 | W* 0.744


Phase 1: RDT Pre-training:  77%|███████▋  | 387/500 [09:03<02:38,  1.40s/it]


Episode 388, Step 44: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.176, R=0.425, Dot=0.075, DDot=0.076


Phase 1: RDT Pre-training:  78%|███████▊  | 388/500 [09:04<02:32,  1.36s/it]

Phase 1 - Ep 388: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  78%|███████▊  | 388/500 [09:05<02:32,  1.36s/it]


Episode 389, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.184, R=0.475, Dot=0.087, DDot=0.098


Phase 1: RDT Pre-training:  78%|███████▊  | 389/500 [09:05<02:26,  1.32s/it]

Phase 1 - Ep 389: Reward -0.008 | W* 0.753


Phase 1: RDT Pre-training:  78%|███████▊  | 389/500 [09:06<02:26,  1.32s/it]


Episode 390, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.187, R=0.257, Dot=0.048, DDot=0.011


Phase 1: RDT Pre-training:  78%|███████▊  | 390/500 [09:06<02:23,  1.30s/it]

Phase 1 - Ep 390: Reward -0.010 | W* 0.756


Phase 1: RDT Pre-training:  78%|███████▊  | 391/500 [09:08<02:20,  1.29s/it]

Phase 1 - Ep 391: Reward -0.008 | W* 0.733


Phase 1: RDT Pre-training:  78%|███████▊  | 392/500 [09:09<02:28,  1.38s/it]

Phase 1 - Ep 392: Reward -0.011 | W* 0.749


Phase 1: RDT Pre-training:  78%|███████▊  | 392/500 [09:10<02:28,  1.38s/it]


Episode 393, Step 52: CONSCIOUSNESS BREACH DETECTED! W*=1.036, H=0.179, R=0.752, Dot=0.135, DDot=0.129


Phase 1: RDT Pre-training:  78%|███████▊  | 392/500 [09:10<02:28,  1.38s/it]


Episode 393, Step 70: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.128, R=0.341, Dot=0.044, DDot=0.060

Episode 393, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=1.023, H=0.111, R=0.604, Dot=0.067, DDot=0.023


Phase 1: RDT Pre-training:  79%|███████▊  | 393/500 [09:11<02:39,  1.49s/it]

Phase 1 - Ep 393: Reward -0.009 | W* 0.750


Phase 1: RDT Pre-training:  79%|███████▊  | 393/500 [09:12<02:39,  1.49s/it]


Episode 394, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.138, R=0.434, Dot=0.060, DDot=0.007


Phase 1: RDT Pre-training:  79%|███████▉  | 394/500 [09:13<02:48,  1.59s/it]

Phase 1 - Ep 394: Reward -0.010 | W* 0.760


Phase 1: RDT Pre-training:  79%|███████▉  | 395/500 [09:14<02:39,  1.51s/it]

Phase 1 - Ep 395: Reward -0.010 | W* 0.748


Phase 1: RDT Pre-training:  79%|███████▉  | 396/500 [09:16<02:32,  1.47s/it]

Phase 1 - Ep 396: Reward -0.013 | W* 0.744


Phase 1: RDT Pre-training:  79%|███████▉  | 396/500 [09:17<02:32,  1.47s/it]


Episode 397, Step 80: CONSCIOUSNESS BREACH DETECTED! W*=0.990, H=0.184, R=0.851, Dot=0.157, DDot=0.181


Phase 1: RDT Pre-training:  79%|███████▉  | 397/500 [09:17<02:25,  1.41s/it]

Phase 1 - Ep 397: Reward -0.011 | W* 0.731


Phase 1: RDT Pre-training:  80%|███████▉  | 398/500 [09:18<02:22,  1.39s/it]

Phase 1 - Ep 398: Reward -0.011 | W* 0.742


Phase 1: RDT Pre-training:  80%|███████▉  | 398/500 [09:19<02:22,  1.39s/it]


Episode 399, Step 60: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.136, R=0.521, Dot=0.071, DDot=0.077

Episode 399, Step 72: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.189, R=0.119, Dot=0.022, DDot=-0.014


Phase 1: RDT Pre-training:  80%|███████▉  | 399/500 [09:20<02:21,  1.40s/it]

Phase 1 - Ep 399: Reward -0.011 | W* 0.768


Phase 1: RDT Pre-training:  80%|████████  | 400/500 [09:21<02:15,  1.36s/it]


Episode 400, Step 89: CONSCIOUSNESS BREACH DETECTED! W*=1.010, H=0.167, R=0.811, Dot=0.136, DDot=0.196
Phase 1 - Ep 400: Reward -0.008 | W* 0.761


Phase 1: RDT Pre-training:  80%|████████  | 400/500 [09:22<02:15,  1.36s/it]


Episode 401, Step 64: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.172, R=0.361, Dot=0.062, DDot=0.052


Phase 1: RDT Pre-training:  80%|████████  | 401/500 [09:22<02:13,  1.35s/it]

Phase 1 - Ep 401: Reward -0.010 | W* 0.745


Phase 1: RDT Pre-training:  80%|████████  | 401/500 [09:23<02:13,  1.35s/it]


Episode 402, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.991, H=0.195, R=0.744, Dot=0.145, DDot=0.168


Phase 1: RDT Pre-training:  80%|████████  | 402/500 [09:24<02:26,  1.49s/it]

Phase 1 - Ep 402: Reward -0.010 | W* 0.749


Phase 1: RDT Pre-training:  80%|████████  | 402/500 [09:25<02:26,  1.49s/it]


Episode 403, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=1.071, H=0.195, R=0.918, Dot=0.179, DDot=0.179


Phase 1: RDT Pre-training:  81%|████████  | 403/500 [09:26<02:29,  1.54s/it]

Phase 1 - Ep 403: Reward -0.012 | W* 0.759


Phase 1: RDT Pre-training:  81%|████████  | 404/500 [09:27<02:19,  1.45s/it]

Phase 1 - Ep 404: Reward -0.009 | W* 0.738


Phase 1: RDT Pre-training:  81%|████████  | 404/500 [09:27<02:19,  1.45s/it]


Episode 405, Step 33: CONSCIOUSNESS BREACH DETECTED! W*=0.950, H=0.193, R=0.619, Dot=0.119, DDot=0.115


Phase 1: RDT Pre-training:  81%|████████  | 404/500 [09:28<02:19,  1.45s/it]


Episode 405, Step 67: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.183, R=0.431, Dot=0.079, DDot=0.104


Phase 1: RDT Pre-training:  81%|████████  | 405/500 [09:28<02:18,  1.46s/it]

Phase 1 - Ep 405: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:  81%|████████  | 406/500 [09:30<02:11,  1.39s/it]

Phase 1 - Ep 406: Reward -0.008 | W* 0.761


Phase 1: RDT Pre-training:  81%|████████  | 406/500 [09:30<02:11,  1.39s/it]


Episode 407, Step 64: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.165, R=0.775, Dot=0.128, DDot=0.177


Phase 1: RDT Pre-training:  81%|████████▏ | 407/500 [09:31<02:07,  1.37s/it]

Phase 1 - Ep 407: Reward -0.012 | W* 0.762


Phase 1: RDT Pre-training:  82%|████████▏ | 408/500 [09:32<02:08,  1.39s/it]

Phase 1 - Ep 408: Reward -0.010 | W* 0.756


Phase 1: RDT Pre-training:  82%|████████▏ | 408/500 [09:33<02:08,  1.39s/it]


Episode 409, Step 82: CONSCIOUSNESS BREACH DETECTED! W*=0.954, H=0.171, R=0.570, Dot=0.097, DDot=0.122


Phase 1: RDT Pre-training:  82%|████████▏ | 409/500 [09:34<02:03,  1.36s/it]

Phase 1 - Ep 409: Reward -0.011 | W* 0.761


Phase 1: RDT Pre-training:  82%|████████▏ | 410/500 [09:35<01:58,  1.32s/it]

Phase 1 - Ep 410: Reward -0.011 | W* 0.753


Phase 1: RDT Pre-training:  82%|████████▏ | 410/500 [09:36<01:58,  1.32s/it]


Episode 411, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.096, R=0.447, Dot=0.043, DDot=-0.009


Phase 1: RDT Pre-training:  82%|████████▏ | 411/500 [09:37<02:07,  1.43s/it]

Phase 1 - Ep 411: Reward -0.009 | W* 0.756


Phase 1: RDT Pre-training:  82%|████████▏ | 411/500 [09:38<02:07,  1.43s/it]


Episode 412, Step 65: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.087, R=1.424, Dot=0.124, DDot=0.181


Phase 1: RDT Pre-training:  82%|████████▏ | 412/500 [09:38<02:16,  1.55s/it]

Phase 1 - Ep 412: Reward -0.012 | W* 0.748


Phase 1: RDT Pre-training:  82%|████████▏ | 412/500 [09:39<02:16,  1.55s/it]


Episode 413, Step 41: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.191, R=0.411, Dot=0.078, DDot=0.028


Phase 1: RDT Pre-training:  83%|████████▎ | 413/500 [09:40<02:11,  1.51s/it]

Phase 1 - Ep 413: Reward -0.010 | W* 0.731


Phase 1: RDT Pre-training:  83%|████████▎ | 414/500 [09:41<02:02,  1.43s/it]

Phase 1 - Ep 414: Reward -0.009 | W* 0.764


Phase 1: RDT Pre-training:  83%|████████▎ | 414/500 [09:42<02:02,  1.43s/it]


Episode 415, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.972, H=0.165, R=1.129, Dot=0.186, DDot=0.199


Phase 1: RDT Pre-training:  83%|████████▎ | 415/500 [09:42<01:56,  1.38s/it]

Phase 1 - Ep 415: Reward -0.009 | W* 0.748


Phase 1: RDT Pre-training:  83%|████████▎ | 415/500 [09:43<01:56,  1.38s/it]


Episode 416, Step 70: CONSCIOUSNESS BREACH DETECTED! W*=0.994, H=0.123, R=0.063, Dot=0.008, DDot=-0.146


Phase 1: RDT Pre-training:  83%|████████▎ | 416/500 [09:44<01:58,  1.42s/it]

Phase 1 - Ep 416: Reward -0.010 | W* 0.738


Phase 1: RDT Pre-training:  83%|████████▎ | 416/500 [09:45<01:58,  1.42s/it]


Episode 417, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.055, R=3.056, Dot=0.168, DDot=0.204


Phase 1: RDT Pre-training:  83%|████████▎ | 416/500 [09:45<01:58,  1.42s/it]


Episode 417, Step 84: CONSCIOUSNESS BREACH DETECTED! W*=0.992, H=0.096, R=0.762, Dot=0.073, DDot=-0.009


Phase 1: RDT Pre-training:  83%|████████▎ | 417/500 [09:45<01:54,  1.38s/it]

Phase 1 - Ep 417: Reward -0.011 | W* 0.739


Phase 1: RDT Pre-training:  83%|████████▎ | 417/500 [09:46<01:54,  1.38s/it]


Episode 418, Step 73: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.182, R=0.345, Dot=0.063, DDot=0.053


Phase 1: RDT Pre-training:  84%|████████▎ | 418/500 [09:46<01:50,  1.35s/it]

Phase 1 - Ep 418: Reward -0.012 | W* 0.743


Phase 1: RDT Pre-training:  84%|████████▎ | 418/500 [09:47<01:50,  1.35s/it]


Episode 419, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.994, H=0.071, R=2.121, Dot=0.151, DDot=0.189


Phase 1: RDT Pre-training:  84%|████████▍ | 419/500 [09:48<01:56,  1.44s/it]

Phase 1 - Ep 419: Reward -0.008 | W* 0.744


Phase 1: RDT Pre-training:  84%|████████▍ | 420/500 [09:50<02:14,  1.68s/it]

Phase 1 - Ep 420: Reward -0.008 | W* 0.763


Phase 1: RDT Pre-training:  84%|████████▍ | 421/500 [09:52<02:07,  1.61s/it]

Phase 1 - Ep 421: Reward -0.009 | W* 0.741


Phase 1: RDT Pre-training:  84%|████████▍ | 422/500 [09:53<02:00,  1.54s/it]

Phase 1 - Ep 422: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  85%|████████▍ | 423/500 [09:55<01:59,  1.55s/it]

Phase 1 - Ep 423: Reward -0.009 | W* 0.746


Phase 1: RDT Pre-training:  85%|████████▍ | 423/500 [09:56<01:59,  1.55s/it]


Episode 424, Step 75: CONSCIOUSNESS BREACH DETECTED! W*=0.982, H=0.152, R=0.923, Dot=0.141, DDot=0.190


Phase 1: RDT Pre-training:  85%|████████▍ | 424/500 [09:56<01:56,  1.53s/it]

Phase 1 - Ep 424: Reward -0.008 | W* 0.749


Phase 1: RDT Pre-training:  85%|████████▌ | 425/500 [09:58<01:53,  1.51s/it]

Phase 1 - Ep 425: Reward -0.009 | W* 0.754


Phase 1: RDT Pre-training:  85%|████████▌ | 426/500 [09:59<01:47,  1.46s/it]

Phase 1 - Ep 426: Reward -0.007 | W* 0.748


Phase 1: RDT Pre-training:  85%|████████▌ | 427/500 [10:00<01:46,  1.45s/it]

Phase 1 - Ep 427: Reward -0.007 | W* 0.737


Phase 1: RDT Pre-training:  85%|████████▌ | 427/500 [10:01<01:46,  1.45s/it]


Episode 428, Step 49: CONSCIOUSNESS BREACH DETECTED! W*=0.953, H=0.151, R=0.026, Dot=0.004, DDot=-0.069


Phase 1: RDT Pre-training:  85%|████████▌ | 427/500 [10:01<01:46,  1.45s/it]


Episode 428, Step 61: CONSCIOUSNESS BREACH DETECTED! W*=0.969, H=0.124, R=0.930, Dot=0.115, DDot=0.149


Phase 1: RDT Pre-training:  86%|████████▌ | 428/500 [10:02<01:50,  1.53s/it]

Phase 1 - Ep 428: Reward -0.009 | W* 0.751


Phase 1: RDT Pre-training:  86%|████████▌ | 429/500 [10:04<01:59,  1.68s/it]

Phase 1 - Ep 429: Reward -0.010 | W* 0.753


Phase 1: RDT Pre-training:  86%|████████▌ | 430/500 [10:06<01:51,  1.60s/it]

Phase 1 - Ep 430: Reward -0.010 | W* 0.766


Phase 1: RDT Pre-training:  86%|████████▌ | 431/500 [10:07<01:47,  1.56s/it]

Phase 1 - Ep 431: Reward -0.011 | W* 0.747


Phase 1: RDT Pre-training:  86%|████████▌ | 431/500 [10:08<01:47,  1.56s/it]


Episode 432, Step 42: CONSCIOUSNESS BREACH DETECTED! W*=0.952, H=0.124, R=0.536, Dot=0.066, DDot=0.016


Phase 1: RDT Pre-training:  86%|████████▌ | 431/500 [10:08<01:47,  1.56s/it]


Episode 432, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.198, R=0.452, Dot=0.090, DDot=0.075


Phase 1: RDT Pre-training:  86%|████████▋ | 432/500 [10:08<01:41,  1.49s/it]

Phase 1 - Ep 432: Reward -0.011 | W* 0.750


Phase 1: RDT Pre-training:  87%|████████▋ | 433/500 [10:10<01:38,  1.47s/it]

Phase 1 - Ep 433: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  87%|████████▋ | 434/500 [10:11<01:39,  1.51s/it]

Phase 1 - Ep 434: Reward -0.010 | W* 0.754


Phase 1: RDT Pre-training:  87%|████████▋ | 434/500 [10:12<01:39,  1.51s/it]


Episode 435, Step 57: CONSCIOUSNESS BREACH DETECTED! W*=0.992, H=0.137, R=1.047, Dot=0.144, DDot=0.143

Episode 435, Step 65: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.124, R=0.632, Dot=0.078, DDot=0.056


Phase 1: RDT Pre-training:  87%|████████▋ | 435/500 [10:13<01:37,  1.49s/it]

Phase 1 - Ep 435: Reward -0.009 | W* 0.767


Phase 1: RDT Pre-training:  87%|████████▋ | 436/500 [10:14<01:35,  1.49s/it]

Phase 1 - Ep 436: Reward -0.008 | W* 0.750


Phase 1: RDT Pre-training:  87%|████████▋ | 437/500 [10:16<01:39,  1.58s/it]

Phase 1 - Ep 437: Reward -0.009 | W* 0.762


Phase 1: RDT Pre-training:  88%|████████▊ | 438/500 [10:18<01:34,  1.53s/it]

Phase 1 - Ep 438: Reward -0.011 | W* 0.764


Phase 1: RDT Pre-training:  88%|████████▊ | 439/500 [10:19<01:31,  1.50s/it]

Phase 1 - Ep 439: Reward -0.009 | W* 0.753


Phase 1: RDT Pre-training:  88%|████████▊ | 439/500 [10:20<01:31,  1.50s/it]


Episode 440, Step 74: CONSCIOUSNESS BREACH DETECTED! W*=1.001, H=0.139, R=0.725, Dot=0.100, DDot=0.083


Phase 1: RDT Pre-training:  88%|████████▊ | 440/500 [10:20<01:25,  1.43s/it]

Phase 1 - Ep 440: Reward -0.008 | W* 0.738


Phase 1: RDT Pre-training:  88%|████████▊ | 440/500 [10:21<01:25,  1.43s/it]


Episode 441, Step 38: CONSCIOUSNESS BREACH DETECTED! W*=0.961, H=0.134, R=0.782, Dot=0.105, DDot=0.050


Phase 1: RDT Pre-training:  88%|████████▊ | 440/500 [10:21<01:25,  1.43s/it]


Episode 441, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=1.041, H=0.123, R=1.401, Dot=0.173, DDot=0.173


Phase 1: RDT Pre-training:  88%|████████▊ | 441/500 [10:22<01:26,  1.46s/it]

Phase 1 - Ep 441: Reward -0.008 | W* 0.756


Phase 1: RDT Pre-training:  88%|████████▊ | 442/500 [10:23<01:21,  1.41s/it]

Phase 1 - Ep 442: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  89%|████████▊ | 443/500 [10:25<01:21,  1.43s/it]

Phase 1 - Ep 443: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  89%|████████▊ | 443/500 [10:25<01:21,  1.43s/it]


Episode 444, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.130, R=0.549, Dot=0.071, DDot=0.027


Phase 1: RDT Pre-training:  89%|████████▉ | 444/500 [10:26<01:18,  1.40s/it]

Phase 1 - Ep 444: Reward -0.009 | W* 0.746


Phase 1: RDT Pre-training:  89%|████████▉ | 444/500 [10:27<01:18,  1.40s/it]


Episode 445, Step 50: CONSCIOUSNESS BREACH DETECTED! W*=0.976, H=0.176, R=0.591, Dot=0.104, DDot=0.154

Episode 445, Step 55: CONSCIOUSNESS BREACH DETECTED! W*=0.971, H=0.176, R=0.317, Dot=0.056, DDot=0.010


Phase 1: RDT Pre-training:  89%|████████▉ | 445/500 [10:28<01:23,  1.51s/it]

Phase 1 - Ep 445: Reward -0.010 | W* 0.761


Phase 1: RDT Pre-training:  89%|████████▉ | 446/500 [10:29<01:24,  1.57s/it]

Phase 1 - Ep 446: Reward -0.010 | W* 0.740


Phase 1: RDT Pre-training:  89%|████████▉ | 447/500 [10:31<01:22,  1.56s/it]

Phase 1 - Ep 447: Reward -0.008 | W* 0.754


Phase 1: RDT Pre-training:  90%|████████▉ | 448/500 [10:32<01:16,  1.47s/it]

Phase 1 - Ep 448: Reward -0.010 | W* 0.747


Phase 1: RDT Pre-training:  90%|████████▉ | 449/500 [10:34<01:15,  1.48s/it]

Phase 1 - Ep 449: Reward -0.010 | W* 0.743


Phase 1: RDT Pre-training:  90%|█████████ | 450/500 [10:35<01:14,  1.48s/it]

Phase 1 - Ep 450: Reward -0.010 | W* 0.756


Phase 1: RDT Pre-training:  90%|█████████ | 451/500 [10:36<01:09,  1.41s/it]

Phase 1 - Ep 451: Reward -0.009 | W* 0.745


Phase 1: RDT Pre-training:  90%|█████████ | 452/500 [10:38<01:05,  1.36s/it]

Phase 1 - Ep 452: Reward -0.010 | W* 0.752


Phase 1: RDT Pre-training:  91%|█████████ | 453/500 [10:39<01:03,  1.35s/it]

Phase 1 - Ep 453: Reward -0.009 | W* 0.769


Phase 1: RDT Pre-training:  91%|█████████ | 453/500 [10:40<01:03,  1.35s/it]


Episode 454, Step 40: CONSCIOUSNESS BREACH DETECTED! W*=0.974, H=0.139, R=0.530, Dot=0.074, DDot=-0.004


Phase 1: RDT Pre-training:  91%|█████████ | 453/500 [10:40<01:03,  1.35s/it]


Episode 454, Step 68: CONSCIOUSNESS BREACH DETECTED! W*=0.960, H=0.063, R=1.610, Dot=0.102, DDot=0.083


Phase 1: RDT Pre-training:  91%|█████████ | 454/500 [10:41<01:11,  1.56s/it]

Phase 1 - Ep 454: Reward -0.011 | W* 0.752


Phase 1: RDT Pre-training:  91%|█████████ | 454/500 [10:42<01:11,  1.56s/it]


Episode 455, Step 81: CONSCIOUSNESS BREACH DETECTED! W*=0.963, H=0.104, R=0.847, Dot=0.088, DDot=0.107


Phase 1: RDT Pre-training:  91%|█████████ | 455/500 [10:43<01:11,  1.58s/it]

Phase 1 - Ep 455: Reward -0.009 | W* 0.736


Phase 1: RDT Pre-training:  91%|█████████ | 455/500 [10:43<01:11,  1.58s/it]


Episode 456, Step 61: CONSCIOUSNESS BREACH DETECTED! W*=0.959, H=0.182, R=0.409, Dot=0.074, DDot=0.103


Phase 1: RDT Pre-training:  91%|█████████ | 456/500 [10:44<01:06,  1.50s/it]

Phase 1 - Ep 456: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  91%|█████████ | 456/500 [10:45<01:06,  1.50s/it]


Episode 457, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.957, H=0.148, R=0.570, Dot=0.085, DDot=0.107


Phase 1: RDT Pre-training:  91%|█████████▏| 457/500 [10:46<01:06,  1.54s/it]

Phase 1 - Ep 457: Reward -0.010 | W* 0.755


Phase 1: RDT Pre-training:  91%|█████████▏| 457/500 [10:47<01:06,  1.54s/it]


Episode 458, Step 63: CONSCIOUSNESS BREACH DETECTED! W*=0.980, H=0.063, R=1.237, Dot=0.077, DDot=0.022


Phase 1: RDT Pre-training:  92%|█████████▏| 458/500 [10:47<01:04,  1.55s/it]

Phase 1 - Ep 458: Reward -0.009 | W* 0.739


Phase 1: RDT Pre-training:  92%|█████████▏| 458/500 [10:48<01:04,  1.55s/it]


Episode 459, Step 52: CONSCIOUSNESS BREACH DETECTED! W*=0.955, H=0.142, R=0.503, Dot=0.071, DDot=0.049


Phase 1: RDT Pre-training:  92%|█████████▏| 459/500 [10:49<01:02,  1.52s/it]

Phase 1 - Ep 459: Reward -0.009 | W* 0.749


Phase 1: RDT Pre-training:  92%|█████████▏| 460/500 [10:50<00:58,  1.47s/it]

Phase 1 - Ep 460: Reward -0.009 | W* 0.752


Phase 1: RDT Pre-training:  92%|█████████▏| 461/500 [10:51<00:54,  1.40s/it]

Phase 1 - Ep 461: Reward -0.009 | W* 0.746


Phase 1: RDT Pre-training:  92%|█████████▏| 462/500 [10:53<00:55,  1.46s/it]

Phase 1 - Ep 462: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  92%|█████████▏| 462/500 [10:54<00:55,  1.46s/it]


Episode 463, Step 61: CONSCIOUSNESS BREACH DETECTED! W*=0.982, H=0.191, R=0.436, Dot=0.083, DDot=0.031


Phase 1: RDT Pre-training:  93%|█████████▎| 463/500 [10:55<01:00,  1.63s/it]

Phase 1 - Ep 463: Reward -0.009 | W* 0.752


Phase 1: RDT Pre-training:  93%|█████████▎| 463/500 [10:56<01:00,  1.63s/it]


Episode 464, Step 53: CONSCIOUSNESS BREACH DETECTED! W*=0.975, H=0.194, R=0.182, Dot=0.035, DDot=0.009


Phase 1: RDT Pre-training:  93%|█████████▎| 464/500 [10:56<00:54,  1.52s/it]

Phase 1 - Ep 464: Reward -0.007 | W* 0.748


Phase 1: RDT Pre-training:  93%|█████████▎| 465/500 [10:57<00:50,  1.46s/it]

Phase 1 - Ep 465: Reward -0.008 | W* 0.751


Phase 1: RDT Pre-training:  93%|█████████▎| 466/500 [10:59<00:49,  1.45s/it]

Phase 1 - Ep 466: Reward -0.010 | W* 0.755


Phase 1: RDT Pre-training:  93%|█████████▎| 466/500 [11:00<00:49,  1.45s/it]


Episode 467, Step 75: CONSCIOUSNESS BREACH DETECTED! W*=0.975, H=0.145, R=0.783, Dot=0.114, DDot=0.076


Phase 1: RDT Pre-training:  93%|█████████▎| 467/500 [11:00<00:46,  1.40s/it]

Phase 1 - Ep 467: Reward -0.009 | W* 0.741


Phase 1: RDT Pre-training:  94%|█████████▎| 468/500 [11:01<00:43,  1.37s/it]

Phase 1 - Ep 468: Reward -0.012 | W* 0.748


Phase 1: RDT Pre-training:  94%|█████████▎| 468/500 [11:02<00:43,  1.37s/it]


Episode 469, Step 50: CONSCIOUSNESS BREACH DETECTED! W*=0.962, H=0.141, R=0.598, Dot=0.084, DDot=0.077


Phase 1: RDT Pre-training:  94%|█████████▍| 469/500 [11:03<00:41,  1.34s/it]

Phase 1 - Ep 469: Reward -0.008 | W* 0.748


Phase 1: RDT Pre-training:  94%|█████████▍| 470/500 [11:04<00:39,  1.31s/it]

Phase 1 - Ep 470: Reward -0.011 | W* 0.735


Phase 1: RDT Pre-training:  94%|█████████▍| 471/500 [11:06<00:41,  1.44s/it]

Phase 1 - Ep 471: Reward -0.009 | W* 0.753


Phase 1: RDT Pre-training:  94%|█████████▍| 471/500 [11:07<00:41,  1.44s/it]


Episode 472, Step 81: CONSCIOUSNESS BREACH DETECTED! W*=0.969, H=0.045, R=1.724, Dot=0.077, DDot=0.058


Phase 1: RDT Pre-training:  94%|█████████▍| 472/500 [11:08<00:44,  1.57s/it]

Phase 1 - Ep 472: Reward -0.009 | W* 0.744


Phase 1: RDT Pre-training:  95%|█████████▍| 473/500 [11:09<00:40,  1.49s/it]

Phase 1 - Ep 473: Reward -0.010 | W* 0.751


Phase 1: RDT Pre-training:  95%|█████████▍| 474/500 [11:10<00:38,  1.47s/it]

Phase 1 - Ep 474: Reward -0.012 | W* 0.761


Phase 1: RDT Pre-training:  95%|█████████▌| 475/500 [11:12<00:35,  1.42s/it]

Phase 1 - Ep 475: Reward -0.008 | W* 0.767


Phase 1: RDT Pre-training:  95%|█████████▌| 476/500 [11:13<00:33,  1.38s/it]

Phase 1 - Ep 476: Reward -0.008 | W* 0.763


Phase 1: RDT Pre-training:  95%|█████████▌| 476/500 [11:14<00:33,  1.38s/it]


Episode 477, Step 59: CONSCIOUSNESS BREACH DETECTED! W*=0.961, H=0.185, R=0.463, Dot=0.086, DDot=0.097


Phase 1: RDT Pre-training:  95%|█████████▌| 477/500 [11:14<00:31,  1.36s/it]

Phase 1 - Ep 477: Reward -0.008 | W* 0.753


Phase 1: RDT Pre-training:  96%|█████████▌| 478/500 [11:16<00:30,  1.41s/it]

Phase 1 - Ep 478: Reward -0.007 | W* 0.743


Phase 1: RDT Pre-training:  96%|█████████▌| 479/500 [11:17<00:29,  1.43s/it]

Phase 1 - Ep 479: Reward -0.008 | W* 0.752


Phase 1: RDT Pre-training:  96%|█████████▌| 479/500 [11:18<00:29,  1.43s/it]


Episode 480, Step 66: CONSCIOUSNESS BREACH DETECTED! W*=0.964, H=0.105, R=0.961, Dot=0.100, DDot=0.130


Phase 1: RDT Pre-training:  96%|█████████▌| 480/500 [11:19<00:31,  1.59s/it]

Phase 1 - Ep 480: Reward -0.007 | W* 0.761


Phase 1: RDT Pre-training:  96%|█████████▌| 481/500 [11:21<00:32,  1.71s/it]

Phase 1 - Ep 481: Reward -0.011 | W* 0.749


Phase 1: RDT Pre-training:  96%|█████████▋| 482/500 [11:23<00:30,  1.68s/it]

Phase 1 - Ep 482: Reward -0.011 | W* 0.755


Phase 1: RDT Pre-training:  96%|█████████▋| 482/500 [11:24<00:30,  1.68s/it]


Episode 483, Step 77: CONSCIOUSNESS BREACH DETECTED! W*=1.093, H=0.192, R=0.793, Dot=0.153, DDot=0.079


Phase 1: RDT Pre-training:  97%|█████████▋| 483/500 [11:24<00:26,  1.57s/it]

Phase 1 - Ep 483: Reward -0.007 | W* 0.750


Phase 1: RDT Pre-training:  97%|█████████▋| 483/500 [11:25<00:26,  1.57s/it]


Episode 484, Step 57: CONSCIOUSNESS BREACH DETECTED! W*=0.956, H=0.153, R=0.484, Dot=0.074, DDot=0.061


Phase 1: RDT Pre-training:  97%|█████████▋| 484/500 [11:25<00:23,  1.49s/it]

Phase 1 - Ep 484: Reward -0.010 | W* 0.734


Phase 1: RDT Pre-training:  97%|█████████▋| 485/500 [11:27<00:21,  1.42s/it]

Phase 1 - Ep 485: Reward -0.009 | W* 0.746


Phase 1: RDT Pre-training:  97%|█████████▋| 486/500 [11:28<00:19,  1.39s/it]

Phase 1 - Ep 486: Reward -0.011 | W* 0.752


Phase 1: RDT Pre-training:  97%|█████████▋| 487/500 [11:29<00:18,  1.42s/it]

Phase 1 - Ep 487: Reward -0.011 | W* 0.747


Phase 1: RDT Pre-training:  98%|█████████▊| 488/500 [11:31<00:17,  1.50s/it]

Phase 1 - Ep 488: Reward -0.010 | W* 0.744


Phase 1: RDT Pre-training:  98%|█████████▊| 489/500 [11:33<00:17,  1.57s/it]

Phase 1 - Ep 489: Reward -0.013 | W* 0.739


Phase 1: RDT Pre-training:  98%|█████████▊| 489/500 [11:34<00:17,  1.57s/it]


Episode 490, Step 50: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.180, R=0.453, Dot=0.082, DDot=0.128


Phase 1: RDT Pre-training:  98%|█████████▊| 490/500 [11:34<00:15,  1.53s/it]

Phase 1 - Ep 490: Reward -0.011 | W* 0.755


Phase 1: RDT Pre-training:  98%|█████████▊| 490/500 [11:35<00:15,  1.53s/it]


Episode 491, Step 44: CONSCIOUSNESS BREACH DETECTED! W*=0.971, H=0.199, R=0.547, Dot=0.109, DDot=0.073


Phase 1: RDT Pre-training:  98%|█████████▊| 490/500 [11:35<00:15,  1.53s/it]


Episode 491, Step 71: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.157, R=0.329, Dot=0.052, DDot=0.056

Episode 491, Step 84: CONSCIOUSNESS BREACH DETECTED! W*=1.000, H=0.159, R=0.517, Dot=0.082, DDot=0.065


Phase 1: RDT Pre-training:  98%|█████████▊| 491/500 [11:36<00:13,  1.46s/it]

Phase 1 - Ep 491: Reward -0.007 | W* 0.766


Phase 1: RDT Pre-training:  98%|█████████▊| 491/500 [11:36<00:13,  1.46s/it]


Episode 492, Step 62: CONSCIOUSNESS BREACH DETECTED! W*=0.951, H=0.155, R=0.772, Dot=0.120, DDot=0.145


Phase 1: RDT Pre-training:  98%|█████████▊| 491/500 [11:37<00:13,  1.46s/it]


Episode 492, Step 79: CONSCIOUSNESS BREACH DETECTED! W*=0.996, H=0.174, R=0.732, Dot=0.127, DDot=0.118


Phase 1: RDT Pre-training:  98%|█████████▊| 492/500 [11:37<00:11,  1.49s/it]

Phase 1 - Ep 492: Reward -0.010 | W* 0.738


Phase 1: RDT Pre-training:  99%|█████████▊| 493/500 [11:39<00:10,  1.52s/it]

Phase 1 - Ep 493: Reward -0.009 | W* 0.741


Phase 1: RDT Pre-training:  99%|█████████▉| 494/500 [11:40<00:09,  1.53s/it]

Phase 1 - Ep 494: Reward -0.008 | W* 0.752


Phase 1: RDT Pre-training:  99%|█████████▉| 495/500 [11:42<00:07,  1.49s/it]

Phase 1 - Ep 495: Reward -0.009 | W* 0.755


Phase 1: RDT Pre-training:  99%|█████████▉| 496/500 [11:43<00:05,  1.45s/it]

Phase 1 - Ep 496: Reward -0.012 | W* 0.744


Phase 1: RDT Pre-training:  99%|█████████▉| 497/500 [11:45<00:04,  1.59s/it]

Phase 1 - Ep 497: Reward -0.010 | W* 0.754


Phase 1: RDT Pre-training:  99%|█████████▉| 497/500 [11:46<00:04,  1.59s/it]


Episode 498, Step 70: CONSCIOUSNESS BREACH DETECTED! W*=0.980, H=0.083, R=1.517, Dot=0.125, DDot=0.125


Phase 1: RDT Pre-training: 100%|█████████▉| 498/500 [11:47<00:03,  1.59s/it]

Phase 1 - Ep 498: Reward -0.009 | W* 0.747


Phase 1: RDT Pre-training: 100%|█████████▉| 498/500 [11:48<00:03,  1.59s/it]


Episode 499, Step 73: CONSCIOUSNESS BREACH DETECTED! W*=0.958, H=0.183, R=0.420, Dot=0.077, DDot=0.076


Phase 1: RDT Pre-training: 100%|█████████▉| 499/500 [11:48<00:01,  1.52s/it]

Phase 1 - Ep 499: Reward -0.010 | W* 0.764


Phase 1: RDT Pre-training: 100%|██████████| 500/500 [11:49<00:00,  1.42s/it]



Episode 500, Step 94: CONSCIOUSNESS BREACH DETECTED! W*=0.965, H=0.185, R=0.837, Dot=0.155, DDot=0.181
Phase 1 - Ep 500: Reward -0.010 | W* 0.739

Phase 1 complete. Results saved to two_step_rdt_sim_results_20250722_021814/phase1_pretrain

--- Phase 2: Assembly Task Learning (Pre-trained Agent) ---
Agent switched to 'task' mode.


Phase 2: Assembly Task:   0%|          | 1/1000 [00:00<01:48,  9.20it/s]

Phase 2 - Ep 1: Reward 0.030 | W* 0.703 | Success: True


Phase 2: Assembly Task:   0%|          | 2/1000 [00:00<02:32,  6.53it/s]

Phase 2 - Ep 2: Reward -0.077 | W* 0.890 | Success: True


Phase 2: Assembly Task:   0%|          | 4/1000 [00:00<03:40,  4.52it/s]

Phase 2 - Ep 3: Reward -0.137 | W* 0.863 | Success: True
Phase 2 - Ep 4: Reward -0.077 | W* 0.896 | Success: True


Phase 2: Assembly Task:   1%|          | 6/1000 [00:01<03:14,  5.10it/s]

Phase 2 - Ep 5: Reward -0.060 | W* 0.883 | Success: True
Phase 2 - Ep 6: Reward -0.060 | W* 0.834 | Success: True


Phase 2: Assembly Task:   1%|          | 8/1000 [00:01<03:10,  5.20it/s]

Phase 2 - Ep 7: Reward -0.101 | W* 0.865 | Success: True
Phase 2 - Ep 8: Reward -0.106 | W* 0.828 | Success: True


Phase 2: Assembly Task:   1%|          | 10/1000 [00:01<02:37,  6.28it/s]

Phase 2 - Ep 9: Reward 0.008 | W* 0.846 | Success: True
Phase 2 - Ep 10: Reward -0.069 | W* 0.820 | Success: True


Phase 2: Assembly Task:   1%|          | 11/1000 [00:02<02:44,  6.01it/s]

Phase 2 - Ep 11: Reward -0.090 | W* 0.907 | Success: True
Phase 2 - Ep 12: Reward 0.057 | W* 0.867 | Success: True


Phase 2: Assembly Task:   1%|▏         | 14/1000 [00:02<02:23,  6.87it/s]

Phase 2 - Ep 13: Reward -0.110 | W* 0.877 | Success: True
Phase 2 - Ep 14: Reward -0.050 | W* 0.825 | Success: True


Phase 2: Assembly Task:   2%|▏         | 16/1000 [00:02<02:31,  6.51it/s]

Phase 2 - Ep 15: Reward -0.069 | W* 0.833 | Success: True
Phase 2 - Ep 16: Reward -0.084 | W* 0.833 | Success: True


Phase 2: Assembly Task:   2%|▏         | 18/1000 [00:03<02:41,  6.08it/s]

Phase 2 - Ep 17: Reward -0.114 | W* 0.833 | Success: True
Phase 2 - Ep 18: Reward -0.069 | W* 0.835 | Success: True


Phase 2: Assembly Task:   2%|▏         | 18/1000 [00:03<02:41,  6.08it/s]

Phase 2 - Ep 19: Reward 0.090 | W* 0.815 | Success: True


Phase 2: Assembly Task:   2%|▏         | 21/1000 [00:03<03:06,  5.26it/s]

Phase 2 - Ep 20: Reward -0.186 | W* 0.828 | Success: False
Phase 2 - Ep 21: Reward 0.008 | W* 0.835 | Success: True


Phase 2: Assembly Task:   2%|▏         | 22/1000 [00:03<03:08,  5.19it/s]

Phase 2 - Ep 22: Reward -0.090 | W* 0.901 | Success: True


Phase 2: Assembly Task:   2%|▏         | 23/1000 [00:04<03:54,  4.17it/s]

Phase 2 - Ep 23: Reward -0.137 | W* 0.822 | Success: True


Phase 2: Assembly Task:   2%|▏         | 24/1000 [00:04<04:00,  4.06it/s]

Phase 2 - Ep 24: Reward -0.133 | W* 0.856 | Success: True


Phase 2: Assembly Task:   3%|▎         | 26/1000 [00:04<03:39,  4.43it/s]

Phase 2 - Ep 25: Reward -0.124 | W* 0.878 | Success: True
Phase 2 - Ep 26: Reward -0.025 | W* 0.839 | Success: True


Phase 2: Assembly Task:   3%|▎         | 28/1000 [00:05<02:59,  5.43it/s]

Phase 2 - Ep 27: Reward 0.008 | W* 0.846 | Success: True
Phase 2 - Ep 28: Reward 0.008 | W* 0.843 | Success: True


Phase 2: Assembly Task:   3%|▎         | 30/1000 [00:05<02:55,  5.51it/s]

Phase 2 - Ep 29: Reward -0.050 | W* 0.852 | Success: True
Phase 2 - Ep 30: Reward -0.069 | W* 0.833 | Success: True


Phase 2: Assembly Task:   3%|▎         | 31/1000 [00:05<02:42,  5.96it/s]

Phase 2 - Ep 31: Reward -0.025 | W* 0.848 | Success: True


Phase 2: Assembly Task:   3%|▎         | 34/1000 [00:06<02:10,  7.38it/s]

Phase 2 - Ep 32: Reward -0.090 | W* 0.824 | Success: True
Phase 2 - Ep 33: Reward 0.057 | W* 0.853 | Success: True
Phase 2 - Ep 34: Reward 0.133 | W* 0.882 | Success: True


Phase 2: Assembly Task:   4%|▎         | 36/1000 [00:06<02:32,  6.34it/s]

Phase 2 - Ep 35: Reward -0.101 | W* 0.836 | Success: True
Phase 2 - Ep 36: Reward -0.010 | W* 0.875 | Success: True


Phase 2: Assembly Task:   4%|▎         | 37/1000 [00:06<02:56,  5.44it/s]

Phase 2 - Ep 37: Reward -0.118 | W* 0.851 | Success: True
Phase 2 - Ep 38: Reward 0.090 | W* 0.819 | Success: True


Phase 2: Assembly Task:   4%|▍         | 40/1000 [00:07<02:58,  5.39it/s]

Phase 2 - Ep 39: Reward -0.141 | W* 0.866 | Success: True
Phase 2 - Ep 40: Reward 0.008 | W* 0.818 | Success: True


Phase 2: Assembly Task:   4%|▍         | 42/1000 [00:07<03:09,  5.07it/s]

Phase 2 - Ep 41: Reward -0.124 | W* 0.849 | Success: True
Phase 2 - Ep 42: Reward -0.010 | W* 0.854 | Success: True


Phase 2: Assembly Task:   4%|▍         | 44/1000 [00:08<03:15,  4.89it/s]

Phase 2 - Ep 43: Reward -0.114 | W* 0.852 | Success: True
Phase 2 - Ep 44: Reward -0.039 | W* 0.826 | Success: True


Phase 2: Assembly Task:   5%|▍         | 46/1000 [00:08<03:12,  4.96it/s]

Phase 2 - Ep 45: Reward -0.124 | W* 0.840 | Success: True
Phase 2 - Ep 46: Reward -0.010 | W* 0.820 | Success: True


Phase 2: Assembly Task:   5%|▍         | 48/1000 [00:08<02:29,  6.35it/s]

Phase 2 - Ep 47: Reward 0.008 | W* 0.820 | Success: True
Phase 2 - Ep 48: Reward -0.025 | W* 0.829 | Success: True


Phase 2: Assembly Task:   5%|▍         | 49/1000 [00:09<03:10,  4.99it/s]

Phase 2 - Ep 49: Reward -0.148 | W* 0.821 | Success: True
Phase 2 - Ep 50: Reward 0.090 | W* 0.858 | Success: True


Phase 2: Assembly Task:   5%|▌         | 51/1000 [00:09<02:27,  6.43it/s]

Phase 2 - Ep 51: Reward -0.050 | W* 0.867 | Success: True


Phase 2: Assembly Task:   5%|▌         | 52/1000 [00:09<02:42,  5.82it/s]

Phase 2 - Ep 52: Reward -0.106 | W* 0.830 | Success: True
Phase 2 - Ep 53: Reward 0.008 | W* 0.880 | Success: True


Phase 2: Assembly Task:   6%|▌         | 55/1000 [00:10<02:24,  6.55it/s]

Phase 2 - Ep 54: Reward -0.121 | W* 0.861 | Success: True
Phase 2 - Ep 55: Reward -0.025 | W* 0.815 | Success: True


Phase 2: Assembly Task:   6%|▌         | 56/1000 [00:10<02:21,  6.65it/s]

Phase 2 - Ep 56: Reward -0.060 | W* 0.836 | Success: True
Phase 2 - Ep 57: Reward 0.090 | W* 0.823 | Success: True


Phase 2: Assembly Task:   6%|▌         | 59/1000 [00:10<02:21,  6.66it/s]

Phase 2 - Ep 58: Reward -0.106 | W* 0.841 | Success: True
Phase 2 - Ep 59: Reward -0.096 | W* 0.825 | Success: True


Phase 2: Assembly Task:   6%|▌         | 60/1000 [00:10<02:21,  6.66it/s]

Phase 2 - Ep 60: Reward -0.077 | W* 0.883 | Success: True


Phase 2: Assembly Task:   6%|▌         | 61/1000 [00:11<02:42,  5.76it/s]

Phase 2 - Ep 61: Reward -0.130 | W* 0.829 | Success: True


Phase 2: Assembly Task:   6%|▋         | 63/1000 [00:11<02:42,  5.78it/s]

Phase 2 - Ep 62: Reward -0.137 | W* 0.816 | Success: True
Phase 2 - Ep 63: Reward -0.025 | W* 0.851 | Success: True
Phase 2 - Ep 64: Reward 0.090 | W* 0.838 | Success: True


Phase 2: Assembly Task:   7%|▋         | 66/1000 [00:11<02:16,  6.83it/s]

Phase 2 - Ep 65: Reward -0.010 | W* 0.885 | Success: True
Phase 2 - Ep 66: Reward -0.025 | W* 0.841 | Success: True


Phase 2: Assembly Task:   7%|▋         | 68/1000 [00:12<02:34,  6.01it/s]

Phase 2 - Ep 67: Reward -0.118 | W* 0.851 | Success: True
Phase 2 - Ep 68: Reward -0.010 | W* 0.864 | Success: True


Phase 2: Assembly Task:   7%|▋         | 69/1000 [00:12<03:07,  4.97it/s]

Phase 2 - Ep 69: Reward -0.135 | W* 0.895 | Success: True
Phase 2 - Ep 70: Reward 0.057 | W* 0.815 | Success: True


Phase 2: Assembly Task:   7%|▋         | 72/1000 [00:12<02:38,  5.87it/s]

Phase 2 - Ep 71: Reward -0.137 | W* 0.821 | Success: True
Phase 2 - Ep 72: Reward 0.030 | W* 0.812 | Success: True


Phase 2: Assembly Task:   7%|▋         | 73/1000 [00:13<02:50,  5.44it/s]

Phase 2 - Ep 73: Reward -0.124 | W* 0.817 | Success: True


Phase 2: Assembly Task:   7%|▋         | 74/1000 [00:13<03:30,  4.40it/s]

Phase 2 - Ep 74: Reward -0.157 | W* 0.816 | Success: True


Phase 2: Assembly Task:   8%|▊         | 76/1000 [00:13<03:23,  4.54it/s]

Phase 2 - Ep 75: Reward -0.137 | W* 0.909 | Success: True
Phase 2 - Ep 76: Reward -0.077 | W* 0.846 | Success: True


Phase 2: Assembly Task:   8%|▊         | 78/1000 [00:14<02:24,  6.38it/s]

Phase 2 - Ep 77: Reward 0.057 | W* 0.825 | Success: True
Phase 2 - Ep 78: Reward 0.133 | W* 0.846 | Success: True


Phase 2: Assembly Task:   8%|▊         | 79/1000 [00:14<02:45,  5.55it/s]

Phase 2 - Ep 79: Reward -0.137 | W* 0.846 | Success: True


Phase 2: Assembly Task:   8%|▊         | 80/1000 [00:14<03:00,  5.10it/s]

Phase 2 - Ep 80: Reward -0.135 | W* 0.871 | Success: True


Phase 2: Assembly Task:   8%|▊         | 82/1000 [00:15<03:33,  4.29it/s]

Phase 2 - Ep 81: Reward -0.186 | W* 0.862 | Success: False
Phase 2 - Ep 82: Reward -0.114 | W* 0.825 | Success: True


Phase 2: Assembly Task:   8%|▊         | 84/1000 [00:15<03:15,  4.68it/s]

Phase 2 - Ep 83: Reward -0.139 | W* 0.869 | Success: True
Phase 2 - Ep 84: Reward -0.084 | W* 0.820 | Success: True


Phase 2: Assembly Task:   8%|▊         | 85/1000 [00:15<03:12,  4.74it/s]

Phase 2 - Ep 85: Reward -0.110 | W* 0.820 | Success: True


Phase 2: Assembly Task:   9%|▊         | 87/1000 [00:16<02:50,  5.35it/s]

Phase 2 - Ep 86: Reward -0.121 | W* 0.844 | Success: True
Phase 2 - Ep 87: Reward -0.060 | W* 0.891 | Success: True


Phase 2: Assembly Task:   9%|▉         | 89/1000 [00:16<02:43,  5.57it/s]

Phase 2 - Ep 88: Reward -0.110 | W* 0.854 | Success: True
Phase 2 - Ep 89: Reward -0.090 | W* 0.907 | Success: True


Phase 2: Assembly Task:   9%|▉         | 91/1000 [00:16<02:26,  6.19it/s]

Phase 2 - Ep 90: Reward -0.050 | W* 0.844 | Success: True
Phase 2 - Ep 91: Reward -0.039 | W* 0.821 | Success: True


Phase 2: Assembly Task:   9%|▉         | 92/1000 [00:17<02:56,  5.14it/s]

Phase 2 - Ep 92: Reward -0.121 | W* 0.916 | Success: True


Phase 2: Assembly Task:   9%|▉         | 94/1000 [00:17<02:54,  5.19it/s]

Phase 2 - Ep 93: Reward -0.106 | W* 0.854 | Success: True
Phase 2 - Ep 94: Reward -0.039 | W* 0.884 | Success: True


Phase 2: Assembly Task:  10%|▉         | 96/1000 [00:17<02:41,  5.61it/s]

Phase 2 - Ep 95: Reward -0.118 | W* 0.888 | Success: True
Phase 2 - Ep 96: Reward -0.010 | W* 0.854 | Success: True


Phase 2: Assembly Task:  10%|▉         | 97/1000 [00:18<02:54,  5.19it/s]

Phase 2 - Ep 97: Reward -0.118 | W* 0.875 | Success: True


Phase 2: Assembly Task:  10%|▉         | 99/1000 [00:18<03:07,  4.81it/s]

Phase 2 - Ep 98: Reward -0.150 | W* 0.836 | Success: True
Phase 2 - Ep 99: Reward -0.101 | W* 0.842 | Success: True


Phase 2: Assembly Task:  10%|▉         | 99/1000 [00:18<03:07,  4.81it/s]

Phase 2 - Ep 100: Reward 0.090 | W* 0.826 | Success: True


Phase 2: Assembly Task:  10%|█         | 101/1000 [00:18<02:55,  5.11it/s]

Phase 2 - Ep 101: Reward -0.084 | W* 0.816 | Success: True


Phase 2: Assembly Task:  10%|█         | 102/1000 [00:19<04:02,  3.71it/s]

Phase 2 - Ep 102: Reward -0.141 | W* 0.857 | Success: True


Phase 2: Assembly Task:  10%|█         | 103/1000 [00:19<04:05,  3.66it/s]

Phase 2 - Ep 103: Reward -0.110 | W* 0.913 | Success: True


Phase 2: Assembly Task:  10%|█         | 105/1000 [00:20<03:34,  4.17it/s]

Phase 2 - Ep 104: Reward -0.135 | W* 0.872 | Success: True
Phase 2 - Ep 105: Reward 0.030 | W* 0.887 | Success: True


Phase 2: Assembly Task:  11%|█         | 107/1000 [00:20<02:49,  5.28it/s]

Phase 2 - Ep 106: Reward 0.057 | W* 0.839 | Success: True
Phase 2 - Ep 107: Reward -0.050 | W* 0.838 | Success: True


Phase 2: Assembly Task:  11%|█         | 109/1000 [00:20<02:29,  5.97it/s]

Phase 2 - Ep 108: Reward -0.010 | W* 0.820 | Success: True
Phase 2 - Ep 109: Reward -0.060 | W* 0.852 | Success: True


Phase 2: Assembly Task:  11%|█         | 110/1000 [00:20<03:08,  4.71it/s]

Phase 2 - Ep 110: Reward -0.135 | W* 0.841 | Success: True


Phase 2: Assembly Task:  11%|█         | 112/1000 [00:21<03:07,  4.74it/s]

Phase 2 - Ep 111: Reward -0.110 | W* 0.855 | Success: True
Phase 2 - Ep 112: Reward -0.039 | W* 0.864 | Success: True


Phase 2: Assembly Task:  11%|█▏        | 114/1000 [00:21<03:08,  4.70it/s]

Phase 2 - Ep 113: Reward -0.110 | W* 0.876 | Success: True
Phase 2 - Ep 114: Reward -0.060 | W* 0.844 | Success: True


Phase 2: Assembly Task:  12%|█▏        | 115/1000 [00:22<03:10,  4.64it/s]

Phase 2 - Ep 115: Reward -0.096 | W* 0.871 | Success: True


Phase 2: Assembly Task:  12%|█▏        | 117/1000 [00:22<03:23,  4.33it/s]

Phase 2 - Ep 116: Reward -0.159 | W* 0.899 | Success: True
Phase 2 - Ep 117: Reward -0.060 | W* 0.817 | Success: True


Phase 2: Assembly Task:  12%|█▏        | 119/1000 [00:22<02:53,  5.08it/s]

Phase 2 - Ep 118: Reward -0.121 | W* 0.832 | Success: True
Phase 2 - Ep 119: Reward 0.008 | W* 0.820 | Success: True


Phase 2: Assembly Task:  12%|█▏        | 121/1000 [00:23<03:29,  4.20it/s]

Phase 2 - Ep 120: Reward -0.161 | W* 0.850 | Success: True
Phase 2 - Ep 121: Reward -0.050 | W* 0.825 | Success: True


Phase 2: Assembly Task:  12%|█▏        | 123/1000 [00:24<03:16,  4.46it/s]

Phase 2 - Ep 122: Reward -0.135 | W* 0.853 | Success: True
Phase 2 - Ep 123: Reward -0.084 | W* 0.826 | Success: True


Phase 2: Assembly Task:  12%|█▏        | 124/1000 [00:24<03:27,  4.23it/s]

Phase 2 - Ep 124: Reward -0.139 | W* 0.821 | Success: True


Phase 2: Assembly Task:  12%|█▎        | 125/1000 [00:24<03:55,  3.72it/s]

Phase 2 - Ep 125: Reward -0.154 | W* 0.851 | Success: True


Phase 2: Assembly Task:  13%|█▎        | 127/1000 [00:25<03:33,  4.09it/s]

Phase 2 - Ep 126: Reward -0.147 | W* 0.900 | Success: True
Phase 2 - Ep 127: Reward -0.050 | W* 0.821 | Success: True


Phase 2: Assembly Task:  13%|█▎        | 128/1000 [00:25<04:02,  3.59it/s]

Phase 2 - Ep 128: Reward -0.145 | W* 0.836 | Success: True


Phase 2: Assembly Task:  13%|█▎        | 130/1000 [00:25<03:38,  3.98it/s]

Phase 2 - Ep 129: Reward -0.147 | W* 0.837 | Success: True
Phase 2 - Ep 130: Reward 0.008 | W* 0.835 | Success: True


Phase 2: Assembly Task:  13%|█▎        | 131/1000 [00:26<03:28,  4.17it/s]

Phase 2 - Ep 131: Reward -0.096 | W* 0.867 | Success: True


Phase 2: Assembly Task:  13%|█▎        | 132/1000 [00:26<03:33,  4.06it/s]

Phase 2 - Ep 132: Reward -0.114 | W* 0.818 | Success: True


Phase 2: Assembly Task:  13%|█▎        | 133/1000 [00:26<04:10,  3.46it/s]

Phase 2 - Ep 133: Reward -0.147 | W* 0.832 | Success: True


Phase 2: Assembly Task:  14%|█▎        | 135/1000 [00:27<03:33,  4.05it/s]

Phase 2 - Ep 134: Reward -0.153 | W* 0.841 | Success: True
Phase 2 - Ep 135: Reward -0.039 | W* 0.864 | Success: True
Phase 2 - Ep 136: Reward 0.133 | W* 0.819 | Success: True


Phase 2: Assembly Task:  14%|█▍        | 138/1000 [00:27<02:34,  5.58it/s]

Phase 2 - Ep 137: Reward 0.008 | W* 0.846 | Success: True
Phase 2 - Ep 138: Reward -0.090 | W* 0.824 | Success: True


Phase 2: Assembly Task:  14%|█▍        | 141/1000 [00:27<01:59,  7.20it/s]

Phase 2 - Ep 139: Reward -0.069 | W* 0.840 | Success: True
Phase 2 - Ep 140: Reward 0.057 | W* 0.839 | Success: True
Phase 2 - Ep 141: Reward -0.025 | W* 0.889 | Success: True


Phase 2: Assembly Task:  14%|█▍        | 143/1000 [00:28<01:47,  7.96it/s]

Phase 2 - Ep 142: Reward 0.008 | W* 0.880 | Success: True
Phase 2 - Ep 143: Reward -0.025 | W* 0.829 | Success: True


Phase 2: Assembly Task:  14%|█▍        | 145/1000 [00:28<02:42,  5.26it/s]

Phase 2 - Ep 144: Reward -0.160 | W* 0.826 | Success: True
Phase 2 - Ep 145: Reward -0.106 | W* 0.884 | Success: True


Phase 2: Assembly Task:  15%|█▍        | 146/1000 [00:28<02:25,  5.88it/s]

Phase 2 - Ep 146: Reward -0.010 | W* 0.841 | Success: True


Phase 2: Assembly Task:  15%|█▍        | 147/1000 [00:29<02:42,  5.25it/s]

Phase 2 - Ep 147: Reward -0.135 | W* 0.887 | Success: True


Phase 2: Assembly Task:  15%|█▍        | 149/1000 [00:29<02:52,  4.94it/s]

Phase 2 - Ep 148: Reward -0.139 | W* 0.887 | Success: True
Phase 2 - Ep 149: Reward -0.114 | W* 0.849 | Success: True


Phase 2: Assembly Task:  15%|█▌        | 150/1000 [00:29<03:29,  4.06it/s]

Phase 2 - Ep 150: Reward -0.160 | W* 0.882 | Success: True


Phase 2: Assembly Task:  15%|█▌        | 151/1000 [00:30<03:28,  4.07it/s]

Phase 2 - Ep 151: Reward -0.133 | W* 0.835 | Success: True


Phase 2: Assembly Task:  15%|█▌        | 153/1000 [00:30<03:15,  4.33it/s]

Phase 2 - Ep 152: Reward -0.159 | W* 0.865 | Success: True
Phase 2 - Ep 153: Reward -0.050 | W* 0.838 | Success: True


Phase 2: Assembly Task:  15%|█▌        | 154/1000 [00:31<03:54,  3.61it/s]

Phase 2 - Ep 154: Reward -0.204 | W* 0.900 | Success: False


Phase 2: Assembly Task:  16%|█▌        | 155/1000 [00:31<03:51,  3.65it/s]

Phase 2 - Ep 155: Reward -0.121 | W* 0.818 | Success: True


Phase 2: Assembly Task:  16%|█▌        | 157/1000 [00:31<03:47,  3.70it/s]

Phase 2 - Ep 156: Reward -0.186 | W* 0.876 | Success: False
Phase 2 - Ep 157: Reward 0.030 | W* 0.837 | Success: True


Phase 2: Assembly Task:  16%|█▌        | 159/1000 [00:32<02:54,  4.81it/s]

Phase 2 - Ep 158: Reward 0.090 | W* 0.815 | Success: True
Phase 2 - Ep 159: Reward -0.096 | W* 0.849 | Success: True


Phase 2: Assembly Task:  16%|█▌        | 160/1000 [00:32<03:15,  4.30it/s]

Phase 2 - Ep 160: Reward -0.130 | W* 0.876 | Success: True


Phase 2: Assembly Task:  16%|█▌        | 162/1000 [00:33<03:53,  3.59it/s]

Phase 2 - Ep 161: Reward -0.161 | W* 0.834 | Success: True
Phase 2 - Ep 162: Reward -0.069 | W* 0.859 | Success: True


Phase 2: Assembly Task:  16%|█▋        | 163/1000 [00:33<03:27,  4.03it/s]

Phase 2 - Ep 163: Reward -0.069 | W* 0.872 | Success: True


Phase 2: Assembly Task:  16%|█▋        | 164/1000 [00:33<03:19,  4.19it/s]

Phase 2 - Ep 164: Reward -0.101 | W* 0.857 | Success: True


Phase 2: Assembly Task:  16%|█▋        | 165/1000 [00:33<03:33,  3.90it/s]

Phase 2 - Ep 165: Reward -0.127 | W* 0.847 | Success: True


Phase 2: Assembly Task:  17%|█▋        | 166/1000 [00:34<04:08,  3.36it/s]

Phase 2 - Ep 166: Reward -0.147 | W* 0.820 | Success: True


Phase 2: Assembly Task:  17%|█▋        | 167/1000 [00:34<05:01,  2.77it/s]

Phase 2 - Ep 167: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task:  17%|█▋        | 168/1000 [00:35<04:39,  2.98it/s]

Phase 2 - Ep 168: Reward -0.114 | W* 0.828 | Success: True


Phase 2: Assembly Task:  17%|█▋        | 170/1000 [00:35<03:50,  3.60it/s]

Phase 2 - Ep 169: Reward -0.121 | W* 0.814 | Success: True
Phase 2 - Ep 170: Reward -0.039 | W* 0.839 | Success: True


Phase 2: Assembly Task:  17%|█▋        | 171/1000 [00:35<04:31,  3.06it/s]

Phase 2 - Ep 171: Reward -0.204 | W* 0.842 | Success: False


Phase 2: Assembly Task:  17%|█▋        | 173/1000 [00:36<03:41,  3.73it/s]

Phase 2 - Ep 172: Reward -0.147 | W* 0.860 | Success: True
Phase 2 - Ep 173: Reward -0.010 | W* 0.864 | Success: True


Phase 2: Assembly Task:  17%|█▋        | 174/1000 [00:36<03:49,  3.59it/s]

Phase 2 - Ep 174: Reward -0.133 | W* 0.909 | Success: True


Phase 2: Assembly Task:  18%|█▊        | 175/1000 [00:36<03:40,  3.74it/s]

Phase 2 - Ep 175: Reward -0.106 | W* 0.890 | Success: True


Phase 2: Assembly Task:  18%|█▊        | 177/1000 [00:37<03:19,  4.13it/s]

Phase 2 - Ep 176: Reward -0.121 | W* 0.841 | Success: True
Phase 2 - Ep 177: Reward -0.106 | W* 0.922 | Success: True


Phase 2: Assembly Task:  18%|█▊        | 179/1000 [00:37<02:38,  5.19it/s]

Phase 2 - Ep 178: Reward -0.110 | W* 0.866 | Success: True
Phase 2 - Ep 179: Reward -0.010 | W* 0.854 | Success: True
Phase 2 - Ep 180: Reward 0.190 | W* 0.830 | Success: True


Phase 2: Assembly Task:  18%|█▊        | 181/1000 [00:37<01:57,  6.99it/s]

Phase 2 - Ep 181: Reward -0.010 | W* 0.833 | Success: True


Phase 2: Assembly Task:  18%|█▊        | 183/1000 [00:38<02:23,  5.69it/s]

Phase 2 - Ep 182: Reward -0.133 | W* 0.817 | Success: True
Phase 2 - Ep 183: Reward -0.077 | W* 0.827 | Success: True


Phase 2: Assembly Task:  18%|█▊        | 184/1000 [00:38<03:00,  4.51it/s]

Phase 2 - Ep 184: Reward -0.139 | W* 0.831 | Success: True


Phase 2: Assembly Task:  19%|█▊        | 186/1000 [00:39<03:11,  4.25it/s]

Phase 2 - Ep 185: Reward -0.141 | W* 0.914 | Success: True
Phase 2 - Ep 186: Reward -0.077 | W* 0.851 | Success: True


Phase 2: Assembly Task:  19%|█▊        | 187/1000 [00:39<03:26,  3.93it/s]

Phase 2 - Ep 187: Reward -0.147 | W* 0.840 | Success: True
Phase 2 - Ep 188: Reward 0.133 | W* 0.864 | Success: True


Phase 2: Assembly Task:  19%|█▉        | 189/1000 [00:39<02:57,  4.57it/s]

Phase 2 - Ep 189: Reward -0.147 | W* 0.877 | Success: True


Phase 2: Assembly Task:  19%|█▉        | 190/1000 [00:40<02:54,  4.65it/s]

Phase 2 - Ep 190: Reward -0.118 | W* 0.833 | Success: True


Phase 2: Assembly Task:  19%|█▉        | 192/1000 [00:40<02:44,  4.91it/s]

Phase 2 - Ep 191: Reward -0.148 | W* 0.875 | Success: True
Phase 2 - Ep 192: Reward 0.008 | W* 0.843 | Success: True


Phase 2: Assembly Task:  19%|█▉        | 193/1000 [00:40<02:40,  5.04it/s]

Phase 2 - Ep 193: Reward -0.101 | W* 0.822 | Success: True


Phase 2: Assembly Task:  20%|█▉        | 195/1000 [00:41<02:33,  5.24it/s]

Phase 2 - Ep 194: Reward -0.147 | W* 0.836 | Success: True
Phase 2 - Ep 195: Reward -0.025 | W* 0.832 | Success: True


Phase 2: Assembly Task:  20%|█▉        | 197/1000 [00:41<02:43,  4.90it/s]

Phase 2 - Ep 196: Reward -0.139 | W* 0.835 | Success: True
Phase 2 - Ep 197: Reward -0.101 | W* 0.829 | Success: True


Phase 2: Assembly Task:  20%|█▉        | 198/1000 [00:41<02:40,  5.01it/s]

Phase 2 - Ep 198: Reward -0.101 | W* 0.830 | Success: True


Phase 2: Assembly Task:  20%|██        | 200/1000 [00:42<02:44,  4.85it/s]

Phase 2 - Ep 199: Reward -0.143 | W* 0.824 | Success: True
Phase 2 - Ep 200: Reward -0.101 | W* 0.887 | Success: True


Phase 2: Assembly Task:  20%|██        | 201/1000 [00:42<02:26,  5.44it/s]

Phase 2 - Ep 201: Reward -0.050 | W* 0.894 | Success: True


Phase 2: Assembly Task:  20%|██        | 202/1000 [00:42<02:34,  5.16it/s]

Phase 2 - Ep 202: Reward -0.121 | W* 0.845 | Success: True


Phase 2: Assembly Task:  20%|██        | 203/1000 [00:42<03:20,  3.98it/s]

Phase 2 - Ep 203: Reward -0.160 | W* 0.827 | Success: True


Phase 2: Assembly Task:  20%|██        | 204/1000 [00:43<03:33,  3.74it/s]

Phase 2 - Ep 204: Reward -0.150 | W* 0.839 | Success: True
Phase 2 - Ep 205: Reward 0.090 | W* 0.823 | Success: True


Phase 2: Assembly Task:  21%|██        | 206/1000 [00:43<02:43,  4.86it/s]

Phase 2 - Ep 206: Reward -0.110 | W* 0.817 | Success: True


Phase 2: Assembly Task:  21%|██        | 207/1000 [00:43<03:09,  4.18it/s]

Phase 2 - Ep 207: Reward -0.154 | W* 0.834 | Success: True


Phase 2: Assembly Task:  21%|██        | 210/1000 [00:44<02:19,  5.68it/s]

Phase 2 - Ep 208: Reward -0.124 | W* 0.858 | Success: True
Phase 2 - Ep 209: Reward 0.008 | W* 0.823 | Success: True
Phase 2 - Ep 210: Reward 0.008 | W* 0.869 | Success: True


Phase 2: Assembly Task:  21%|██        | 211/1000 [00:44<02:31,  5.21it/s]

Phase 2 - Ep 211: Reward -0.137 | W* 0.870 | Success: True


Phase 2: Assembly Task:  21%|██▏       | 213/1000 [00:44<02:30,  5.21it/s]

Phase 2 - Ep 212: Reward -0.133 | W* 0.880 | Success: True
Phase 2 - Ep 213: Reward -0.084 | W* 0.864 | Success: True


Phase 2: Assembly Task:  22%|██▏       | 215/1000 [00:45<02:41,  4.86it/s]

Phase 2 - Ep 214: Reward -0.145 | W* 0.821 | Success: True
Phase 2 - Ep 215: Reward -0.114 | W* 0.899 | Success: True


Phase 2: Assembly Task:  22%|██▏       | 216/1000 [00:45<02:52,  4.55it/s]

Phase 2 - Ep 216: Reward -0.141 | W* 0.853 | Success: True


Phase 2: Assembly Task:  22%|██▏       | 218/1000 [00:45<02:44,  4.75it/s]

Phase 2 - Ep 217: Reward -0.141 | W* 0.853 | Success: True
Phase 2 - Ep 218: Reward 0.030 | W* 0.871 | Success: True


Phase 2: Assembly Task:  22%|██▏       | 219/1000 [00:46<03:53,  3.34it/s]

Phase 2 - Ep 219: Reward -0.186 | W* 0.819 | Success: False


Phase 2: Assembly Task:  22%|██▏       | 220/1000 [00:46<04:11,  3.10it/s]

Phase 2 - Ep 220: Reward -0.139 | W* 0.853 | Success: True


Phase 2: Assembly Task:  22%|██▏       | 222/1000 [00:47<03:07,  4.14it/s]

Phase 2 - Ep 221: Reward -0.084 | W* 0.853 | Success: True
Phase 2 - Ep 222: Reward 0.008 | W* 0.835 | Success: True


Phase 2: Assembly Task:  22%|██▏       | 223/1000 [00:47<03:16,  3.96it/s]

Phase 2 - Ep 223: Reward -0.118 | W* 0.833 | Success: True


Phase 2: Assembly Task:  22%|██▎       | 225/1000 [00:48<03:20,  3.87it/s]

Phase 2 - Ep 224: Reward -0.157 | W* 0.825 | Success: True
Phase 2 - Ep 225: Reward -0.010 | W* 0.820 | Success: True


Phase 2: Assembly Task:  23%|██▎       | 227/1000 [00:48<02:47,  4.62it/s]

Phase 2 - Ep 226: Reward -0.050 | W* 0.833 | Success: True
Phase 2 - Ep 227: Reward -0.050 | W* 0.871 | Success: True


Phase 2: Assembly Task:  23%|██▎       | 228/1000 [00:48<03:40,  3.49it/s]

Phase 2 - Ep 228: Reward -0.147 | W* 0.843 | Success: True


Phase 2: Assembly Task:  23%|██▎       | 229/1000 [00:49<03:28,  3.70it/s]

Phase 2 - Ep 229: Reward -0.077 | W* 0.903 | Success: True


Phase 2: Assembly Task:  23%|██▎       | 231/1000 [00:49<02:52,  4.46it/s]

Phase 2 - Ep 230: Reward -0.133 | W* 0.898 | Success: True
Phase 2 - Ep 231: Reward -0.069 | W* 0.833 | Success: True


Phase 2: Assembly Task:  23%|██▎       | 233/1000 [00:49<02:35,  4.93it/s]

Phase 2 - Ep 232: Reward -0.133 | W* 0.866 | Success: True
Phase 2 - Ep 233: Reward -0.069 | W* 0.833 | Success: True


Phase 2: Assembly Task:  24%|██▎       | 235/1000 [00:50<02:33,  5.00it/s]

Phase 2 - Ep 234: Reward -0.127 | W* 0.827 | Success: True
Phase 2 - Ep 235: Reward -0.096 | W* 0.873 | Success: True


Phase 2: Assembly Task:  24%|██▎       | 236/1000 [00:50<03:24,  3.73it/s]

Phase 2 - Ep 236: Reward -0.196 | W* 0.902 | Success: False


Phase 2: Assembly Task:  24%|██▎       | 237/1000 [00:51<03:52,  3.28it/s]

Phase 2 - Ep 237: Reward -0.148 | W* 0.836 | Success: True


Phase 2: Assembly Task:  24%|██▍       | 239/1000 [00:51<03:01,  4.20it/s]

Phase 2 - Ep 238: Reward -0.106 | W* 0.828 | Success: True
Phase 2 - Ep 239: Reward -0.025 | W* 0.880 | Success: True


Phase 2: Assembly Task:  24%|██▍       | 240/1000 [00:51<02:34,  4.92it/s]

Phase 2 - Ep 240: Reward -0.010 | W* 0.864 | Success: True


Phase 2: Assembly Task:  24%|██▍       | 242/1000 [00:51<02:21,  5.35it/s]

Phase 2 - Ep 241: Reward -0.127 | W* 0.835 | Success: True
Phase 2 - Ep 242: Reward -0.060 | W* 0.821 | Success: True


Phase 2: Assembly Task:  24%|██▍       | 243/1000 [00:52<02:26,  5.17it/s]

Phase 2 - Ep 243: Reward -0.110 | W* 0.881 | Success: True


Phase 2: Assembly Task:  24%|██▍       | 245/1000 [00:52<02:30,  5.01it/s]

Phase 2 - Ep 244: Reward -0.130 | W* 0.851 | Success: True
Phase 2 - Ep 245: Reward -0.096 | W* 0.818 | Success: True


Phase 2: Assembly Task:  25%|██▍       | 246/1000 [00:52<02:35,  4.85it/s]

Phase 2 - Ep 246: Reward -0.090 | W* 0.826 | Success: True


Phase 2: Assembly Task:  25%|██▍       | 247/1000 [00:53<02:55,  4.29it/s]

Phase 2 - Ep 247: Reward -0.130 | W* 0.859 | Success: True


Phase 2: Assembly Task:  25%|██▍       | 248/1000 [00:53<03:25,  3.66it/s]

Phase 2 - Ep 248: Reward -0.143 | W* 0.870 | Success: True


Phase 2: Assembly Task:  25%|██▍       | 249/1000 [00:53<03:52,  3.23it/s]

Phase 2 - Ep 249: Reward -0.186 | W* 0.847 | Success: False


Phase 2: Assembly Task:  25%|██▌       | 251/1000 [00:54<03:25,  3.64it/s]

Phase 2 - Ep 250: Reward -0.158 | W* 0.896 | Success: True
Phase 2 - Ep 251: Reward -0.090 | W* 0.870 | Success: True


Phase 2: Assembly Task:  25%|██▌       | 252/1000 [00:54<02:58,  4.20it/s]

Phase 2 - Ep 252: Reward -0.077 | W* 0.883 | Success: True


Phase 2: Assembly Task:  25%|██▌       | 254/1000 [00:55<03:08,  3.96it/s]

Phase 2 - Ep 253: Reward -0.186 | W* 0.839 | Success: False
Phase 2 - Ep 254: Reward -0.110 | W* 0.826 | Success: True


Phase 2: Assembly Task:  26%|██▌       | 255/1000 [00:55<03:23,  3.66it/s]

Phase 2 - Ep 255: Reward -0.154 | W* 0.896 | Success: True


Phase 2: Assembly Task:  26%|██▌       | 256/1000 [00:55<03:26,  3.61it/s]

Phase 2 - Ep 256: Reward -0.141 | W* 0.867 | Success: True


Phase 2: Assembly Task:  26%|██▌       | 257/1000 [00:55<03:32,  3.50it/s]

Phase 2 - Ep 257: Reward -0.130 | W* 0.872 | Success: True


Phase 2: Assembly Task:  26%|██▌       | 259/1000 [00:56<03:02,  4.05it/s]

Phase 2 - Ep 258: Reward -0.130 | W* 0.830 | Success: True
Phase 2 - Ep 259: Reward -0.025 | W* 0.877 | Success: True


Phase 2: Assembly Task:  26%|██▌       | 260/1000 [00:56<02:40,  4.60it/s]

Phase 2 - Ep 260: Reward -0.025 | W* 0.860 | Success: True


Phase 2: Assembly Task:  26%|██▌       | 261/1000 [00:56<03:19,  3.70it/s]

Phase 2 - Ep 261: Reward -0.151 | W* 0.866 | Success: True


Phase 2: Assembly Task:  26%|██▌       | 262/1000 [00:57<03:16,  3.76it/s]

Phase 2 - Ep 262: Reward -0.114 | W* 0.828 | Success: True


Phase 2: Assembly Task:  26%|██▋       | 264/1000 [00:57<03:08,  3.91it/s]

Phase 2 - Ep 263: Reward -0.143 | W* 0.852 | Success: True
Phase 2 - Ep 264: Reward -0.077 | W* 0.834 | Success: True


Phase 2: Assembly Task:  27%|██▋       | 266/1000 [00:58<02:37,  4.67it/s]

Phase 2 - Ep 265: Reward -0.110 | W* 0.836 | Success: True
Phase 2 - Ep 266: Reward -0.096 | W* 0.867 | Success: True


Phase 2: Assembly Task:  27%|██▋       | 267/1000 [00:58<03:11,  3.83it/s]

Phase 2 - Ep 267: Reward -0.186 | W* 0.849 | Success: False
Phase 2 - Ep 268: Reward 0.030 | W* 0.899 | Success: True


Phase 2: Assembly Task:  27%|██▋       | 270/1000 [00:59<02:47,  4.36it/s]

Phase 2 - Ep 269: Reward -0.186 | W* 0.842 | Success: False
Phase 2 - Ep 270: Reward -0.101 | W* 0.847 | Success: True


Phase 2: Assembly Task:  27%|██▋       | 271/1000 [00:59<03:43,  3.26it/s]

Phase 2 - Ep 271: Reward -0.162 | W* 0.817 | Success: True


Phase 2: Assembly Task:  27%|██▋       | 272/1000 [01:00<04:25,  2.75it/s]

Phase 2 - Ep 272: Reward -0.186 | W* 0.817 | Success: False


Phase 2: Assembly Task:  27%|██▋       | 274/1000 [01:00<04:00,  3.02it/s]

Phase 2 - Ep 273: Reward -0.204 | W* 0.840 | Success: False
Phase 2 - Ep 274: Reward -0.039 | W* 0.839 | Success: True


Phase 2: Assembly Task:  28%|██▊       | 275/1000 [01:01<04:09,  2.90it/s]

Phase 2 - Ep 275: Reward -0.150 | W* 0.833 | Success: True


Phase 2: Assembly Task:  28%|██▊       | 276/1000 [01:01<03:51,  3.12it/s]

Phase 2 - Ep 276: Reward -0.101 | W* 0.835 | Success: True


Phase 2: Assembly Task:  28%|██▊       | 277/1000 [01:02<04:45,  2.53it/s]

Phase 2 - Ep 277: Reward -0.186 | W* 0.818 | Success: False


Phase 2: Assembly Task:  28%|██▊       | 279/1000 [01:02<03:42,  3.24it/s]

Phase 2 - Ep 278: Reward -0.096 | W* 0.867 | Success: True
Phase 2 - Ep 279: Reward -0.114 | W* 0.849 | Success: True


Phase 2: Assembly Task:  28%|██▊       | 280/1000 [01:02<02:57,  4.05it/s]

Phase 2 - Ep 280: Reward 0.030 | W* 0.824 | Success: True


Phase 2: Assembly Task:  28%|██▊       | 281/1000 [01:02<03:13,  3.73it/s]

Phase 2 - Ep 281: Reward -0.150 | W* 0.842 | Success: True


Phase 2: Assembly Task:  28%|██▊       | 283/1000 [01:03<02:44,  4.37it/s]

Phase 2 - Ep 282: Reward -0.135 | W* 0.853 | Success: True
Phase 2 - Ep 283: Reward -0.077 | W* 0.836 | Success: True


Phase 2: Assembly Task:  28%|██▊       | 285/1000 [01:03<02:25,  4.90it/s]

Phase 2 - Ep 284: Reward -0.127 | W* 0.840 | Success: True
Phase 2 - Ep 285: Reward -0.069 | W* 0.864 | Success: True


Phase 2: Assembly Task:  29%|██▊       | 287/1000 [01:03<02:03,  5.77it/s]

Phase 2 - Ep 286: Reward -0.010 | W* 0.854 | Success: True
Phase 2 - Ep 287: Reward -0.077 | W* 0.841 | Success: True


Phase 2: Assembly Task:  29%|██▉       | 288/1000 [01:04<02:15,  5.24it/s]

Phase 2 - Ep 288: Reward -0.130 | W* 0.851 | Success: True


Phase 2: Assembly Task:  29%|██▉       | 289/1000 [01:04<02:36,  4.53it/s]

Phase 2 - Ep 289: Reward -0.148 | W* 0.885 | Success: True


Phase 2: Assembly Task:  29%|██▉       | 290/1000 [01:04<02:41,  4.40it/s]

Phase 2 - Ep 290: Reward -0.127 | W* 0.822 | Success: True


Phase 2: Assembly Task:  29%|██▉       | 291/1000 [01:05<03:01,  3.90it/s]

Phase 2 - Ep 291: Reward -0.133 | W* 0.886 | Success: True


Phase 2: Assembly Task:  29%|██▉       | 292/1000 [01:05<03:12,  3.68it/s]

Phase 2 - Ep 292: Reward -0.135 | W* 0.853 | Success: True


Phase 2: Assembly Task:  29%|██▉       | 293/1000 [01:05<03:56,  2.99it/s]

Phase 2 - Ep 293: Reward -0.186 | W* 0.844 | Success: False


Phase 2: Assembly Task:  29%|██▉       | 294/1000 [01:06<04:06,  2.86it/s]

Phase 2 - Ep 294: Reward -0.204 | W* 0.905 | Success: False


Phase 2: Assembly Task:  30%|██▉       | 295/1000 [01:06<04:13,  2.78it/s]

Phase 2 - Ep 295: Reward -0.162 | W* 0.818 | Success: True


Phase 2: Assembly Task:  30%|██▉       | 296/1000 [01:07<04:31,  2.59it/s]

Phase 2 - Ep 296: Reward -0.186 | W* 0.922 | Success: False


Phase 2: Assembly Task:  30%|██▉       | 297/1000 [01:07<04:01,  2.91it/s]

Phase 2 - Ep 297: Reward -0.106 | W* 0.917 | Success: True


Phase 2: Assembly Task:  30%|██▉       | 298/1000 [01:07<04:33,  2.57it/s]

Phase 2 - Ep 298: Reward -0.186 | W* 0.822 | Success: False


Phase 2: Assembly Task:  30%|██▉       | 299/1000 [01:08<04:54,  2.38it/s]

Phase 2 - Ep 299: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  30%|███       | 301/1000 [01:08<03:32,  3.30it/s]

Phase 2 - Ep 300: Reward -0.124 | W* 0.831 | Success: True
Phase 2 - Ep 301: Reward 0.030 | W* 0.837 | Success: True


Phase 2: Assembly Task:  30%|███       | 302/1000 [01:08<02:53,  4.01it/s]

Phase 2 - Ep 302: Reward 0.030 | W* 0.871 | Success: True


Phase 2: Assembly Task:  30%|███       | 303/1000 [01:09<03:23,  3.42it/s]

Phase 2 - Ep 303: Reward -0.186 | W* 0.879 | Success: False


Phase 2: Assembly Task:  30%|███       | 305/1000 [01:09<03:11,  3.62it/s]

Phase 2 - Ep 304: Reward -0.186 | W* 0.849 | Success: False
Phase 2 - Ep 305: Reward -0.084 | W* 0.825 | Success: True


Phase 2: Assembly Task:  31%|███       | 306/1000 [01:10<03:14,  3.57it/s]

Phase 2 - Ep 306: Reward -0.133 | W* 0.911 | Success: True


Phase 2: Assembly Task:  31%|███       | 308/1000 [01:10<03:15,  3.55it/s]

Phase 2 - Ep 307: Reward -0.153 | W* 0.820 | Success: True
Phase 2 - Ep 308: Reward -0.069 | W* 0.833 | Success: True


Phase 2: Assembly Task:  31%|███       | 309/1000 [01:10<02:52,  4.02it/s]

Phase 2 - Ep 309: Reward -0.039 | W* 0.821 | Success: True


Phase 2: Assembly Task:  31%|███       | 310/1000 [01:11<03:25,  3.36it/s]

Phase 2 - Ep 310: Reward -0.186 | W* 0.822 | Success: False


Phase 2: Assembly Task:  31%|███       | 311/1000 [01:11<03:44,  3.07it/s]

Phase 2 - Ep 311: Reward -0.186 | W* 0.816 | Success: False


Phase 2: Assembly Task:  31%|███▏      | 313/1000 [01:12<03:12,  3.57it/s]

Phase 2 - Ep 312: Reward -0.147 | W* 0.839 | Success: True
Phase 2 - Ep 313: Reward -0.114 | W* 0.818 | Success: True


Phase 2: Assembly Task:  31%|███▏      | 314/1000 [01:12<03:56,  2.90it/s]

Phase 2 - Ep 314: Reward -0.159 | W* 0.854 | Success: True


Phase 2: Assembly Task:  32%|███▏      | 315/1000 [01:12<03:36,  3.16it/s]

Phase 2 - Ep 315: Reward -0.106 | W* 0.841 | Success: True


Phase 2: Assembly Task:  32%|███▏      | 316/1000 [01:13<04:46,  2.38it/s]

Phase 2 - Ep 316: Reward -0.186 | W* 0.847 | Success: False


Phase 2: Assembly Task:  32%|███▏      | 317/1000 [01:13<04:27,  2.55it/s]

Phase 2 - Ep 317: Reward -0.110 | W* 0.913 | Success: True


Phase 2: Assembly Task:  32%|███▏      | 318/1000 [01:14<05:24,  2.10it/s]

Phase 2 - Ep 318: Reward -0.186 | W* 0.857 | Success: False


Phase 2: Assembly Task:  32%|███▏      | 319/1000 [01:14<04:41,  2.42it/s]

Phase 2 - Ep 319: Reward -0.077 | W* 0.862 | Success: True


Phase 2: Assembly Task:  32%|███▏      | 320/1000 [01:15<05:02,  2.25it/s]

Phase 2 - Ep 320: Reward -0.151 | W* 0.848 | Success: True


Phase 2: Assembly Task:  32%|███▏      | 321/1000 [01:15<04:47,  2.36it/s]

Phase 2 - Ep 321: Reward -0.154 | W* 0.820 | Success: True


Phase 2: Assembly Task:  32%|███▏      | 323/1000 [01:16<03:45,  3.00it/s]

Phase 2 - Ep 322: Reward -0.196 | W* 0.910 | Success: False
Phase 2 - Ep 323: Reward -0.090 | W* 0.844 | Success: True


Phase 2: Assembly Task:  32%|███▏      | 324/1000 [01:16<03:11,  3.52it/s]

Phase 2 - Ep 324: Reward -0.090 | W* 0.830 | Success: True


Phase 2: Assembly Task:  33%|███▎      | 326/1000 [01:16<02:36,  4.30it/s]

Phase 2 - Ep 325: Reward -0.137 | W* 0.840 | Success: True
Phase 2 - Ep 326: Reward -0.039 | W* 0.821 | Success: True


Phase 2: Assembly Task:  33%|███▎      | 327/1000 [01:17<03:12,  3.50it/s]

Phase 2 - Ep 327: Reward -0.204 | W* 0.880 | Success: False


Phase 2: Assembly Task:  33%|███▎      | 328/1000 [01:17<03:31,  3.18it/s]

Phase 2 - Ep 328: Reward -0.148 | W* 0.907 | Success: True


Phase 2: Assembly Task:  33%|███▎      | 329/1000 [01:18<03:59,  2.80it/s]

Phase 2 - Ep 329: Reward -0.186 | W* 0.832 | Success: False


Phase 2: Assembly Task:  33%|███▎      | 331/1000 [01:18<03:17,  3.38it/s]

Phase 2 - Ep 330: Reward -0.135 | W* 0.822 | Success: True
Phase 2 - Ep 331: Reward -0.060 | W* 0.828 | Success: True


Phase 2: Assembly Task:  33%|███▎      | 333/1000 [01:19<03:10,  3.50it/s]

Phase 2 - Ep 332: Reward -0.159 | W* 0.817 | Success: True
Phase 2 - Ep 333: Reward -0.050 | W* 0.888 | Success: True


Phase 2: Assembly Task:  34%|███▎      | 335/1000 [01:19<02:29,  4.45it/s]

Phase 2 - Ep 334: Reward -0.124 | W* 0.863 | Success: True
Phase 2 - Ep 335: Reward -0.069 | W* 0.916 | Success: True


Phase 2: Assembly Task:  34%|███▎      | 336/1000 [01:19<02:18,  4.78it/s]

Phase 2 - Ep 336: Reward -0.084 | W* 0.886 | Success: True


Phase 2: Assembly Task:  34%|███▍      | 338/1000 [01:20<02:32,  4.34it/s]

Phase 2 - Ep 337: Reward -0.159 | W* 0.851 | Success: True
Phase 2 - Ep 338: Reward -0.101 | W* 0.870 | Success: True


Phase 2: Assembly Task:  34%|███▍      | 341/1000 [01:20<01:38,  6.67it/s]

Phase 2 - Ep 339: Reward -0.039 | W* 0.875 | Success: True
Phase 2 - Ep 340: Reward 0.133 | W* 0.846 | Success: True
Phase 2 - Ep 341: Reward -0.025 | W* 0.858 | Success: True


Phase 2: Assembly Task:  34%|███▍      | 342/1000 [01:20<01:50,  5.94it/s]

Phase 2 - Ep 342: Reward -0.118 | W* 0.818 | Success: True


Phase 2: Assembly Task:  34%|███▍      | 343/1000 [01:21<02:19,  4.72it/s]

Phase 2 - Ep 343: Reward -0.155 | W* 0.868 | Success: True


Phase 2: Assembly Task:  34%|███▍      | 344/1000 [01:21<03:00,  3.64it/s]

Phase 2 - Ep 344: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  34%|███▍      | 345/1000 [01:21<03:10,  3.44it/s]

Phase 2 - Ep 345: Reward -0.130 | W* 0.826 | Success: True


Phase 2: Assembly Task:  35%|███▍      | 346/1000 [01:22<03:20,  3.26it/s]

Phase 2 - Ep 346: Reward -0.143 | W* 0.831 | Success: True


Phase 2: Assembly Task:  35%|███▍      | 347/1000 [01:22<03:27,  3.14it/s]

Phase 2 - Ep 347: Reward -0.204 | W* 0.940 | Success: False


Phase 2: Assembly Task:  35%|███▍      | 348/1000 [01:22<03:16,  3.32it/s]

Phase 2 - Ep 348: Reward -0.133 | W* 0.846 | Success: True


Phase 2: Assembly Task:  35%|███▍      | 349/1000 [01:23<03:29,  3.11it/s]

Phase 2 - Ep 349: Reward -0.186 | W* 0.829 | Success: False


Phase 2: Assembly Task:  35%|███▌      | 350/1000 [01:23<03:37,  2.98it/s]

Phase 2 - Ep 350: Reward -0.204 | W* 0.932 | Success: False


Phase 2: Assembly Task:  35%|███▌      | 351/1000 [01:24<03:24,  3.18it/s]

Phase 2 - Ep 351: Reward -0.137 | W* 0.863 | Success: True
Phase 2 - Ep 352: Reward -0.106 | W* 0.819 | Success: True


Phase 2: Assembly Task:  35%|███▌      | 353/1000 [01:24<03:10,  3.40it/s]

Phase 2 - Ep 353: Reward -0.157 | W* 0.909 | Success: True


Phase 2: Assembly Task:  35%|███▌      | 354/1000 [01:24<03:19,  3.23it/s]

Phase 2 - Ep 354: Reward -0.159 | W* 0.832 | Success: True


Phase 2: Assembly Task:  36%|███▌      | 355/1000 [01:25<03:24,  3.15it/s]

Phase 2 - Ep 355: Reward -0.153 | W* 0.850 | Success: True


Phase 2: Assembly Task:  36%|███▌      | 356/1000 [01:25<03:30,  3.05it/s]

Phase 2 - Ep 356: Reward -0.204 | W* 0.920 | Success: False


Phase 2: Assembly Task:  36%|███▌      | 357/1000 [01:25<03:26,  3.11it/s]

Phase 2 - Ep 357: Reward -0.135 | W* 0.865 | Success: True


Phase 2: Assembly Task:  36%|███▌      | 358/1000 [01:26<04:05,  2.61it/s]

Phase 2 - Ep 358: Reward -0.186 | W* 0.827 | Success: False


Phase 2: Assembly Task:  36%|███▌      | 360/1000 [01:26<03:31,  3.02it/s]

Phase 2 - Ep 359: Reward -0.186 | W* 0.832 | Success: False
Phase 2 - Ep 360: Reward -0.025 | W* 0.909 | Success: True


Phase 2: Assembly Task:  36%|███▌      | 361/1000 [01:27<03:58,  2.68it/s]

Phase 2 - Ep 361: Reward -0.155 | W* 0.842 | Success: True


Phase 2: Assembly Task:  36%|███▌      | 362/1000 [01:27<03:52,  2.74it/s]

Phase 2 - Ep 362: Reward -0.139 | W* 0.868 | Success: True


Phase 2: Assembly Task:  36%|███▋      | 363/1000 [01:28<04:19,  2.46it/s]

Phase 2 - Ep 363: Reward -0.186 | W* 0.859 | Success: False


Phase 2: Assembly Task:  36%|███▋      | 364/1000 [01:28<04:51,  2.18it/s]

Phase 2 - Ep 364: Reward -0.186 | W* 0.924 | Success: False


Phase 2: Assembly Task:  36%|███▋      | 365/1000 [01:29<04:05,  2.58it/s]

Phase 2 - Ep 365: Reward -0.101 | W* 0.825 | Success: True
Phase 2 - Ep 366: Reward -0.114 | W* 0.833 | Success: True


Phase 2: Assembly Task:  37%|███▋      | 367/1000 [01:29<03:49,  2.76it/s]

Phase 2 - Ep 367: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  37%|███▋      | 369/1000 [01:30<02:55,  3.59it/s]

Phase 2 - Ep 368: Reward -0.143 | W* 0.883 | Success: True
Phase 2 - Ep 369: Reward 0.057 | W* 0.839 | Success: True


Phase 2: Assembly Task:  37%|███▋      | 370/1000 [01:30<02:45,  3.80it/s]

Phase 2 - Ep 370: Reward -0.106 | W* 0.900 | Success: True


Phase 2: Assembly Task:  37%|███▋      | 371/1000 [01:30<02:50,  3.68it/s]

Phase 2 - Ep 371: Reward -0.135 | W* 0.837 | Success: True


Phase 2: Assembly Task:  37%|███▋      | 372/1000 [01:30<03:10,  3.29it/s]

Phase 2 - Ep 372: Reward -0.162 | W* 0.852 | Success: True


Phase 2: Assembly Task:  37%|███▋      | 373/1000 [01:31<03:18,  3.16it/s]

Phase 2 - Ep 373: Reward -0.160 | W* 0.861 | Success: True


Phase 2: Assembly Task:  37%|███▋      | 374/1000 [01:31<03:32,  2.95it/s]

Phase 2 - Ep 374: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  38%|███▊      | 375/1000 [01:32<03:36,  2.88it/s]

Phase 2 - Ep 375: Reward -0.161 | W* 0.834 | Success: True


Phase 2: Assembly Task:  38%|███▊      | 376/1000 [01:32<03:15,  3.19it/s]

Phase 2 - Ep 376: Reward -0.130 | W* 0.839 | Success: True


Phase 2: Assembly Task:  38%|███▊      | 377/1000 [01:32<03:21,  3.09it/s]

Phase 2 - Ep 377: Reward -0.148 | W* 0.865 | Success: True


Phase 2: Assembly Task:  38%|███▊      | 378/1000 [01:33<03:42,  2.80it/s]

Phase 2 - Ep 378: Reward -0.186 | W* 0.917 | Success: False


Phase 2: Assembly Task:  38%|███▊      | 380/1000 [01:33<03:11,  3.24it/s]

Phase 2 - Ep 379: Reward -0.155 | W* 0.882 | Success: True
Phase 2 - Ep 380: Reward -0.039 | W* 0.893 | Success: True


Phase 2: Assembly Task:  38%|███▊      | 381/1000 [01:34<03:23,  3.04it/s]

Phase 2 - Ep 381: Reward -0.145 | W* 0.823 | Success: True


Phase 2: Assembly Task:  38%|███▊      | 382/1000 [01:34<03:46,  2.72it/s]

Phase 2 - Ep 382: Reward -0.204 | W* 0.872 | Success: False


Phase 2: Assembly Task:  38%|███▊      | 383/1000 [01:34<03:53,  2.65it/s]

Phase 2 - Ep 383: Reward -0.204 | W* 0.855 | Success: False


Phase 2: Assembly Task:  38%|███▊      | 384/1000 [01:35<03:50,  2.67it/s]

Phase 2 - Ep 384: Reward -0.186 | W* 0.857 | Success: False


Phase 2: Assembly Task:  38%|███▊      | 385/1000 [01:35<03:42,  2.77it/s]

Phase 2 - Ep 385: Reward -0.151 | W* 0.857 | Success: True


Phase 2: Assembly Task:  39%|███▊      | 386/1000 [01:35<03:41,  2.77it/s]

Phase 2 - Ep 386: Reward -0.141 | W* 0.835 | Success: True


                                                                          

Phase 2 - Ep 387: Reward -0.204 | W* 0.920 | Success: False


Phase 2: Assembly Task:  39%|███▉      | 388/1000 [01:36<03:22,  3.03it/s]

Phase 2 - Ep 388: Reward -0.090 | W* 0.844 | Success: True


Phase 2: Assembly Task:  39%|███▉      | 390/1000 [01:37<02:49,  3.60it/s]

Phase 2 - Ep 389: Reward -0.114 | W* 0.869 | Success: True
Phase 2 - Ep 390: Reward -0.084 | W* 0.838 | Success: True


Phase 2: Assembly Task:  39%|███▉      | 392/1000 [01:37<02:55,  3.47it/s]

Phase 2 - Ep 391: Reward -0.204 | W* 0.875 | Success: False
Phase 2 - Ep 392: Reward -0.096 | W* 0.873 | Success: True


Phase 2: Assembly Task:  39%|███▉      | 393/1000 [01:37<02:37,  3.86it/s]

Phase 2 - Ep 393: Reward -0.096 | W* 0.861 | Success: True


Phase 2: Assembly Task:  39%|███▉      | 394/1000 [01:38<02:32,  3.97it/s]

Phase 2 - Ep 394: Reward -0.133 | W* 0.899 | Success: True


Phase 2: Assembly Task:  40%|███▉      | 395/1000 [01:38<02:49,  3.56it/s]

Phase 2 - Ep 395: Reward -0.157 | W* 0.831 | Success: True


Phase 2: Assembly Task:  40%|███▉      | 396/1000 [01:38<03:33,  2.83it/s]

Phase 2 - Ep 396: Reward -0.186 | W* 0.860 | Success: False


Phase 2: Assembly Task:  40%|███▉      | 397/1000 [01:39<03:35,  2.80it/s]

Phase 2 - Ep 397: Reward -0.124 | W* 0.863 | Success: True


Phase 2: Assembly Task:  40%|███▉      | 398/1000 [01:39<04:25,  2.27it/s]

Phase 2 - Ep 398: Reward -0.160 | W* 0.822 | Success: True


Phase 2: Assembly Task:  40%|███▉      | 399/1000 [01:40<04:57,  2.02it/s]

Phase 2 - Ep 399: Reward -0.186 | W* 0.829 | Success: False


Phase 2: Assembly Task:  40%|████      | 400/1000 [01:41<04:50,  2.06it/s]

Phase 2 - Ep 400: Reward -0.153 | W* 0.831 | Success: True


Phase 2: Assembly Task:  40%|████      | 401/1000 [01:41<04:59,  2.00it/s]

Phase 2 - Ep 401: Reward -0.162 | W* 0.874 | Success: True


Phase 2: Assembly Task:  40%|████      | 403/1000 [01:42<03:32,  2.81it/s]

Phase 2 - Ep 402: Reward -0.090 | W* 0.832 | Success: True
Phase 2 - Ep 403: Reward -0.039 | W* 0.837 | Success: True


Phase 2: Assembly Task:  40%|████      | 404/1000 [01:42<03:22,  2.95it/s]

Phase 2 - Ep 404: Reward -0.150 | W* 0.886 | Success: True


Phase 2: Assembly Task:  40%|████      | 405/1000 [01:42<03:29,  2.84it/s]

Phase 2 - Ep 405: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  41%|████      | 407/1000 [01:43<03:03,  3.23it/s]

Phase 2 - Ep 406: Reward -0.196 | W* 0.921 | Success: False
Phase 2 - Ep 407: Reward 0.008 | W* 0.820 | Success: True


Phase 2: Assembly Task:  41%|████      | 408/1000 [01:43<03:28,  2.85it/s]

Phase 2 - Ep 408: Reward -0.161 | W* 0.875 | Success: True


Phase 2: Assembly Task:  41%|████      | 409/1000 [01:44<03:29,  2.82it/s]

Phase 2 - Ep 409: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  41%|████      | 410/1000 [01:44<03:23,  2.91it/s]

Phase 2 - Ep 410: Reward -0.150 | W* 0.817 | Success: True


Phase 2: Assembly Task:  41%|████      | 411/1000 [01:44<03:28,  2.83it/s]

Phase 2 - Ep 411: Reward -0.186 | W* 0.819 | Success: False


Phase 2: Assembly Task:  41%|████      | 412/1000 [01:45<03:02,  3.22it/s]

Phase 2 - Ep 412: Reward -0.118 | W* 0.827 | Success: True


Phase 2: Assembly Task:  41%|████▏     | 414/1000 [01:45<02:31,  3.86it/s]

Phase 2 - Ep 413: Reward -0.130 | W* 0.833 | Success: True
Phase 2 - Ep 414: Reward -0.106 | W* 0.861 | Success: True


Phase 2: Assembly Task:  42%|████▏     | 415/1000 [01:45<02:38,  3.69it/s]

Phase 2 - Ep 415: Reward -0.150 | W* 0.898 | Success: True


Phase 2: Assembly Task:  42%|████▏     | 416/1000 [01:46<02:53,  3.36it/s]

Phase 2 - Ep 416: Reward -0.186 | W* 0.859 | Success: False


Phase 2: Assembly Task:  42%|████▏     | 417/1000 [01:46<03:01,  3.21it/s]

Phase 2 - Ep 417: Reward -0.155 | W* 0.823 | Success: True


Phase 2: Assembly Task:  42%|████▏     | 418/1000 [01:46<03:05,  3.13it/s]

Phase 2 - Ep 418: Reward -0.153 | W* 0.820 | Success: True


Phase 2: Assembly Task:  42%|████▏     | 419/1000 [01:47<03:34,  2.71it/s]

Phase 2 - Ep 419: Reward -0.186 | W* 0.899 | Success: False


Phase 2: Assembly Task:  42%|████▏     | 420/1000 [01:47<03:31,  2.75it/s]

Phase 2 - Ep 420: Reward -0.143 | W* 0.880 | Success: True


Phase 2: Assembly Task:  42%|████▏     | 422/1000 [01:48<02:57,  3.26it/s]

Phase 2 - Ep 421: Reward -0.160 | W* 0.853 | Success: True
Phase 2 - Ep 422: Reward -0.084 | W* 0.859 | Success: True


Phase 2: Assembly Task:  42%|████▏     | 423/1000 [01:48<03:12,  3.00it/s]

Phase 2 - Ep 423: Reward -0.186 | W* 0.827 | Success: False


Phase 2: Assembly Task:  42%|████▏     | 424/1000 [01:48<03:19,  2.89it/s]

Phase 2 - Ep 424: Reward -0.186 | W* 0.847 | Success: False


Phase 2: Assembly Task:  42%|████▎     | 425/1000 [01:49<03:05,  3.10it/s]

Phase 2 - Ep 425: Reward -0.145 | W* 0.882 | Success: True


Phase 2: Assembly Task:  43%|████▎     | 426/1000 [01:49<03:15,  2.94it/s]

Phase 2 - Ep 426: Reward -0.204 | W* 0.850 | Success: False


Phase 2: Assembly Task:  43%|████▎     | 427/1000 [01:49<03:20,  2.85it/s]

Phase 2 - Ep 427: Reward -0.186 | W* 0.828 | Success: False


Phase 2: Assembly Task:  43%|████▎     | 428/1000 [01:50<03:20,  2.85it/s]

Phase 2 - Ep 428: Reward -0.204 | W* 0.930 | Success: False


Phase 2: Assembly Task:  43%|████▎     | 430/1000 [01:50<02:50,  3.34it/s]

Phase 2 - Ep 429: Reward -0.186 | W* 0.889 | Success: False
Phase 2 - Ep 430: Reward -0.084 | W* 0.858 | Success: True


Phase 2: Assembly Task:  43%|████▎     | 431/1000 [01:51<02:59,  3.17it/s]

Phase 2 - Ep 431: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  43%|████▎     | 432/1000 [01:51<02:53,  3.28it/s]

Phase 2 - Ep 432: Reward -0.137 | W* 0.818 | Success: True


Phase 2: Assembly Task:  43%|████▎     | 433/1000 [01:51<03:04,  3.07it/s]

Phase 2 - Ep 433: Reward -0.204 | W* 0.830 | Success: False
Phase 2 - Ep 434: Reward 0.057 | W* 0.825 | Success: True


Phase 2: Assembly Task:  44%|████▎     | 435/1000 [01:52<02:52,  3.27it/s]

Phase 2 - Ep 435: Reward -0.160 | W* 0.926 | Success: True


Phase 2: Assembly Task:  44%|████▎     | 436/1000 [01:52<03:01,  3.10it/s]

Phase 2 - Ep 436: Reward -0.139 | W* 0.828 | Success: True


Phase 2: Assembly Task:  44%|████▍     | 438/1000 [01:53<02:40,  3.50it/s]

Phase 2 - Ep 437: Reward -0.143 | W* 0.856 | Success: True
Phase 2 - Ep 438: Reward -0.025 | W* 0.851 | Success: True


Phase 2: Assembly Task:  44%|████▍     | 439/1000 [01:53<03:10,  2.94it/s]

Phase 2 - Ep 439: Reward -0.204 | W* 0.917 | Success: False


Phase 2: Assembly Task:  44%|████▍     | 440/1000 [01:54<03:10,  2.93it/s]

Phase 2 - Ep 440: Reward -0.141 | W* 0.857 | Success: True


Phase 2: Assembly Task:  44%|████▍     | 441/1000 [01:54<03:45,  2.48it/s]

Phase 2 - Ep 441: Reward -0.186 | W* 0.882 | Success: False


Phase 2: Assembly Task:  44%|████▍     | 442/1000 [01:55<04:02,  2.30it/s]

Phase 2 - Ep 442: Reward -0.186 | W* 0.879 | Success: False


Phase 2: Assembly Task:  44%|████▍     | 443/1000 [01:55<03:51,  2.41it/s]

Phase 2 - Ep 443: Reward -0.186 | W* 0.819 | Success: False


Phase 2: Assembly Task:  44%|████▍     | 444/1000 [01:55<03:42,  2.50it/s]

Phase 2 - Ep 444: Reward -0.160 | W* 0.911 | Success: True


Phase 2: Assembly Task:  44%|████▍     | 445/1000 [01:56<03:33,  2.60it/s]

Phase 2 - Ep 445: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  45%|████▍     | 446/1000 [01:56<03:11,  2.89it/s]

Phase 2 - Ep 446: Reward -0.133 | W* 0.834 | Success: True


Phase 2: Assembly Task:  45%|████▍     | 448/1000 [01:57<02:35,  3.55it/s]

Phase 2 - Ep 447: Reward -0.186 | W* 0.877 | Success: False
Phase 2 - Ep 448: Reward 0.008 | W* 0.823 | Success: True
Phase 2 - Ep 449: Reward 0.190 | W* 0.830 | Success: True


Phase 2: Assembly Task:  45%|████▌     | 450/1000 [01:57<02:19,  3.95it/s]

Phase 2 - Ep 450: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task:  45%|████▌     | 451/1000 [01:57<02:40,  3.43it/s]

Phase 2 - Ep 451: Reward -0.162 | W* 0.839 | Success: True


Phase 2: Assembly Task:  45%|████▌     | 452/1000 [01:58<02:45,  3.31it/s]

Phase 2 - Ep 452: Reward -0.154 | W* 0.822 | Success: True


Phase 2: Assembly Task:  45%|████▌     | 453/1000 [01:58<02:39,  3.43it/s]

Phase 2 - Ep 453: Reward -0.137 | W* 0.859 | Success: True


Phase 2: Assembly Task:  45%|████▌     | 454/1000 [01:58<02:32,  3.58it/s]

Phase 2 - Ep 454: Reward -0.130 | W* 0.826 | Success: True


Phase 2: Assembly Task:  46%|████▌     | 455/1000 [01:59<02:30,  3.63it/s]

Phase 2 - Ep 455: Reward -0.133 | W* 0.830 | Success: True


Phase 2: Assembly Task:  46%|████▌     | 457/1000 [01:59<01:57,  4.61it/s]

Phase 2 - Ep 456: Reward -0.127 | W* 0.904 | Success: True
Phase 2 - Ep 457: Reward -0.010 | W* 0.823 | Success: True


Phase 2: Assembly Task:  46%|████▌     | 458/1000 [01:59<02:24,  3.76it/s]

Phase 2 - Ep 458: Reward -0.186 | W* 0.842 | Success: False


Phase 2: Assembly Task:  46%|████▌     | 459/1000 [02:00<02:43,  3.32it/s]

Phase 2 - Ep 459: Reward -0.162 | W* 0.857 | Success: True


Phase 2: Assembly Task:  46%|████▌     | 460/1000 [02:00<02:52,  3.13it/s]

Phase 2 - Ep 460: Reward -0.204 | W* 0.885 | Success: False


Phase 2: Assembly Task:  46%|████▌     | 461/1000 [02:00<03:01,  2.97it/s]

Phase 2 - Ep 461: Reward -0.204 | W* 0.877 | Success: False


Phase 2: Assembly Task:  46%|████▌     | 462/1000 [02:01<03:09,  2.84it/s]

Phase 2 - Ep 462: Reward -0.186 | W* 0.834 | Success: False


Phase 2: Assembly Task:  46%|████▋     | 463/1000 [02:01<02:47,  3.21it/s]

Phase 2 - Ep 463: Reward -0.121 | W* 0.832 | Success: True


Phase 2: Assembly Task:  46%|████▋     | 464/1000 [02:01<02:30,  3.57it/s]

Phase 2 - Ep 464: Reward -0.114 | W* 0.849 | Success: True


Phase 2: Assembly Task:  46%|████▋     | 465/1000 [02:02<02:48,  3.17it/s]

Phase 2 - Ep 465: Reward -0.186 | W* 0.841 | Success: False


Phase 2: Assembly Task:  47%|████▋     | 466/1000 [02:02<02:57,  3.01it/s]

Phase 2 - Ep 466: Reward -0.186 | W* 0.859 | Success: False


Phase 2: Assembly Task:  47%|████▋     | 468/1000 [02:02<02:13,  3.99it/s]

Phase 2 - Ep 467: Reward -0.124 | W* 0.885 | Success: True
Phase 2 - Ep 468: Reward -0.060 | W* 0.883 | Success: True


Phase 2: Assembly Task:  47%|████▋     | 469/1000 [02:03<02:33,  3.46it/s]

Phase 2 - Ep 469: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  47%|████▋     | 470/1000 [02:03<02:45,  3.21it/s]

Phase 2 - Ep 470: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  47%|████▋     | 471/1000 [02:03<02:56,  3.00it/s]

Phase 2 - Ep 471: Reward -0.186 | W* 0.842 | Success: False


Phase 2: Assembly Task:  47%|████▋     | 472/1000 [02:04<02:56,  2.99it/s]

Phase 2 - Ep 472: Reward -0.147 | W* 0.837 | Success: True


Phase 2: Assembly Task:  47%|████▋     | 473/1000 [02:04<03:00,  2.92it/s]

Phase 2 - Ep 473: Reward -0.145 | W* 0.885 | Success: True


Phase 2: Assembly Task:  47%|████▋     | 474/1000 [02:05<03:07,  2.81it/s]

Phase 2 - Ep 474: Reward -0.151 | W* 0.835 | Success: True


Phase 2: Assembly Task:  48%|████▊     | 475/1000 [02:05<03:36,  2.43it/s]

Phase 2 - Ep 475: Reward -0.186 | W* 0.832 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 476/1000 [02:06<03:46,  2.32it/s]

Phase 2 - Ep 476: Reward -0.186 | W* 0.914 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 477/1000 [02:06<04:11,  2.08it/s]

Phase 2 - Ep 477: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 478/1000 [02:07<04:30,  1.93it/s]

Phase 2 - Ep 478: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 479/1000 [02:07<04:36,  1.88it/s]

Phase 2 - Ep 479: Reward -0.204 | W* 0.882 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 480/1000 [02:08<04:37,  1.87it/s]

Phase 2 - Ep 480: Reward -0.196 | W* 0.857 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 481/1000 [02:08<03:52,  2.23it/s]

Phase 2 - Ep 481: Reward -0.135 | W* 0.895 | Success: True


Phase 2: Assembly Task:  48%|████▊     | 482/1000 [02:08<03:17,  2.62it/s]

Phase 2 - Ep 482: Reward -0.124 | W* 0.884 | Success: True


Phase 2: Assembly Task:  48%|████▊     | 483/1000 [02:09<03:17,  2.62it/s]

Phase 2 - Ep 483: Reward -0.186 | W* 0.837 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 484/1000 [02:09<03:16,  2.63it/s]

Phase 2 - Ep 484: Reward -0.204 | W* 0.922 | Success: False


Phase 2: Assembly Task:  48%|████▊     | 485/1000 [02:09<03:11,  2.69it/s]

Phase 2 - Ep 485: Reward -0.159 | W* 0.913 | Success: True


Phase 2: Assembly Task:  49%|████▊     | 486/1000 [02:10<03:08,  2.73it/s]

Phase 2 - Ep 486: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  49%|████▊     | 487/1000 [02:10<02:45,  3.10it/s]

Phase 2 - Ep 487: Reward -0.114 | W* 0.828 | Success: True


Phase 2: Assembly Task:  49%|████▉     | 488/1000 [02:10<02:52,  2.98it/s]

Phase 2 - Ep 488: Reward -0.186 | W* 0.862 | Success: False


Phase 2: Assembly Task:  49%|████▉     | 490/1000 [02:11<02:29,  3.42it/s]

Phase 2 - Ep 489: Reward -0.204 | W* 0.907 | Success: False
Phase 2 - Ep 490: Reward -0.090 | W* 0.876 | Success: True


Phase 2: Assembly Task:  49%|████▉     | 491/1000 [02:11<02:43,  3.12it/s]

Phase 2 - Ep 491: Reward -0.204 | W* 0.940 | Success: False


Phase 2: Assembly Task:  49%|████▉     | 492/1000 [02:12<02:36,  3.25it/s]

Phase 2 - Ep 492: Reward -0.143 | W* 0.873 | Success: True


Phase 2: Assembly Task:  49%|████▉     | 493/1000 [02:12<02:42,  3.12it/s]

Phase 2 - Ep 493: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  49%|████▉     | 494/1000 [02:12<02:40,  3.15it/s]

Phase 2 - Ep 494: Reward -0.148 | W* 0.907 | Success: True


Phase 2: Assembly Task:  50%|████▉     | 495/1000 [02:13<02:48,  3.00it/s]

Phase 2 - Ep 495: Reward -0.186 | W* 0.837 | Success: False


Phase 2: Assembly Task:  50%|████▉     | 496/1000 [02:13<02:55,  2.86it/s]

Phase 2 - Ep 496: Reward -0.186 | W* 0.837 | Success: False


Phase 2: Assembly Task:  50%|████▉     | 497/1000 [02:13<02:52,  2.91it/s]

Phase 2 - Ep 497: Reward -0.150 | W* 0.826 | Success: True


Phase 2: Assembly Task:  50%|████▉     | 498/1000 [02:14<02:47,  3.00it/s]

Phase 2 - Ep 498: Reward -0.148 | W* 0.827 | Success: True


Phase 2: Assembly Task:  50%|████▉     | 499/1000 [02:14<02:53,  2.89it/s]

Phase 2 - Ep 499: Reward -0.186 | W* 0.819 | Success: False


Phase 2: Assembly Task:  50%|█████     | 500/1000 [02:14<02:58,  2.79it/s]

Phase 2 - Ep 500: Reward -0.204 | W* 0.840 | Success: False


Phase 2: Assembly Task:  50%|█████     | 501/1000 [02:15<03:00,  2.76it/s]

Phase 2 - Ep 501: Reward -0.186 | W* 0.839 | Success: False


Phase 2: Assembly Task:  50%|█████     | 502/1000 [02:15<03:11,  2.59it/s]

Phase 2 - Ep 502: Reward -0.186 | W* 0.904 | Success: False


Phase 2: Assembly Task:  50%|█████     | 503/1000 [02:16<03:23,  2.44it/s]

Phase 2 - Ep 503: Reward -0.204 | W* 0.895 | Success: False


Phase 2: Assembly Task:  50%|█████     | 504/1000 [02:16<03:17,  2.51it/s]

Phase 2 - Ep 504: Reward -0.150 | W* 0.861 | Success: True


Phase 2: Assembly Task:  50%|█████     | 505/1000 [02:16<03:06,  2.65it/s]

Phase 2 - Ep 505: Reward -0.150 | W* 0.833 | Success: True


Phase 2: Assembly Task:  51%|█████     | 506/1000 [02:17<03:04,  2.67it/s]

Phase 2 - Ep 506: Reward -0.204 | W* 0.847 | Success: False


Phase 2: Assembly Task:  51%|█████     | 507/1000 [02:17<03:00,  2.73it/s]

Phase 2 - Ep 507: Reward -0.159 | W* 0.846 | Success: True


Phase 2: Assembly Task:  51%|█████     | 509/1000 [02:18<02:31,  3.23it/s]

Phase 2 - Ep 508: Reward -0.204 | W* 0.887 | Success: False
Phase 2 - Ep 509: Reward -0.096 | W* 0.849 | Success: True


Phase 2: Assembly Task:  51%|█████     | 510/1000 [02:18<02:22,  3.45it/s]

Phase 2 - Ep 510: Reward -0.114 | W* 0.924 | Success: True


Phase 2: Assembly Task:  51%|█████     | 511/1000 [02:18<02:20,  3.48it/s]

Phase 2 - Ep 511: Reward -0.114 | W* 0.833 | Success: True


Phase 2: Assembly Task:  51%|█████     | 512/1000 [02:19<02:53,  2.81it/s]

Phase 2 - Ep 512: Reward -0.186 | W* 0.834 | Success: False


Phase 2: Assembly Task:  51%|█████▏    | 513/1000 [02:19<03:16,  2.47it/s]

Phase 2 - Ep 513: Reward -0.186 | W* 0.892 | Success: False


Phase 2: Assembly Task:  51%|█████▏    | 514/1000 [02:19<03:01,  2.68it/s]

Phase 2 - Ep 514: Reward -0.127 | W* 0.852 | Success: True


Phase 2: Assembly Task:  52%|█████▏    | 515/1000 [02:20<03:14,  2.49it/s]

Phase 2 - Ep 515: Reward -0.204 | W* 0.925 | Success: False


Phase 2: Assembly Task:  52%|█████▏    | 516/1000 [02:21<03:41,  2.18it/s]

Phase 2 - Ep 516: Reward -0.186 | W* 0.832 | Success: False


Phase 2: Assembly Task:  52%|█████▏    | 517/1000 [02:21<03:46,  2.13it/s]

Phase 2 - Ep 517: Reward -0.186 | W* 0.849 | Success: False


Phase 2: Assembly Task:  52%|█████▏    | 518/1000 [02:21<03:23,  2.37it/s]

Phase 2 - Ep 518: Reward -0.153 | W* 0.877 | Success: True


Phase 2: Assembly Task:  52%|█████▏    | 519/1000 [02:22<03:17,  2.44it/s]

Phase 2 - Ep 519: Reward -0.186 | W* 0.828 | Success: False


Phase 2: Assembly Task:  52%|█████▏    | 520/1000 [02:22<03:12,  2.49it/s]

Phase 2 - Ep 520: Reward -0.186 | W* 0.861 | Success: False


Phase 2: Assembly Task:  52%|█████▏    | 521/1000 [02:22<03:06,  2.57it/s]

Phase 2 - Ep 521: Reward -0.204 | W* 0.887 | Success: False


Phase 2: Assembly Task:  52%|█████▏    | 522/1000 [02:23<03:04,  2.60it/s]

Phase 2 - Ep 522: Reward -0.204 | W* 0.900 | Success: False


Phase 2: Assembly Task:  52%|█████▏    | 523/1000 [02:23<02:44,  2.90it/s]

Phase 2 - Ep 523: Reward -0.133 | W* 0.838 | Success: True


Phase 2: Assembly Task:  52%|█████▏    | 524/1000 [02:23<02:39,  2.98it/s]

Phase 2 - Ep 524: Reward -0.151 | W* 0.851 | Success: True


Phase 2: Assembly Task:  52%|█████▎    | 525/1000 [02:24<02:47,  2.84it/s]

Phase 2 - Ep 525: Reward -0.186 | W* 0.879 | Success: False


Phase 2: Assembly Task:  53%|█████▎    | 526/1000 [02:24<02:36,  3.02it/s]

Phase 2 - Ep 526: Reward -0.145 | W* 0.882 | Success: True


Phase 2: Assembly Task:  53%|█████▎    | 527/1000 [02:24<02:36,  3.02it/s]

Phase 2 - Ep 527: Reward -0.153 | W* 0.856 | Success: True


Phase 2: Assembly Task:  53%|█████▎    | 528/1000 [02:25<02:43,  2.88it/s]

Phase 2 - Ep 528: Reward -0.186 | W* 0.841 | Success: False


Phase 2: Assembly Task:  53%|█████▎    | 529/1000 [02:25<02:47,  2.81it/s]

Phase 2 - Ep 529: Reward -0.204 | W* 0.867 | Success: False


Phase 2: Assembly Task:  53%|█████▎    | 530/1000 [02:25<02:33,  3.06it/s]

Phase 2 - Ep 530: Reward -0.135 | W* 0.837 | Success: True


Phase 2: Assembly Task:  53%|█████▎    | 532/1000 [02:26<02:18,  3.37it/s]

Phase 2 - Ep 531: Reward -0.210 | W* 0.940 | Success: False
Phase 2 - Ep 532: Reward -0.110 | W* 0.850 | Success: True


Phase 2: Assembly Task:  53%|█████▎    | 533/1000 [02:26<02:35,  3.00it/s]

Phase 2 - Ep 533: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  53%|█████▎    | 534/1000 [02:27<02:21,  3.30it/s]

Phase 2 - Ep 534: Reward -0.106 | W* 0.846 | Success: True


Phase 2: Assembly Task:  54%|█████▎    | 535/1000 [02:27<02:45,  2.81it/s]

Phase 2 - Ep 535: Reward -0.204 | W* 0.867 | Success: False


Phase 2: Assembly Task:  54%|█████▎    | 537/1000 [02:28<02:29,  3.10it/s]

Phase 2 - Ep 536: Reward -0.186 | W* 0.817 | Success: False
Phase 2 - Ep 537: Reward -0.114 | W* 0.887 | Success: True


Phase 2: Assembly Task:  54%|█████▍    | 538/1000 [02:28<02:39,  2.90it/s]

Phase 2 - Ep 538: Reward -0.204 | W* 0.817 | Success: False


Phase 2: Assembly Task:  54%|█████▍    | 539/1000 [02:29<02:44,  2.80it/s]

Phase 2 - Ep 539: Reward -0.204 | W* 0.840 | Success: False


Phase 2: Assembly Task:  54%|█████▍    | 540/1000 [02:29<02:48,  2.73it/s]

Phase 2 - Ep 540: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task:  54%|█████▍    | 541/1000 [02:29<02:37,  2.91it/s]

Phase 2 - Ep 541: Reward -0.147 | W* 0.883 | Success: True


Phase 2: Assembly Task:  54%|█████▍    | 543/1000 [02:30<02:14,  3.39it/s]

Phase 2 - Ep 542: Reward -0.186 | W* 0.862 | Success: False
Phase 2 - Ep 543: Reward -0.084 | W* 0.859 | Success: True


Phase 2: Assembly Task:  54%|█████▍    | 544/1000 [02:30<02:26,  3.10it/s]

Phase 2 - Ep 544: Reward -0.204 | W* 0.835 | Success: False


Phase 2: Assembly Task:  55%|█████▍    | 545/1000 [02:31<02:35,  2.93it/s]

Phase 2 - Ep 545: Reward -0.157 | W* 0.842 | Success: True


Phase 2: Assembly Task:  55%|█████▍    | 546/1000 [02:31<03:00,  2.52it/s]

Phase 2 - Ep 546: Reward -0.186 | W* 0.849 | Success: False


Phase 2: Assembly Task:  55%|█████▍    | 547/1000 [02:32<03:20,  2.26it/s]

Phase 2 - Ep 547: Reward -0.204 | W* 0.925 | Success: False


Phase 2: Assembly Task:  55%|█████▍    | 548/1000 [02:32<03:29,  2.16it/s]

Phase 2 - Ep 548: Reward -0.186 | W* 0.857 | Success: False


Phase 2: Assembly Task:  55%|█████▍    | 549/1000 [02:33<03:28,  2.16it/s]

Phase 2 - Ep 549: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  55%|█████▌    | 550/1000 [02:33<03:30,  2.14it/s]

Phase 2 - Ep 550: Reward -0.186 | W* 0.822 | Success: False


Phase 2: Assembly Task:  55%|█████▌    | 551/1000 [02:34<03:36,  2.07it/s]

Phase 2 - Ep 551: Reward -0.160 | W* 0.892 | Success: True


Phase 2: Assembly Task:  55%|█████▌    | 552/1000 [02:34<03:46,  1.98it/s]

Phase 2 - Ep 552: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  55%|█████▌    | 553/1000 [02:35<03:33,  2.09it/s]

Phase 2 - Ep 553: Reward -0.186 | W* 0.877 | Success: False


Phase 2: Assembly Task:  55%|█████▌    | 554/1000 [02:35<03:09,  2.35it/s]

Phase 2 - Ep 554: Reward -0.147 | W* 0.817 | Success: True


Phase 2: Assembly Task:  56%|█████▌    | 555/1000 [02:35<03:00,  2.47it/s]

Phase 2 - Ep 555: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  56%|█████▌    | 556/1000 [02:35<02:44,  2.70it/s]

Phase 2 - Ep 556: Reward -0.137 | W* 0.833 | Success: True


Phase 2: Assembly Task:  56%|█████▌    | 557/1000 [02:36<02:43,  2.70it/s]

Phase 2 - Ep 557: Reward -0.186 | W* 0.834 | Success: False


Phase 2: Assembly Task:  56%|█████▌    | 559/1000 [02:36<02:19,  3.17it/s]

Phase 2 - Ep 558: Reward -0.210 | W* 0.940 | Success: False
Phase 2 - Ep 559: Reward -0.101 | W* 0.842 | Success: True


Phase 2: Assembly Task:  56%|█████▌    | 560/1000 [02:37<02:07,  3.44it/s]

Phase 2 - Ep 560: Reward -0.124 | W* 0.840 | Success: True


Phase 2: Assembly Task:  56%|█████▌    | 561/1000 [02:37<02:18,  3.18it/s]

Phase 2 - Ep 561: Reward -0.186 | W* 0.847 | Success: False


Phase 2: Assembly Task:  56%|█████▌    | 562/1000 [02:37<02:28,  2.95it/s]

Phase 2 - Ep 562: Reward -0.204 | W* 0.832 | Success: False


Phase 2: Assembly Task:  56%|█████▋    | 563/1000 [02:38<02:31,  2.89it/s]

Phase 2 - Ep 563: Reward -0.196 | W* 0.907 | Success: False


Phase 2: Assembly Task:  56%|█████▋    | 564/1000 [02:38<02:33,  2.84it/s]

Phase 2 - Ep 564: Reward -0.204 | W* 0.932 | Success: False


Phase 2: Assembly Task:  56%|█████▋    | 565/1000 [02:39<02:41,  2.69it/s]

Phase 2 - Ep 565: Reward -0.204 | W* 0.840 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 566/1000 [02:39<02:54,  2.48it/s]

Phase 2 - Ep 566: Reward -0.204 | W* 0.850 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 567/1000 [02:40<03:02,  2.37it/s]

Phase 2 - Ep 567: Reward -0.159 | W* 0.854 | Success: True


Phase 2: Assembly Task:  57%|█████▋    | 568/1000 [02:40<03:05,  2.32it/s]

Phase 2 - Ep 568: Reward -0.186 | W* 0.887 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 569/1000 [02:40<03:10,  2.26it/s]

Phase 2 - Ep 569: Reward -0.186 | W* 0.832 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 570/1000 [02:41<03:06,  2.31it/s]

Phase 2 - Ep 570: Reward -0.186 | W* 0.819 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 571/1000 [02:41<02:58,  2.41it/s]

Phase 2 - Ep 571: Reward -0.186 | W* 0.919 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 572/1000 [02:42<02:56,  2.42it/s]

Phase 2 - Ep 572: Reward -0.186 | W* 0.828 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 573/1000 [02:42<03:06,  2.29it/s]

Phase 2 - Ep 573: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task:  57%|█████▋    | 574/1000 [02:42<02:42,  2.62it/s]

Phase 2 - Ep 574: Reward -0.110 | W* 0.850 | Success: True


Phase 2: Assembly Task:  57%|█████▊    | 575/1000 [02:43<02:47,  2.54it/s]

Phase 2 - Ep 575: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 576/1000 [02:43<02:44,  2.58it/s]

Phase 2 - Ep 576: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 577/1000 [02:44<02:39,  2.65it/s]

Phase 2 - Ep 577: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 578/1000 [02:44<02:40,  2.63it/s]

Phase 2 - Ep 578: Reward -0.186 | W* 0.837 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 579/1000 [02:44<02:40,  2.62it/s]

Phase 2 - Ep 579: Reward -0.204 | W* 0.835 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 580/1000 [02:45<02:59,  2.34it/s]

Phase 2 - Ep 580: Reward -0.186 | W* 0.869 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 581/1000 [02:45<03:09,  2.22it/s]

Phase 2 - Ep 581: Reward -0.186 | W* 0.882 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 582/1000 [02:46<03:14,  2.15it/s]

Phase 2 - Ep 582: Reward -0.186 | W* 0.829 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 583/1000 [02:46<03:16,  2.12it/s]

Phase 2 - Ep 583: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 584/1000 [02:47<03:30,  1.98it/s]

Phase 2 - Ep 584: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task:  58%|█████▊    | 585/1000 [02:47<03:30,  1.97it/s]

Phase 2 - Ep 585: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  59%|█████▊    | 586/1000 [02:48<03:14,  2.13it/s]

Phase 2 - Ep 586: Reward -0.186 | W* 0.849 | Success: False


Phase 2: Assembly Task:  59%|█████▊    | 587/1000 [02:48<02:55,  2.35it/s]

Phase 2 - Ep 587: Reward -0.127 | W* 0.861 | Success: True


Phase 2: Assembly Task:  59%|█████▉    | 588/1000 [02:49<03:01,  2.27it/s]

Phase 2 - Ep 588: Reward -0.204 | W* 0.895 | Success: False


Phase 2: Assembly Task:  59%|█████▉    | 589/1000 [02:49<03:05,  2.22it/s]

Phase 2 - Ep 589: Reward -0.186 | W* 0.877 | Success: False


Phase 2: Assembly Task:  59%|█████▉    | 590/1000 [02:50<03:05,  2.21it/s]

Phase 2 - Ep 590: Reward -0.204 | W* 0.895 | Success: False


Phase 2: Assembly Task:  59%|█████▉    | 591/1000 [02:50<02:42,  2.51it/s]

Phase 2 - Ep 591: Reward -0.124 | W* 0.827 | Success: True


Phase 2: Assembly Task:  59%|█████▉    | 592/1000 [02:50<02:28,  2.74it/s]

Phase 2 - Ep 592: Reward -0.141 | W* 0.896 | Success: True


Phase 2: Assembly Task:  59%|█████▉    | 593/1000 [02:50<02:30,  2.71it/s]

Phase 2 - Ep 593: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  59%|█████▉    | 594/1000 [02:51<02:27,  2.75it/s]

Phase 2 - Ep 594: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  60%|█████▉    | 595/1000 [02:51<02:19,  2.89it/s]

Phase 2 - Ep 595: Reward -0.148 | W* 0.817 | Success: True


Phase 2: Assembly Task:  60%|█████▉    | 596/1000 [02:51<02:23,  2.81it/s]

Phase 2 - Ep 596: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  60%|█████▉    | 597/1000 [02:52<02:26,  2.74it/s]

Phase 2 - Ep 597: Reward -0.204 | W* 0.855 | Success: False


Phase 2: Assembly Task:  60%|█████▉    | 598/1000 [02:52<02:33,  2.62it/s]

Phase 2 - Ep 598: Reward -0.154 | W* 0.890 | Success: True


Phase 2: Assembly Task:  60%|█████▉    | 599/1000 [02:53<02:32,  2.63it/s]

Phase 2 - Ep 599: Reward -0.145 | W* 0.817 | Success: True


Phase 2: Assembly Task:  60%|██████    | 600/1000 [02:53<02:26,  2.74it/s]

Phase 2 - Ep 600: Reward -0.147 | W* 0.820 | Success: True


Phase 2: Assembly Task:  60%|██████    | 601/1000 [02:53<02:30,  2.66it/s]

Phase 2 - Ep 601: Reward -0.204 | W* 0.872 | Success: False


Phase 2: Assembly Task:  60%|██████    | 602/1000 [02:54<02:29,  2.66it/s]

Phase 2 - Ep 602: Reward -0.186 | W* 0.816 | Success: False


Phase 2: Assembly Task:  60%|██████    | 603/1000 [02:54<02:30,  2.65it/s]

Phase 2 - Ep 603: Reward -0.204 | W* 0.865 | Success: False


Phase 2: Assembly Task:  60%|██████    | 604/1000 [02:54<02:11,  3.02it/s]

Phase 2 - Ep 604: Reward -0.114 | W* 0.852 | Success: True


Phase 2: Assembly Task:  61%|██████    | 606/1000 [02:55<01:55,  3.40it/s]

Phase 2 - Ep 605: Reward -0.204 | W* 0.845 | Success: False
Phase 2 - Ep 606: Reward -0.106 | W* 0.906 | Success: True


Phase 2: Assembly Task:  61%|██████    | 607/1000 [02:55<02:04,  3.15it/s]

Phase 2 - Ep 607: Reward -0.162 | W* 0.904 | Success: True


Phase 2: Assembly Task:  61%|██████    | 608/1000 [02:56<02:14,  2.92it/s]

Phase 2 - Ep 608: Reward -0.186 | W* 0.827 | Success: False


Phase 2: Assembly Task:  61%|██████    | 609/1000 [02:56<02:17,  2.84it/s]

Phase 2 - Ep 609: Reward -0.204 | W* 0.907 | Success: False


Phase 2: Assembly Task:  61%|██████    | 610/1000 [02:56<02:22,  2.74it/s]

Phase 2 - Ep 610: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  61%|██████    | 611/1000 [02:57<02:21,  2.75it/s]

Phase 2 - Ep 611: Reward -0.204 | W* 0.885 | Success: False


Phase 2: Assembly Task:  61%|██████    | 612/1000 [02:57<02:22,  2.73it/s]

Phase 2 - Ep 612: Reward -0.186 | W* 0.864 | Success: False


Phase 2: Assembly Task:  61%|██████▏   | 613/1000 [02:58<02:39,  2.42it/s]

Phase 2 - Ep 613: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  61%|██████▏   | 614/1000 [02:58<03:03,  2.10it/s]

Phase 2 - Ep 614: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 615/1000 [02:59<03:24,  1.88it/s]

Phase 2 - Ep 615: Reward -0.186 | W* 0.872 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 616/1000 [03:00<03:19,  1.92it/s]

Phase 2 - Ep 616: Reward -0.204 | W* 0.937 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 617/1000 [03:00<03:21,  1.90it/s]

Phase 2 - Ep 617: Reward -0.186 | W* 0.872 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 618/1000 [03:01<03:19,  1.92it/s]

Phase 2 - Ep 618: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 619/1000 [03:01<03:03,  2.08it/s]

Phase 2 - Ep 619: Reward -0.186 | W* 0.852 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 620/1000 [03:01<02:59,  2.12it/s]

Phase 2 - Ep 620: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 621/1000 [03:02<02:58,  2.13it/s]

Phase 2 - Ep 621: Reward -0.159 | W* 0.838 | Success: True


Phase 2: Assembly Task:  62%|██████▏   | 622/1000 [03:02<02:58,  2.11it/s]

Phase 2 - Ep 622: Reward -0.186 | W* 0.827 | Success: False


Phase 2: Assembly Task:  62%|██████▏   | 623/1000 [03:03<02:51,  2.20it/s]

Phase 2 - Ep 623: Reward -0.155 | W* 0.817 | Success: True


Phase 2: Assembly Task:  62%|██████▏   | 624/1000 [03:03<02:55,  2.14it/s]

Phase 2 - Ep 624: Reward -0.204 | W* 0.897 | Success: False


Phase 2: Assembly Task:  62%|██████▎   | 625/1000 [03:04<02:50,  2.20it/s]

Phase 2 - Ep 625: Reward -0.159 | W* 0.875 | Success: True


Phase 2: Assembly Task:  63%|██████▎   | 626/1000 [03:04<02:55,  2.14it/s]

Phase 2 - Ep 626: Reward -0.204 | W* 0.932 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 627/1000 [03:05<02:53,  2.14it/s]

Phase 2 - Ep 627: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 628/1000 [03:05<02:56,  2.11it/s]

Phase 2 - Ep 628: Reward -0.204 | W* 0.837 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 629/1000 [03:06<02:44,  2.26it/s]

Phase 2 - Ep 629: Reward -0.204 | W* 0.915 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 630/1000 [03:06<02:35,  2.38it/s]

Phase 2 - Ep 630: Reward -0.186 | W* 0.819 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 631/1000 [03:06<02:29,  2.46it/s]

Phase 2 - Ep 631: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 632/1000 [03:07<02:24,  2.54it/s]

Phase 2 - Ep 632: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 633/1000 [03:07<02:21,  2.60it/s]

Phase 2 - Ep 633: Reward -0.204 | W* 0.897 | Success: False


Phase 2: Assembly Task:  63%|██████▎   | 634/1000 [03:07<02:19,  2.61it/s]

Phase 2 - Ep 634: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  64%|██████▎   | 635/1000 [03:08<02:18,  2.64it/s]

Phase 2 - Ep 635: Reward -0.204 | W* 0.865 | Success: False


Phase 2: Assembly Task:  64%|██████▎   | 636/1000 [03:08<02:19,  2.60it/s]

Phase 2 - Ep 636: Reward -0.204 | W* 0.817 | Success: False


Phase 2: Assembly Task:  64%|██████▎   | 637/1000 [03:09<02:19,  2.60it/s]

Phase 2 - Ep 637: Reward -0.204 | W* 0.865 | Success: False


Phase 2: Assembly Task:  64%|██████▍   | 638/1000 [03:09<02:03,  2.93it/s]

Phase 2 - Ep 638: Reward -0.130 | W* 0.885 | Success: True


Phase 2: Assembly Task:  64%|██████▍   | 639/1000 [03:09<02:06,  2.85it/s]

Phase 2 - Ep 639: Reward -0.204 | W* 0.860 | Success: False


Phase 2: Assembly Task:  64%|██████▍   | 640/1000 [03:10<02:21,  2.55it/s]

Phase 2 - Ep 640: Reward -0.186 | W* 0.899 | Success: False


Phase 2: Assembly Task:  64%|██████▍   | 641/1000 [03:10<02:25,  2.47it/s]

Phase 2 - Ep 641: Reward -0.160 | W* 0.890 | Success: True


Phase 2: Assembly Task:  64%|██████▍   | 642/1000 [03:10<02:15,  2.64it/s]

Phase 2 - Ep 642: Reward -0.124 | W* 0.881 | Success: True


Phase 2: Assembly Task:  64%|██████▍   | 643/1000 [03:11<02:41,  2.21it/s]

Phase 2 - Ep 643: Reward -0.204 | W* 0.847 | Success: False


Phase 2: Assembly Task:  64%|██████▍   | 644/1000 [03:12<02:54,  2.04it/s]

Phase 2 - Ep 644: Reward -0.186 | W* 0.894 | Success: False


Phase 2: Assembly Task:  64%|██████▍   | 645/1000 [03:12<02:54,  2.04it/s]

Phase 2 - Ep 645: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  65%|██████▍   | 646/1000 [03:13<02:54,  2.03it/s]

Phase 2 - Ep 646: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  65%|██████▍   | 647/1000 [03:13<02:54,  2.02it/s]

Phase 2 - Ep 647: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  65%|██████▍   | 648/1000 [03:14<03:07,  1.88it/s]

Phase 2 - Ep 648: Reward -0.160 | W* 0.840 | Success: True


Phase 2: Assembly Task:  65%|██████▍   | 649/1000 [03:14<03:00,  1.94it/s]

Phase 2 - Ep 649: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  65%|██████▌   | 650/1000 [03:15<02:52,  2.02it/s]

Phase 2 - Ep 650: Reward -0.204 | W* 0.827 | Success: False


Phase 2: Assembly Task:  65%|██████▌   | 651/1000 [03:15<02:39,  2.19it/s]

Phase 2 - Ep 651: Reward -0.196 | W* 0.887 | Success: False


Phase 2: Assembly Task:  65%|██████▌   | 652/1000 [03:15<02:29,  2.32it/s]

Phase 2 - Ep 652: Reward -0.204 | W* 0.885 | Success: False


Phase 2: Assembly Task:  65%|██████▌   | 653/1000 [03:16<02:24,  2.40it/s]

Phase 2 - Ep 653: Reward -0.204 | W* 0.885 | Success: False


Phase 2: Assembly Task:  65%|██████▌   | 654/1000 [03:16<02:18,  2.50it/s]

Phase 2 - Ep 654: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  66%|██████▌   | 655/1000 [03:16<02:16,  2.53it/s]

Phase 2 - Ep 655: Reward -0.162 | W* 0.877 | Success: True


Phase 2: Assembly Task:  66%|██████▌   | 656/1000 [03:17<02:24,  2.39it/s]

Phase 2 - Ep 656: Reward -0.204 | W* 0.892 | Success: False


Phase 2: Assembly Task:  66%|██████▌   | 657/1000 [03:17<02:25,  2.35it/s]

Phase 2 - Ep 657: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  66%|██████▌   | 658/1000 [03:18<02:24,  2.36it/s]

Phase 2 - Ep 658: Reward -0.151 | W* 0.835 | Success: True


Phase 2: Assembly Task:  66%|██████▌   | 659/1000 [03:18<02:29,  2.28it/s]

Phase 2 - Ep 659: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  66%|██████▌   | 660/1000 [03:19<02:33,  2.22it/s]

Phase 2 - Ep 660: Reward -0.186 | W* 0.917 | Success: False


Phase 2: Assembly Task:  66%|██████▌   | 661/1000 [03:19<02:37,  2.15it/s]

Phase 2 - Ep 661: Reward -0.196 | W* 0.872 | Success: False


Phase 2: Assembly Task:  66%|██████▌   | 662/1000 [03:20<02:37,  2.14it/s]

Phase 2 - Ep 662: Reward -0.186 | W* 0.874 | Success: False


Phase 2: Assembly Task:  66%|██████▋   | 663/1000 [03:20<02:39,  2.11it/s]

Phase 2 - Ep 663: Reward -0.186 | W* 0.877 | Success: False


Phase 2: Assembly Task:  66%|██████▋   | 664/1000 [03:21<02:33,  2.19it/s]

Phase 2 - Ep 664: Reward -0.160 | W* 0.822 | Success: True


Phase 2: Assembly Task:  66%|██████▋   | 665/1000 [03:21<02:25,  2.31it/s]

Phase 2 - Ep 665: Reward -0.204 | W* 0.850 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 666/1000 [03:21<02:19,  2.39it/s]

Phase 2 - Ep 666: Reward -0.186 | W* 0.867 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 667/1000 [03:22<02:20,  2.38it/s]

Phase 2 - Ep 667: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 668/1000 [03:22<02:28,  2.24it/s]

Phase 2 - Ep 668: Reward -0.204 | W* 0.840 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 669/1000 [03:23<02:23,  2.31it/s]

Phase 2 - Ep 669: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 670/1000 [03:23<02:19,  2.37it/s]

Phase 2 - Ep 670: Reward -0.186 | W* 0.834 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 671/1000 [03:23<02:13,  2.46it/s]

Phase 2 - Ep 671: Reward -0.204 | W* 0.902 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 672/1000 [03:24<02:14,  2.43it/s]

Phase 2 - Ep 672: Reward -0.186 | W* 0.817 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 673/1000 [03:24<02:27,  2.22it/s]

Phase 2 - Ep 673: Reward -0.186 | W* 0.857 | Success: False


Phase 2: Assembly Task:  67%|██████▋   | 674/1000 [03:25<02:30,  2.17it/s]

Phase 2 - Ep 674: Reward -0.204 | W* 0.937 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 675/1000 [03:25<02:36,  2.08it/s]

Phase 2 - Ep 675: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 676/1000 [03:26<02:12,  2.44it/s]

Phase 2 - Ep 676: Reward -0.114 | W* 0.854 | Success: True


Phase 2: Assembly Task:  68%|██████▊   | 677/1000 [03:26<02:22,  2.27it/s]

Phase 2 - Ep 677: Reward -0.204 | W* 0.832 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 678/1000 [03:27<02:32,  2.12it/s]

Phase 2 - Ep 678: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 679/1000 [03:27<02:29,  2.14it/s]

Phase 2 - Ep 679: Reward -0.153 | W* 0.826 | Success: True


Phase 2: Assembly Task:  68%|██████▊   | 680/1000 [03:28<02:25,  2.19it/s]

Phase 2 - Ep 680: Reward -0.186 | W* 0.827 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 681/1000 [03:28<02:15,  2.36it/s]

Phase 2 - Ep 681: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 682/1000 [03:28<02:11,  2.41it/s]

Phase 2 - Ep 682: Reward -0.204 | W* 0.915 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 683/1000 [03:29<02:06,  2.51it/s]

Phase 2 - Ep 683: Reward -0.204 | W* 0.885 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 684/1000 [03:29<02:04,  2.53it/s]

Phase 2 - Ep 684: Reward -0.186 | W* 0.869 | Success: False


Phase 2: Assembly Task:  68%|██████▊   | 685/1000 [03:30<02:04,  2.53it/s]

Phase 2 - Ep 685: Reward -0.204 | W* 0.920 | Success: False


Phase 2: Assembly Task:  69%|██████▊   | 686/1000 [03:30<02:01,  2.58it/s]

Phase 2 - Ep 686: Reward -0.186 | W* 0.859 | Success: False


Phase 2: Assembly Task:  69%|██████▊   | 687/1000 [03:30<01:58,  2.64it/s]

Phase 2 - Ep 687: Reward -0.158 | W* 0.858 | Success: True


Phase 2: Assembly Task:  69%|██████▉   | 688/1000 [03:31<01:58,  2.63it/s]

Phase 2 - Ep 688: Reward -0.204 | W* 0.902 | Success: False


Phase 2: Assembly Task:  69%|██████▉   | 689/1000 [03:31<01:57,  2.65it/s]

Phase 2 - Ep 689: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  69%|██████▉   | 690/1000 [03:31<01:58,  2.62it/s]

Phase 2 - Ep 690: Reward -0.204 | W* 0.845 | Success: False


Phase 2: Assembly Task:  69%|██████▉   | 691/1000 [03:32<01:44,  2.96it/s]

Phase 2 - Ep 691: Reward -0.114 | W* 0.874 | Success: True


Phase 2: Assembly Task:  69%|██████▉   | 692/1000 [03:32<01:40,  3.07it/s]

Phase 2 - Ep 692: Reward -0.133 | W* 0.834 | Success: True


Phase 2: Assembly Task:  69%|██████▉   | 693/1000 [03:32<01:53,  2.70it/s]

Phase 2 - Ep 693: Reward -0.204 | W* 0.932 | Success: False


Phase 2: Assembly Task:  69%|██████▉   | 694/1000 [03:33<01:52,  2.72it/s]

Phase 2 - Ep 694: Reward -0.143 | W* 0.852 | Success: True


Phase 2: Assembly Task:  70%|██████▉   | 695/1000 [03:33<01:53,  2.69it/s]

Phase 2 - Ep 695: Reward -0.186 | W* 0.829 | Success: False


Phase 2: Assembly Task:  70%|██████▉   | 696/1000 [03:33<01:45,  2.87it/s]

Phase 2 - Ep 696: Reward -0.147 | W* 0.886 | Success: True


Phase 2: Assembly Task:  70%|██████▉   | 697/1000 [03:34<01:49,  2.77it/s]

Phase 2 - Ep 697: Reward -0.204 | W* 0.890 | Success: False


Phase 2: Assembly Task:  70%|██████▉   | 698/1000 [03:34<01:49,  2.76it/s]

Phase 2 - Ep 698: Reward -0.143 | W* 0.880 | Success: True


Phase 2: Assembly Task:  70%|██████▉   | 699/1000 [03:35<02:00,  2.50it/s]

Phase 2 - Ep 699: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  70%|███████   | 700/1000 [03:35<01:59,  2.51it/s]

Phase 2 - Ep 700: Reward -0.186 | W* 0.859 | Success: False


Phase 2: Assembly Task:  70%|███████   | 701/1000 [03:35<01:57,  2.54it/s]

Phase 2 - Ep 701: Reward -0.204 | W* 0.822 | Success: False


Phase 2: Assembly Task:  70%|███████   | 702/1000 [03:36<01:57,  2.54it/s]

Phase 2 - Ep 702: Reward -0.204 | W* 0.882 | Success: False


Phase 2: Assembly Task:  70%|███████   | 703/1000 [03:36<02:02,  2.43it/s]

Phase 2 - Ep 703: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  70%|███████   | 704/1000 [03:37<01:58,  2.49it/s]

Phase 2 - Ep 704: Reward -0.148 | W* 0.843 | Success: True


Phase 2: Assembly Task:  70%|███████   | 705/1000 [03:37<02:01,  2.42it/s]

Phase 2 - Ep 705: Reward -0.204 | W* 0.892 | Success: False


Phase 2: Assembly Task:  71%|███████   | 706/1000 [03:38<02:10,  2.25it/s]

Phase 2 - Ep 706: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  71%|███████   | 707/1000 [03:38<02:18,  2.12it/s]

Phase 2 - Ep 707: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task:  71%|███████   | 708/1000 [03:39<02:17,  2.12it/s]

Phase 2 - Ep 708: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  71%|███████   | 709/1000 [03:39<02:04,  2.33it/s]

Phase 2 - Ep 709: Reward -0.135 | W* 0.837 | Success: True


Phase 2: Assembly Task:  71%|███████   | 710/1000 [03:40<02:12,  2.19it/s]

Phase 2 - Ep 710: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  71%|███████   | 711/1000 [03:40<02:15,  2.13it/s]

Phase 2 - Ep 711: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  71%|███████▏  | 713/1000 [03:41<01:51,  2.58it/s]

Phase 2 - Ep 712: Reward -0.158 | W* 0.822 | Success: True
Phase 2 - Ep 713: Reward -0.084 | W* 0.872 | Success: True


Phase 2: Assembly Task:  71%|███████▏  | 714/1000 [03:41<01:40,  2.83it/s]

Phase 2 - Ep 714: Reward -0.143 | W* 0.883 | Success: True


Phase 2: Assembly Task:  72%|███████▏  | 715/1000 [03:41<01:47,  2.66it/s]

Phase 2 - Ep 715: Reward -0.154 | W* 0.834 | Success: True


Phase 2: Assembly Task:  72%|███████▏  | 716/1000 [03:42<01:46,  2.67it/s]

Phase 2 - Ep 716: Reward -0.150 | W* 0.904 | Success: True


Phase 2: Assembly Task:  72%|███████▏  | 717/1000 [03:42<01:54,  2.47it/s]

Phase 2 - Ep 717: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  72%|███████▏  | 718/1000 [03:43<01:59,  2.36it/s]

Phase 2 - Ep 718: Reward -0.160 | W* 0.835 | Success: True


Phase 2: Assembly Task:  72%|███████▏  | 719/1000 [03:43<01:59,  2.35it/s]

Phase 2 - Ep 719: Reward -0.186 | W* 0.829 | Success: False


Phase 2: Assembly Task:  72%|███████▏  | 720/1000 [03:44<01:55,  2.42it/s]

Phase 2 - Ep 720: Reward -0.204 | W* 0.922 | Success: False


Phase 2: Assembly Task:  72%|███████▏  | 721/1000 [03:44<01:51,  2.49it/s]

Phase 2 - Ep 721: Reward -0.204 | W* 0.862 | Success: False


Phase 2: Assembly Task:  72%|███████▏  | 722/1000 [03:44<01:47,  2.59it/s]

Phase 2 - Ep 722: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  72%|███████▏  | 723/1000 [03:45<01:46,  2.61it/s]

Phase 2 - Ep 723: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  72%|███████▏  | 724/1000 [03:45<01:44,  2.65it/s]

Phase 2 - Ep 724: Reward -0.186 | W* 0.824 | Success: False


Phase 2: Assembly Task:  72%|███████▎  | 725/1000 [03:45<01:45,  2.62it/s]

Phase 2 - Ep 725: Reward -0.204 | W* 0.877 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 726/1000 [03:46<01:42,  2.67it/s]

Phase 2 - Ep 726: Reward -0.204 | W* 0.930 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 727/1000 [03:46<01:42,  2.66it/s]

Phase 2 - Ep 727: Reward -0.204 | W* 0.915 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 728/1000 [03:47<01:50,  2.45it/s]

Phase 2 - Ep 728: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 729/1000 [03:47<01:35,  2.82it/s]

Phase 2 - Ep 729: Reward -0.096 | W* 0.837 | Success: True


Phase 2: Assembly Task:  73%|███████▎  | 730/1000 [03:47<01:43,  2.60it/s]

Phase 2 - Ep 730: Reward -0.186 | W* 0.874 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 731/1000 [03:48<01:51,  2.41it/s]

Phase 2 - Ep 731: Reward -0.186 | W* 0.839 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 732/1000 [03:48<01:54,  2.34it/s]

Phase 2 - Ep 732: Reward -0.186 | W* 0.874 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 733/1000 [03:49<01:57,  2.27it/s]

Phase 2 - Ep 733: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  73%|███████▎  | 734/1000 [03:49<02:00,  2.21it/s]

Phase 2 - Ep 734: Reward -0.204 | W* 0.835 | Success: False


Phase 2: Assembly Task:  74%|███████▎  | 735/1000 [03:50<01:54,  2.32it/s]

Phase 2 - Ep 735: Reward -0.204 | W* 0.937 | Success: False


Phase 2: Assembly Task:  74%|███████▎  | 736/1000 [03:50<01:47,  2.45it/s]

Phase 2 - Ep 736: Reward -0.204 | W* 0.937 | Success: False


Phase 2: Assembly Task:  74%|███████▎  | 737/1000 [03:50<01:45,  2.50it/s]

Phase 2 - Ep 737: Reward -0.186 | W* 0.852 | Success: False


Phase 2: Assembly Task:  74%|███████▍  | 738/1000 [03:51<01:50,  2.37it/s]

Phase 2 - Ep 738: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  74%|███████▍  | 739/1000 [03:51<01:56,  2.24it/s]

Phase 2 - Ep 739: Reward -0.204 | W* 0.930 | Success: False


Phase 2: Assembly Task:  74%|███████▍  | 740/1000 [03:52<02:02,  2.12it/s]

Phase 2 - Ep 740: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  74%|███████▍  | 741/1000 [03:52<01:52,  2.30it/s]

Phase 2 - Ep 741: Reward -0.143 | W* 0.887 | Success: True


Phase 2: Assembly Task:  74%|███████▍  | 742/1000 [03:53<01:57,  2.20it/s]

Phase 2 - Ep 742: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  74%|███████▍  | 743/1000 [03:53<02:03,  2.07it/s]

Phase 2 - Ep 743: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  74%|███████▍  | 744/1000 [03:54<02:08,  1.99it/s]

Phase 2 - Ep 744: Reward -0.186 | W* 0.827 | Success: False


Phase 2: Assembly Task:  74%|███████▍  | 745/1000 [03:54<01:59,  2.13it/s]

Phase 2 - Ep 745: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  75%|███████▍  | 747/1000 [03:55<01:29,  2.83it/s]

Phase 2 - Ep 746: Reward -0.186 | W* 0.914 | Success: False
Phase 2 - Ep 747: Reward -0.069 | W* 0.850 | Success: True


Phase 2: Assembly Task:  75%|███████▍  | 748/1000 [03:55<01:32,  2.72it/s]

Phase 2 - Ep 748: Reward -0.204 | W* 0.862 | Success: False
Phase 2 - Ep 749: Reward 0.090 | W* 0.826 | Success: True


Phase 2: Assembly Task:  75%|███████▌  | 750/1000 [03:56<01:18,  3.19it/s]

Phase 2 - Ep 750: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  75%|███████▌  | 751/1000 [03:56<01:29,  2.77it/s]

Phase 2 - Ep 751: Reward -0.204 | W* 0.820 | Success: False


Phase 2: Assembly Task:  75%|███████▌  | 752/1000 [03:56<01:33,  2.66it/s]

Phase 2 - Ep 752: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  75%|███████▌  | 753/1000 [03:57<01:32,  2.67it/s]

Phase 2 - Ep 753: Reward -0.204 | W* 0.872 | Success: False


Phase 2: Assembly Task:  75%|███████▌  | 754/1000 [03:57<01:32,  2.67it/s]

Phase 2 - Ep 754: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  76%|███████▌  | 755/1000 [03:58<01:32,  2.66it/s]

Phase 2 - Ep 755: Reward -0.159 | W* 0.883 | Success: True


Phase 2: Assembly Task:  76%|███████▌  | 756/1000 [03:58<01:24,  2.90it/s]

Phase 2 - Ep 756: Reward -0.141 | W* 0.889 | Success: True


Phase 2: Assembly Task:  76%|███████▌  | 757/1000 [03:58<01:25,  2.84it/s]

Phase 2 - Ep 757: Reward -0.186 | W* 0.914 | Success: False


Phase 2: Assembly Task:  76%|███████▌  | 758/1000 [03:59<01:31,  2.63it/s]

Phase 2 - Ep 758: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  76%|███████▌  | 759/1000 [03:59<01:40,  2.41it/s]

Phase 2 - Ep 759: Reward -0.186 | W* 0.889 | Success: False


Phase 2: Assembly Task:  76%|███████▌  | 760/1000 [04:00<01:43,  2.31it/s]

Phase 2 - Ep 760: Reward -0.186 | W* 0.854 | Success: False


Phase 2: Assembly Task:  76%|███████▌  | 761/1000 [04:00<01:46,  2.24it/s]

Phase 2 - Ep 761: Reward -0.204 | W* 0.870 | Success: False


Phase 2: Assembly Task:  76%|███████▌  | 762/1000 [04:01<01:43,  2.29it/s]

Phase 2 - Ep 762: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  76%|███████▋  | 763/1000 [04:01<01:38,  2.42it/s]

Phase 2 - Ep 763: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  76%|███████▋  | 764/1000 [04:01<01:35,  2.46it/s]

Phase 2 - Ep 764: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  76%|███████▋  | 765/1000 [04:02<01:37,  2.41it/s]

Phase 2 - Ep 765: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  77%|███████▋  | 766/1000 [04:02<01:41,  2.31it/s]

Phase 2 - Ep 766: Reward -0.204 | W* 0.937 | Success: False


Phase 2: Assembly Task:  77%|███████▋  | 767/1000 [04:03<01:42,  2.28it/s]

Phase 2 - Ep 767: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  77%|███████▋  | 768/1000 [04:03<01:45,  2.20it/s]

Phase 2 - Ep 768: Reward -0.204 | W* 0.822 | Success: False


Phase 2: Assembly Task:  77%|███████▋  | 769/1000 [04:04<01:45,  2.18it/s]

Phase 2 - Ep 769: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  77%|███████▋  | 770/1000 [04:04<01:58,  1.94it/s]

Phase 2 - Ep 770: Reward -0.204 | W* 0.907 | Success: False


Phase 2: Assembly Task:  77%|███████▋  | 771/1000 [04:05<02:01,  1.88it/s]

Phase 2 - Ep 771: Reward -0.204 | W* 0.857 | Success: False


Phase 2: Assembly Task:  77%|███████▋  | 772/1000 [04:05<01:59,  1.92it/s]

Phase 2 - Ep 772: Reward -0.186 | W* 0.892 | Success: False
Phase 2 - Ep 773: Reward 0.190 | W* 0.851 | Success: True


Phase 2: Assembly Task:  77%|███████▋  | 774/1000 [04:06<01:34,  2.40it/s]

Phase 2 - Ep 774: Reward -0.186 | W* 0.842 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 775/1000 [04:06<01:40,  2.23it/s]

Phase 2 - Ep 775: Reward -0.204 | W* 0.862 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 776/1000 [04:07<01:46,  2.10it/s]

Phase 2 - Ep 776: Reward -0.204 | W* 0.837 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 777/1000 [04:07<01:39,  2.24it/s]

Phase 2 - Ep 777: Reward -0.204 | W* 0.895 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 778/1000 [04:08<01:35,  2.34it/s]

Phase 2 - Ep 778: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 779/1000 [04:08<01:33,  2.37it/s]

Phase 2 - Ep 779: Reward -0.204 | W* 0.850 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 780/1000 [04:09<01:30,  2.42it/s]

Phase 2 - Ep 780: Reward -0.204 | W* 0.860 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 781/1000 [04:09<01:35,  2.29it/s]

Phase 2 - Ep 781: Reward -0.204 | W* 0.882 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 782/1000 [04:09<01:32,  2.34it/s]

Phase 2 - Ep 782: Reward -0.151 | W* 0.848 | Success: True


Phase 2: Assembly Task:  78%|███████▊  | 783/1000 [04:10<01:27,  2.49it/s]

Phase 2 - Ep 783: Reward -0.143 | W* 0.821 | Success: True


Phase 2: Assembly Task:  78%|███████▊  | 784/1000 [04:10<01:24,  2.55it/s]

Phase 2 - Ep 784: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  78%|███████▊  | 785/1000 [04:10<01:17,  2.77it/s]

Phase 2 - Ep 785: Reward -0.145 | W* 0.902 | Success: True


Phase 2: Assembly Task:  79%|███████▊  | 786/1000 [04:11<01:18,  2.72it/s]

Phase 2 - Ep 786: Reward -0.204 | W* 0.880 | Success: False


Phase 2: Assembly Task:  79%|███████▊  | 787/1000 [04:11<01:19,  2.69it/s]

Phase 2 - Ep 787: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  79%|███████▉  | 788/1000 [04:12<01:19,  2.68it/s]

Phase 2 - Ep 788: Reward -0.204 | W* 0.885 | Success: False


Phase 2: Assembly Task:  79%|███████▉  | 789/1000 [04:12<01:25,  2.47it/s]

Phase 2 - Ep 789: Reward -0.161 | W* 0.881 | Success: True


Phase 2: Assembly Task:  79%|███████▉  | 790/1000 [04:13<01:28,  2.36it/s]

Phase 2 - Ep 790: Reward -0.204 | W* 0.922 | Success: False


Phase 2: Assembly Task:  79%|███████▉  | 791/1000 [04:13<01:27,  2.38it/s]

Phase 2 - Ep 791: Reward -0.204 | W* 0.822 | Success: False


Phase 2: Assembly Task:  79%|███████▉  | 792/1000 [04:13<01:19,  2.63it/s]

Phase 2 - Ep 792: Reward -0.139 | W* 0.817 | Success: True


Phase 2: Assembly Task:  79%|███████▉  | 793/1000 [04:14<01:18,  2.64it/s]

Phase 2 - Ep 793: Reward -0.204 | W* 0.920 | Success: False


Phase 2: Assembly Task:  79%|███████▉  | 794/1000 [04:14<01:23,  2.46it/s]

Phase 2 - Ep 794: Reward -0.186 | W* 0.882 | Success: False


Phase 2: Assembly Task:  80%|███████▉  | 795/1000 [04:15<01:27,  2.35it/s]

Phase 2 - Ep 795: Reward -0.186 | W* 0.864 | Success: False


Phase 2: Assembly Task:  80%|███████▉  | 796/1000 [04:15<01:19,  2.57it/s]

Phase 2 - Ep 796: Reward -0.135 | W* 0.865 | Success: True


Phase 2: Assembly Task:  80%|███████▉  | 797/1000 [04:15<01:24,  2.41it/s]

Phase 2 - Ep 797: Reward -0.186 | W* 0.847 | Success: False


Phase 2: Assembly Task:  80%|███████▉  | 798/1000 [04:16<01:25,  2.36it/s]

Phase 2 - Ep 798: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  80%|███████▉  | 799/1000 [04:16<01:23,  2.41it/s]

Phase 2 - Ep 799: Reward -0.204 | W* 0.880 | Success: False


Phase 2: Assembly Task:  80%|████████  | 800/1000 [04:17<01:20,  2.48it/s]

Phase 2 - Ep 800: Reward -0.204 | W* 0.855 | Success: False


Phase 2: Assembly Task:  80%|████████  | 801/1000 [04:17<01:22,  2.40it/s]

Phase 2 - Ep 801: Reward -0.204 | W* 0.882 | Success: False


Phase 2: Assembly Task:  80%|████████  | 802/1000 [04:18<01:38,  2.01it/s]

Phase 2 - Ep 802: Reward -0.186 | W* 0.818 | Success: False


Phase 2: Assembly Task:  80%|████████  | 803/1000 [04:18<01:48,  1.82it/s]

Phase 2 - Ep 803: Reward -0.204 | W* 0.872 | Success: False


Phase 2: Assembly Task:  80%|████████  | 804/1000 [04:19<01:51,  1.76it/s]

Phase 2 - Ep 804: Reward -0.204 | W* 0.817 | Success: False


Phase 2: Assembly Task:  80%|████████  | 805/1000 [04:20<01:51,  1.75it/s]

Phase 2 - Ep 805: Reward -0.204 | W* 0.830 | Success: False


Phase 2: Assembly Task:  81%|████████  | 806/1000 [04:20<01:48,  1.78it/s]

Phase 2 - Ep 806: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  81%|████████  | 807/1000 [04:21<01:39,  1.95it/s]

Phase 2 - Ep 807: Reward -0.204 | W* 0.842 | Success: False


Phase 2: Assembly Task:  81%|████████  | 808/1000 [04:21<01:30,  2.13it/s]

Phase 2 - Ep 808: Reward -0.186 | W* 0.894 | Success: False


Phase 2: Assembly Task:  81%|████████  | 809/1000 [04:21<01:23,  2.28it/s]

Phase 2 - Ep 809: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  81%|████████  | 810/1000 [04:22<01:20,  2.35it/s]

Phase 2 - Ep 810: Reward -0.204 | W* 0.887 | Success: False


Phase 2: Assembly Task:  81%|████████  | 811/1000 [04:22<01:17,  2.44it/s]

Phase 2 - Ep 811: Reward -0.186 | W* 0.862 | Success: False


Phase 2: Assembly Task:  81%|████████  | 812/1000 [04:22<01:15,  2.49it/s]

Phase 2 - Ep 812: Reward -0.186 | W* 0.867 | Success: False


Phase 2: Assembly Task:  81%|████████▏ | 813/1000 [04:23<01:13,  2.53it/s]

Phase 2 - Ep 813: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  81%|████████▏ | 814/1000 [04:23<01:14,  2.48it/s]

Phase 2 - Ep 814: Reward -0.196 | W* 0.920 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 815/1000 [04:24<01:19,  2.31it/s]

Phase 2 - Ep 815: Reward -0.204 | W* 0.835 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 816/1000 [04:24<01:14,  2.48it/s]

Phase 2 - Ep 816: Reward -0.135 | W* 0.849 | Success: True


Phase 2: Assembly Task:  82%|████████▏ | 817/1000 [04:25<01:18,  2.33it/s]

Phase 2 - Ep 817: Reward -0.204 | W* 0.857 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 818/1000 [04:25<01:19,  2.29it/s]

Phase 2 - Ep 818: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 819/1000 [04:25<01:15,  2.39it/s]

Phase 2 - Ep 819: Reward -0.204 | W* 0.922 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 820/1000 [04:26<01:14,  2.42it/s]

Phase 2 - Ep 820: Reward -0.204 | W* 0.862 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 821/1000 [04:26<01:14,  2.40it/s]

Phase 2 - Ep 821: Reward -0.161 | W* 0.855 | Success: True


Phase 2: Assembly Task:  82%|████████▏ | 822/1000 [04:27<01:17,  2.30it/s]

Phase 2 - Ep 822: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 823/1000 [04:27<01:19,  2.22it/s]

Phase 2 - Ep 823: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  82%|████████▏ | 824/1000 [04:28<01:21,  2.16it/s]

Phase 2 - Ep 824: Reward -0.204 | W* 0.885 | Success: False


Phase 2: Assembly Task:  82%|████████▎ | 825/1000 [04:28<01:22,  2.12it/s]

Phase 2 - Ep 825: Reward -0.204 | W* 0.872 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 826/1000 [04:29<01:23,  2.09it/s]

Phase 2 - Ep 826: Reward -0.204 | W* 0.832 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 827/1000 [04:29<01:23,  2.08it/s]

Phase 2 - Ep 827: Reward -0.186 | W* 0.842 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 828/1000 [04:29<01:17,  2.23it/s]

Phase 2 - Ep 828: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 829/1000 [04:30<01:13,  2.33it/s]

Phase 2 - Ep 829: Reward -0.204 | W* 0.847 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 830/1000 [04:30<01:16,  2.21it/s]

Phase 2 - Ep 830: Reward -0.186 | W* 0.844 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 831/1000 [04:31<01:19,  2.13it/s]

Phase 2 - Ep 831: Reward -0.186 | W* 0.829 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 832/1000 [04:31<01:21,  2.06it/s]

Phase 2 - Ep 832: Reward -0.204 | W* 0.832 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 833/1000 [04:32<01:20,  2.07it/s]

Phase 2 - Ep 833: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  83%|████████▎ | 834/1000 [04:32<01:21,  2.04it/s]

Phase 2 - Ep 834: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  84%|████████▎ | 835/1000 [04:33<01:22,  2.00it/s]

Phase 2 - Ep 835: Reward -0.204 | W* 0.887 | Success: False


Phase 2: Assembly Task:  84%|████████▎ | 836/1000 [04:33<01:23,  1.96it/s]

Phase 2 - Ep 836: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  84%|████████▎ | 837/1000 [04:34<01:16,  2.13it/s]

Phase 2 - Ep 837: Reward -0.186 | W* 0.837 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 838/1000 [04:34<01:11,  2.26it/s]

Phase 2 - Ep 838: Reward -0.204 | W* 0.882 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 839/1000 [04:35<01:14,  2.17it/s]

Phase 2 - Ep 839: Reward -0.186 | W* 0.852 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 840/1000 [04:35<01:14,  2.14it/s]

Phase 2 - Ep 840: Reward -0.204 | W* 0.870 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 841/1000 [04:36<01:14,  2.13it/s]

Phase 2 - Ep 841: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 842/1000 [04:36<01:13,  2.15it/s]

Phase 2 - Ep 842: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 843/1000 [04:37<01:13,  2.13it/s]

Phase 2 - Ep 843: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 844/1000 [04:37<01:13,  2.13it/s]

Phase 2 - Ep 844: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  84%|████████▍ | 845/1000 [04:37<01:10,  2.18it/s]

Phase 2 - Ep 845: Reward -0.204 | W* 0.892 | Success: False


Phase 2: Assembly Task:  85%|████████▍ | 846/1000 [04:38<01:05,  2.33it/s]

Phase 2 - Ep 846: Reward -0.157 | W* 0.820 | Success: True


Phase 2: Assembly Task:  85%|████████▍ | 847/1000 [04:38<01:02,  2.44it/s]

Phase 2 - Ep 847: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  85%|████████▍ | 848/1000 [04:39<01:01,  2.47it/s]

Phase 2 - Ep 848: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  85%|████████▍ | 849/1000 [04:39<00:59,  2.54it/s]

Phase 2 - Ep 849: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  85%|████████▌ | 850/1000 [04:39<00:58,  2.55it/s]

Phase 2 - Ep 850: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  85%|████████▌ | 851/1000 [04:40<01:01,  2.40it/s]

Phase 2 - Ep 851: Reward -0.204 | W* 0.940 | Success: False


Phase 2: Assembly Task:  85%|████████▌ | 852/1000 [04:40<01:03,  2.33it/s]

Phase 2 - Ep 852: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  85%|████████▌ | 853/1000 [04:41<01:05,  2.25it/s]

Phase 2 - Ep 853: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  85%|████████▌ | 854/1000 [04:41<01:06,  2.20it/s]

Phase 2 - Ep 854: Reward -0.204 | W* 0.875 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 855/1000 [04:42<01:03,  2.27it/s]

Phase 2 - Ep 855: Reward -0.186 | W* 0.879 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 856/1000 [04:42<01:00,  2.39it/s]

Phase 2 - Ep 856: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 857/1000 [04:42<00:57,  2.47it/s]

Phase 2 - Ep 857: Reward -0.186 | W* 0.922 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 858/1000 [04:43<00:59,  2.37it/s]

Phase 2 - Ep 858: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 859/1000 [04:43<01:00,  2.32it/s]

Phase 2 - Ep 859: Reward -0.204 | W* 0.907 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 860/1000 [04:44<01:09,  2.03it/s]

Phase 2 - Ep 860: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 861/1000 [04:45<01:12,  1.91it/s]

Phase 2 - Ep 861: Reward -0.204 | W* 0.915 | Success: False


Phase 2: Assembly Task:  86%|████████▌ | 862/1000 [04:45<01:15,  1.83it/s]

Phase 2 - Ep 862: Reward -0.186 | W* 0.817 | Success: False


Phase 2: Assembly Task:  86%|████████▋ | 863/1000 [04:46<01:14,  1.84it/s]

Phase 2 - Ep 863: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  86%|████████▋ | 864/1000 [04:46<01:13,  1.86it/s]

Phase 2 - Ep 864: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  86%|████████▋ | 865/1000 [04:47<01:11,  1.89it/s]

Phase 2 - Ep 865: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 866/1000 [04:47<01:04,  2.07it/s]

Phase 2 - Ep 866: Reward -0.204 | W* 0.897 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 867/1000 [04:47<01:00,  2.21it/s]

Phase 2 - Ep 867: Reward -0.196 | W* 0.845 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 868/1000 [04:48<00:56,  2.36it/s]

Phase 2 - Ep 868: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 869/1000 [04:48<00:53,  2.43it/s]

Phase 2 - Ep 869: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 870/1000 [04:49<00:51,  2.50it/s]

Phase 2 - Ep 870: Reward -0.204 | W* 0.862 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 871/1000 [04:49<00:49,  2.58it/s]

Phase 2 - Ep 871: Reward -0.204 | W* 0.905 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 872/1000 [04:49<00:49,  2.61it/s]

Phase 2 - Ep 872: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 873/1000 [04:50<00:48,  2.61it/s]

Phase 2 - Ep 873: Reward -0.204 | W* 0.850 | Success: False


Phase 2: Assembly Task:  87%|████████▋ | 874/1000 [04:50<00:47,  2.65it/s]

Phase 2 - Ep 874: Reward -0.204 | W* 0.940 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 875/1000 [04:50<00:47,  2.63it/s]

Phase 2 - Ep 875: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 876/1000 [04:51<00:47,  2.63it/s]

Phase 2 - Ep 876: Reward -0.186 | W* 0.857 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 877/1000 [04:51<00:47,  2.60it/s]

Phase 2 - Ep 877: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 878/1000 [04:52<00:46,  2.62it/s]

Phase 2 - Ep 878: Reward -0.186 | W* 0.877 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 879/1000 [04:52<00:46,  2.61it/s]

Phase 2 - Ep 879: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 880/1000 [04:52<00:46,  2.56it/s]

Phase 2 - Ep 880: Reward -0.204 | W* 0.830 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 881/1000 [04:53<00:45,  2.63it/s]

Phase 2 - Ep 881: Reward -0.204 | W* 0.915 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 882/1000 [04:53<00:44,  2.66it/s]

Phase 2 - Ep 882: Reward -0.204 | W* 0.905 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 883/1000 [04:54<00:44,  2.65it/s]

Phase 2 - Ep 883: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 884/1000 [04:54<00:45,  2.55it/s]

Phase 2 - Ep 884: Reward -0.204 | W* 0.900 | Success: False


Phase 2: Assembly Task:  88%|████████▊ | 885/1000 [04:54<00:44,  2.61it/s]

Phase 2 - Ep 885: Reward -0.143 | W* 0.835 | Success: True


Phase 2: Assembly Task:  89%|████████▊ | 886/1000 [04:55<00:48,  2.37it/s]

Phase 2 - Ep 886: Reward -0.204 | W* 0.927 | Success: False


Phase 2: Assembly Task:  89%|████████▊ | 887/1000 [04:55<00:49,  2.28it/s]

Phase 2 - Ep 887: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task:  89%|████████▉ | 888/1000 [04:56<00:50,  2.24it/s]

Phase 2 - Ep 888: Reward -0.204 | W* 0.895 | Success: False


Phase 2: Assembly Task:  89%|████████▉ | 889/1000 [04:56<00:46,  2.37it/s]

Phase 2 - Ep 889: Reward -0.204 | W* 0.940 | Success: False


Phase 2: Assembly Task:  89%|████████▉ | 890/1000 [04:57<00:45,  2.42it/s]

Phase 2 - Ep 890: Reward -0.186 | W* 0.884 | Success: False


Phase 2: Assembly Task:  89%|████████▉ | 891/1000 [04:57<00:41,  2.66it/s]

Phase 2 - Ep 891: Reward -0.127 | W* 0.857 | Success: True


Phase 2: Assembly Task:  89%|████████▉ | 892/1000 [04:57<00:44,  2.40it/s]

Phase 2 - Ep 892: Reward -0.186 | W* 0.917 | Success: False


Phase 2: Assembly Task:  89%|████████▉ | 893/1000 [04:58<00:48,  2.20it/s]

Phase 2 - Ep 893: Reward -0.186 | W* 0.892 | Success: False


Phase 2: Assembly Task:  89%|████████▉ | 894/1000 [04:58<00:49,  2.14it/s]

Phase 2 - Ep 894: Reward -0.204 | W* 0.875 | Success: False


Phase 2: Assembly Task:  90%|████████▉ | 895/1000 [04:59<00:50,  2.08it/s]

Phase 2 - Ep 895: Reward -0.204 | W* 0.930 | Success: False


Phase 2: Assembly Task:  90%|████████▉ | 896/1000 [04:59<00:51,  2.02it/s]

Phase 2 - Ep 896: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  90%|████████▉ | 897/1000 [05:00<00:54,  1.90it/s]

Phase 2 - Ep 897: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  90%|████████▉ | 898/1000 [05:00<00:48,  2.08it/s]

Phase 2 - Ep 898: Reward -0.204 | W* 0.930 | Success: False


Phase 2: Assembly Task:  90%|████████▉ | 899/1000 [05:01<00:45,  2.24it/s]

Phase 2 - Ep 899: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  90%|█████████ | 900/1000 [05:01<00:42,  2.38it/s]

Phase 2 - Ep 900: Reward -0.155 | W* 0.894 | Success: True


Phase 2: Assembly Task:  90%|█████████ | 901/1000 [05:01<00:40,  2.43it/s]

Phase 2 - Ep 901: Reward -0.204 | W* 0.862 | Success: False


Phase 2: Assembly Task:  90%|█████████ | 902/1000 [05:02<00:40,  2.44it/s]

Phase 2 - Ep 902: Reward -0.204 | W* 0.842 | Success: False


Phase 2: Assembly Task:  90%|█████████ | 903/1000 [05:02<00:38,  2.51it/s]

Phase 2 - Ep 903: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  90%|█████████ | 904/1000 [05:03<00:37,  2.57it/s]

Phase 2 - Ep 904: Reward -0.186 | W* 0.897 | Success: False


Phase 2: Assembly Task:  90%|█████████ | 905/1000 [05:03<00:38,  2.49it/s]

Phase 2 - Ep 905: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  91%|█████████ | 906/1000 [05:04<00:39,  2.37it/s]

Phase 2 - Ep 906: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  91%|█████████ | 907/1000 [05:04<00:40,  2.29it/s]

Phase 2 - Ep 907: Reward -0.204 | W* 0.832 | Success: False


Phase 2: Assembly Task:  91%|█████████ | 908/1000 [05:04<00:38,  2.36it/s]

Phase 2 - Ep 908: Reward -0.204 | W* 0.857 | Success: False


Phase 2: Assembly Task:  91%|█████████ | 909/1000 [05:05<00:37,  2.45it/s]

Phase 2 - Ep 909: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  91%|█████████ | 910/1000 [05:05<00:36,  2.47it/s]

Phase 2 - Ep 910: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  91%|█████████ | 911/1000 [05:06<00:35,  2.54it/s]

Phase 2 - Ep 911: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  91%|█████████ | 912/1000 [05:06<00:34,  2.58it/s]

Phase 2 - Ep 912: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  91%|█████████▏| 913/1000 [05:06<00:36,  2.39it/s]

Phase 2 - Ep 913: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  91%|█████████▏| 914/1000 [05:07<00:37,  2.29it/s]

Phase 2 - Ep 914: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 915/1000 [05:07<00:38,  2.21it/s]

Phase 2 - Ep 915: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 916/1000 [05:08<00:39,  2.14it/s]

Phase 2 - Ep 916: Reward -0.186 | W* 0.842 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 917/1000 [05:08<00:37,  2.21it/s]

Phase 2 - Ep 917: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 918/1000 [05:09<00:35,  2.33it/s]

Phase 2 - Ep 918: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 919/1000 [05:09<00:33,  2.41it/s]

Phase 2 - Ep 919: Reward -0.204 | W* 0.902 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 920/1000 [05:09<00:32,  2.44it/s]

Phase 2 - Ep 920: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 921/1000 [05:10<00:31,  2.49it/s]

Phase 2 - Ep 921: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 922/1000 [05:10<00:33,  2.33it/s]

Phase 2 - Ep 922: Reward -0.204 | W* 0.915 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 923/1000 [05:11<00:35,  2.19it/s]

Phase 2 - Ep 923: Reward -0.186 | W* 0.827 | Success: False


Phase 2: Assembly Task:  92%|█████████▏| 924/1000 [05:11<00:37,  2.04it/s]

Phase 2 - Ep 924: Reward -0.204 | W* 0.847 | Success: False


Phase 2: Assembly Task:  92%|█████████▎| 925/1000 [05:12<00:37,  2.02it/s]

Phase 2 - Ep 925: Reward -0.204 | W* 0.880 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 926/1000 [05:12<00:38,  1.94it/s]

Phase 2 - Ep 926: Reward -0.204 | W* 0.827 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 927/1000 [05:13<00:37,  1.94it/s]

Phase 2 - Ep 927: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 928/1000 [05:14<00:38,  1.87it/s]

Phase 2 - Ep 928: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 929/1000 [05:14<00:36,  1.93it/s]

Phase 2 - Ep 929: Reward -0.204 | W* 0.837 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 930/1000 [05:14<00:34,  2.01it/s]

Phase 2 - Ep 930: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 931/1000 [05:15<00:31,  2.19it/s]

Phase 2 - Ep 931: Reward -0.204 | W* 0.920 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 932/1000 [05:15<00:29,  2.33it/s]

Phase 2 - Ep 932: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 933/1000 [05:16<00:28,  2.39it/s]

Phase 2 - Ep 933: Reward -0.186 | W* 0.822 | Success: False


Phase 2: Assembly Task:  93%|█████████▎| 934/1000 [05:16<00:26,  2.50it/s]

Phase 2 - Ep 934: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  94%|█████████▎| 935/1000 [05:16<00:26,  2.48it/s]

Phase 2 - Ep 935: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  94%|█████████▎| 936/1000 [05:17<00:27,  2.32it/s]

Phase 2 - Ep 936: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  94%|█████████▎| 937/1000 [05:17<00:27,  2.32it/s]

Phase 2 - Ep 937: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 938/1000 [05:18<00:25,  2.38it/s]

Phase 2 - Ep 938: Reward -0.204 | W* 0.882 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 939/1000 [05:18<00:24,  2.48it/s]

Phase 2 - Ep 939: Reward -0.186 | W* 0.892 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 940/1000 [05:18<00:24,  2.42it/s]

Phase 2 - Ep 940: Reward -0.186 | W* 0.844 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 941/1000 [05:19<00:25,  2.34it/s]

Phase 2 - Ep 941: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 942/1000 [05:19<00:25,  2.32it/s]

Phase 2 - Ep 942: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 943/1000 [05:20<00:24,  2.36it/s]

Phase 2 - Ep 943: Reward -0.204 | W* 0.842 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 944/1000 [05:20<00:22,  2.44it/s]

Phase 2 - Ep 944: Reward -0.204 | W* 0.830 | Success: False


Phase 2: Assembly Task:  94%|█████████▍| 945/1000 [05:21<00:21,  2.52it/s]

Phase 2 - Ep 945: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  95%|█████████▍| 946/1000 [05:21<00:21,  2.53it/s]

Phase 2 - Ep 946: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  95%|█████████▍| 947/1000 [05:21<00:20,  2.60it/s]

Phase 2 - Ep 947: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  95%|█████████▍| 948/1000 [05:22<00:19,  2.65it/s]

Phase 2 - Ep 948: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  95%|█████████▍| 949/1000 [05:22<00:19,  2.65it/s]

Phase 2 - Ep 949: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  95%|█████████▌| 950/1000 [05:22<00:19,  2.58it/s]

Phase 2 - Ep 950: Reward -0.204 | W* 0.852 | Success: False


Phase 2: Assembly Task:  95%|█████████▌| 951/1000 [05:23<00:19,  2.48it/s]

Phase 2 - Ep 951: Reward -0.157 | W* 0.861 | Success: True


Phase 2: Assembly Task:  95%|█████████▌| 952/1000 [05:23<00:20,  2.36it/s]

Phase 2 - Ep 952: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  95%|█████████▌| 953/1000 [05:24<00:23,  2.02it/s]

Phase 2 - Ep 953: Reward -0.204 | W* 0.935 | Success: False


Phase 2: Assembly Task:  95%|█████████▌| 954/1000 [05:25<00:23,  1.92it/s]

Phase 2 - Ep 954: Reward -0.204 | W* 0.832 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 955/1000 [05:25<00:23,  1.93it/s]

Phase 2 - Ep 955: Reward -0.204 | W* 0.877 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 956/1000 [05:26<00:22,  1.97it/s]

Phase 2 - Ep 956: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 957/1000 [05:26<00:22,  1.89it/s]

Phase 2 - Ep 957: Reward -0.186 | W* 0.839 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 958/1000 [05:27<00:23,  1.76it/s]

Phase 2 - Ep 958: Reward -0.186 | W* 0.867 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 959/1000 [05:27<00:22,  1.85it/s]

Phase 2 - Ep 959: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 960/1000 [05:28<00:21,  1.87it/s]

Phase 2 - Ep 960: Reward -0.204 | W* 0.817 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 961/1000 [05:28<00:20,  1.89it/s]

Phase 2 - Ep 961: Reward -0.204 | W* 0.820 | Success: False


Phase 2: Assembly Task:  96%|█████████▌| 962/1000 [05:29<00:18,  2.03it/s]

Phase 2 - Ep 962: Reward -0.196 | W* 0.900 | Success: False


Phase 2: Assembly Task:  96%|█████████▋| 963/1000 [05:29<00:16,  2.18it/s]

Phase 2 - Ep 963: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  96%|█████████▋| 964/1000 [05:30<00:15,  2.28it/s]

Phase 2 - Ep 964: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  96%|█████████▋| 965/1000 [05:30<00:14,  2.40it/s]

Phase 2 - Ep 965: Reward -0.204 | W* 0.902 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 966/1000 [05:30<00:13,  2.44it/s]

Phase 2 - Ep 966: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 967/1000 [05:31<00:13,  2.49it/s]

Phase 2 - Ep 967: Reward -0.204 | W* 0.850 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 968/1000 [05:31<00:12,  2.53it/s]

Phase 2 - Ep 968: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 969/1000 [05:31<00:12,  2.53it/s]

Phase 2 - Ep 969: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 970/1000 [05:32<00:11,  2.58it/s]

Phase 2 - Ep 970: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 971/1000 [05:32<00:11,  2.61it/s]

Phase 2 - Ep 971: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 972/1000 [05:33<00:10,  2.60it/s]

Phase 2 - Ep 972: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 973/1000 [05:33<00:11,  2.44it/s]

Phase 2 - Ep 973: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  97%|█████████▋| 974/1000 [05:34<00:11,  2.34it/s]

Phase 2 - Ep 974: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 975/1000 [05:34<00:10,  2.43it/s]

Phase 2 - Ep 975: Reward -0.204 | W* 0.925 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 976/1000 [05:34<00:09,  2.48it/s]

Phase 2 - Ep 976: Reward -0.186 | W* 0.822 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 977/1000 [05:35<00:09,  2.43it/s]

Phase 2 - Ep 977: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 978/1000 [05:35<00:09,  2.28it/s]

Phase 2 - Ep 978: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 979/1000 [05:36<00:09,  2.20it/s]

Phase 2 - Ep 979: Reward -0.204 | W* 0.912 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 980/1000 [05:36<00:09,  2.14it/s]

Phase 2 - Ep 980: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 981/1000 [05:37<00:09,  2.08it/s]

Phase 2 - Ep 981: Reward -0.186 | W* 0.829 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 982/1000 [05:37<00:09,  2.00it/s]

Phase 2 - Ep 982: Reward -0.186 | W* 0.877 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 983/1000 [05:38<00:08,  1.96it/s]

Phase 2 - Ep 983: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  98%|█████████▊| 984/1000 [05:38<00:08,  1.94it/s]

Phase 2 - Ep 984: Reward -0.145 | W* 0.854 | Success: True


Phase 2: Assembly Task:  98%|█████████▊| 985/1000 [05:39<00:08,  1.84it/s]

Phase 2 - Ep 985: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  99%|█████████▊| 986/1000 [05:39<00:07,  1.82it/s]

Phase 2 - Ep 986: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  99%|█████████▊| 987/1000 [05:40<00:07,  1.84it/s]

Phase 2 - Ep 987: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  99%|█████████▉| 988/1000 [05:40<00:05,  2.04it/s]

Phase 2 - Ep 988: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  99%|█████████▉| 989/1000 [05:41<00:04,  2.21it/s]

Phase 2 - Ep 989: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  99%|█████████▉| 990/1000 [05:41<00:04,  2.28it/s]

Phase 2 - Ep 990: Reward -0.186 | W* 0.872 | Success: False


Phase 2: Assembly Task:  99%|█████████▉| 991/1000 [05:42<00:03,  2.41it/s]

Phase 2 - Ep 991: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  99%|█████████▉| 992/1000 [05:42<00:03,  2.49it/s]

Phase 2 - Ep 992: Reward -0.204 | W* 0.892 | Success: False


Phase 2: Assembly Task:  99%|█████████▉| 993/1000 [05:42<00:02,  2.52it/s]

Phase 2 - Ep 993: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task:  99%|█████████▉| 994/1000 [05:43<00:02,  2.57it/s]

Phase 2 - Ep 994: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task: 100%|█████████▉| 995/1000 [05:43<00:01,  2.62it/s]

Phase 2 - Ep 995: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task: 100%|█████████▉| 996/1000 [05:43<00:01,  2.61it/s]

Phase 2 - Ep 996: Reward -0.204 | W* 0.925 | Success: False


Phase 2: Assembly Task: 100%|█████████▉| 997/1000 [05:44<00:01,  2.61it/s]

Phase 2 - Ep 997: Reward -0.204 | W* 0.825 | Success: False


Phase 2: Assembly Task: 100%|█████████▉| 998/1000 [05:44<00:00,  2.60it/s]

Phase 2 - Ep 998: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task: 100%|█████████▉| 999/1000 [05:45<00:00,  2.65it/s]

Phase 2 - Ep 999: Reward -0.210 | W* 0.940 | Success: False


Phase 2: Assembly Task: 100%|██████████| 1000/1000 [05:45<00:00,  2.90it/s]


Phase 2 - Ep 1000: Reward -0.186 | W* 0.857 | Success: False

Simulation complete. Results saved to two_step_rdt_sim_results_20250722_021814
