<a href="https://colab.research.google.com/github/Appleking123456/astro-platform-starter/blob/main/Successful_rdt_system_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import umap.umap_ as umap
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import warnings
import json
from datetime import datetime
import os

# Silence every UserWarning process-wide. The filter is not restricted to any
# one library; UMAP and KMeans are the noisy sources it was added for.
warnings.filterwarnings("ignore", category=UserWarning)
sns.set_style("whitegrid")

# ===== CONFIGURATION =====
# Module-level device used by the model classes below: CUDA when available,
# otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Explicitly set default dtype for float operations to float32
torch.set_default_dtype(torch.float32)
# Seed the PyTorch and NumPy random number generators.
torch.manual_seed(42)
np.random.seed(42)

# Create a timestamped results directory (path is relative to the current
# working directory); exist_ok avoids an error if it already exists.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
results_dir = f"rdt_results_{timestamp}"
os.makedirs(results_dir, exist_ok=True)

# ===== ENHANCED RCS AGENT =====
class RCSAgent(nn.Module):
    """Transformer-based actor-critic agent with a small rolling memory.

    The agent embeds a sequence of flattened states, encodes it with a
    transformer, mixes in two convolutional views of the most recent state,
    optionally attends over an episodic memory of past embeddings, and then
    produces a scaled action, a value estimate and an uncertainty estimate.

    Args:
        state_dim: Length of one flattened state vector.
        action_dim: Number of action components. The action scaling in
            ``forward`` is a fixed two-element vector, so it is written for
            ``action_dim == 2``.
        d_model: Width of the embedding used throughout the network.
        num_heads: Number of attention heads in the transformer encoder
            (the memory attention always uses 4 heads).

    All sub-modules are created on the module-level ``device``.
    """

    def __init__(self, state_dim, action_dim, d_model=128, num_heads=8):
        super().__init__()
        # Callers pass np.prod(...), which is a NumPy integer; keep a plain int
        # so layer sizes are ordinary Python ints.
        self.state_dim = int(state_dim)
        self.d_model = d_model

        # Per-timestep input embedding
        self.input_embedding = nn.Sequential(
            nn.Linear(self.state_dim, d_model),
            nn.LayerNorm(d_model),
            nn.SiLU(),
            nn.Dropout(0.1)
        ).to(device)

        # Multi-scale feature extraction over the most recent state
        # (single input channel, narrow and wide receptive fields)
        self.local_features = nn.Conv1d(1, d_model//4, kernel_size=3, padding=1).to(device)
        self.global_features = nn.Conv1d(1, d_model//4, kernel_size=7, padding=3).to(device)

        # Projects the concatenated, flattened conv features back to d_model
        conv_feature_dim = self.state_dim * (self.d_model // 4) * 2
        self.conv_proj = nn.Linear(conv_feature_dim, self.d_model).to(device)

        # Sequence encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=d_model * 4,
            dropout=0.1,
            batch_first=True,
            activation='gelu',
            device=device
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=4).to(device)

        # Attention of the current embedding over the episodic memory
        self.memory_attention = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=4,
            batch_first=True,
            device=device
        )

        # Actor network; a residual connection is applied in forward() wherever
        # a layer keeps the feature width unchanged
        self.actor_layers = nn.ModuleList([
            nn.Linear(d_model, 128),
            nn.Linear(128, 64),
            nn.Linear(64, action_dim)
        ]).to(device)

        self.actor_norms = nn.ModuleList([
            nn.LayerNorm(128),
            nn.LayerNorm(64)
        ]).to(device)

        # Critic with two outputs: [value, raw uncertainty]
        self.critic = nn.Sequential(
            nn.Linear(d_model, 128),
            nn.SiLU(),
            nn.Dropout(0.1),
            nn.Linear(128, 64),
            nn.SiLU(),
            nn.Linear(64, 32),
            nn.SiLU(),
            nn.Linear(32, 2)  # [value, uncertainty]
        ).to(device)

        # Ring buffers of past state embeddings plus their write pointers
        self.register_buffer('episodic_memory', torch.zeros(20, d_model, device=device))  # recent states
        self.register_buffer('semantic_memory', torch.zeros(10, d_model, device=device))  # significant states
        self.register_buffer('memory_ptr', torch.tensor(0, device=device, dtype=torch.long))
        self.register_buffer('semantic_ptr', torch.tensor(0, device=device, dtype=torch.long))

        # Meta-learning parameter (not read anywhere inside this class)
        self.adaptation_rate = nn.Parameter(torch.tensor(0.01, device=device))

        self._init_weights()

    def _init_weights(self):
        """Re-initialise every nn.Linear with Kaiming-normal weights and zero bias."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    @torch.no_grad()
    def update_memory(self, state_embedding, is_significant=False):
        """Write one embedding into the episodic (and optionally semantic) ring buffer.

        Args:
            state_embedding: 1-D embedding of length ``d_model``.
            is_significant: When true the embedding is also stored in the
                semantic memory.
        """
        snapshot = state_embedding.detach()

        self.episodic_memory[self.memory_ptr] = snapshot
        self.memory_ptr.copy_((self.memory_ptr + 1) % self.episodic_memory.size(0))

        if is_significant:
            self.semantic_memory[self.semantic_ptr] = snapshot
            self.semantic_ptr.copy_((self.semantic_ptr + 1) % self.semantic_memory.size(0))

    def _positional_encoding(self, seq_len, target_device):
        """Return the (seq_len, d_model) sinusoidal positional encoding on ``target_device``."""
        position = torch.arange(seq_len, device=target_device, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, self.d_model, 2, device=target_device, dtype=torch.float32)
            * -(np.log(10000.0) / self.d_model)
        )
        encoding = torch.zeros(seq_len, self.d_model, device=target_device, dtype=torch.float32)
        encoding[:, 0::2] = torch.sin(position * div_term)
        encoding[:, 1::2] = torch.cos(position * div_term)
        return encoding

    def forward(self, state_sequence):
        """Compute the action, value, embedding and uncertainty for a state sequence.

        Args:
            state_sequence: Tensor of shape (batch, seq_len, state_dim).

        Returns:
            Tuple ``(action_scaled, value, state_embedding, uncertainty)`` with
            shapes (batch, action_dim), (batch, 1), (batch, d_model) and
            (batch, 1).

        Side effect: every call stores the (batch-averaged) embedding in the
        episodic memory, regardless of training/eval mode.
        """
        # Cast once so the conv path and the embedding path see the same dtype.
        state_sequence = state_sequence.float()
        batch_size = state_sequence.size(0)
        seq_len = state_sequence.size(1)

        # Conv1d expects (N, C, L): one channel, length state_dim.
        last_state = state_sequence[:, -1, :].unsqueeze(1)
        local_feat = self.local_features(last_state).transpose(1, 2)    # (batch, state_dim, d_model//4)
        global_feat = self.global_features(last_state).transpose(1, 2)  # (batch, state_dim, d_model//4)

        # Embed each timestep and add the positional encoding, built directly on
        # the input's device to avoid a host-to-device copy on every call.
        x = self.input_embedding(state_sequence)
        x = x + self._positional_encoding(seq_len, state_sequence.device).unsqueeze(0)

        # Transformer processing; keep the embedding of the last timestep.
        encoded = self.transformer(x)
        state_embedding = encoded[:, -1, :]

        # Combine with the multi-scale conv features.
        combined_conv_features = torch.cat(
            [local_feat.flatten(start_dim=1), global_feat.flatten(start_dim=1)], dim=1
        )
        state_embedding = state_embedding + self.conv_proj(combined_conv_features)

        # Memory attention, skipped while the episodic memory is still all zeros.
        if self.episodic_memory.norm() > 0:
            # Attend over a *copy*: update_memory() below overwrites a row of the
            # buffer, and autograd keeps the attention inputs for backward().
            # Sharing storage would silently change those saved inputs.
            memory_snapshot = self.episodic_memory.detach().clone()
            attended_memory, _ = self.memory_attention(
                state_embedding.unsqueeze(1),
                memory_snapshot.unsqueeze(0).expand(batch_size, -1, -1),
                memory_snapshot.unsqueeze(0).expand(batch_size, -1, -1)
            )
            state_embedding = state_embedding + 0.1 * attended_memory.squeeze(1)

        # Actor head.
        actor_x = state_embedding
        for layer, norm in zip(self.actor_layers[:-1], self.actor_norms):
            residual = actor_x
            actor_x = F.silu(norm(layer(actor_x)))
            # Residual connection only where the feature width is unchanged.
            if residual.size(-1) == actor_x.size(-1):
                actor_x = actor_x + residual
            actor_x = F.dropout(actor_x, 0.1, training=self.training)

        action_raw = torch.tanh(self.actor_layers[-1](actor_x))

        # Critic head: value plus an uncertainty squashed into (0, 1).
        value_uncertainty = self.critic(state_embedding)
        value = value_uncertainty[:, 0:1]
        uncertainty = torch.sigmoid(value_uncertainty[:, 1:2])

        # Per-component action scale, widened by up to 50% as uncertainty grows.
        # (batch, 1) broadcasts against the two-element base scale.
        exploration_factor = 1.0 + uncertainty * 0.5
        base_scale = torch.tensor([0.12, 0.06], device=action_raw.device, dtype=torch.float32)
        action_scaled = action_raw * (base_scale * exploration_factor)

        # Update memory with a single embedding (the batch mean for batch > 1).
        is_significant_state = False  # This can be made dynamic based on internal criteria
        self.update_memory(
            state_embedding[0] if batch_size == 1 else state_embedding.mean(0),
            is_significant_state
        )

        return action_scaled, value, state_embedding, uncertainty

# ===== ENHANCED DYNAMIC TENSOR =====
class DynamicTensor(nn.Module):
    """Stochastically evolving tensor with entanglement/chaoticity controls.

    The current tensor, the entanglement level and the chaoticity index are
    stored as ``nn.Parameter`` objects so they appear in the ``state_dict``.
    They are updated by rebinding ``.data`` through the setters below.

    Args:
        size: Shape of the tensor. ``_initialize_tensor`` indexes three axes,
            so a 3-D shape is expected.

    Attributes:
        history: List of recent evolved tensors as float32 NumPy arrays.
        phase_history: List of recent phase-order values as float32 NumPy
            scalars (0-d arrays).
    """

    def __init__(self, size=(4,4,4)):
        super().__init__()
        self.initial_size = size
        self.history = []
        self.phase_history = []

        # State held as Parameters so it is saved/loaded with the module
        self._tensor_data = nn.Parameter(self._initialize_tensor(size))
        self._entanglement_level = nn.Parameter(torch.tensor(0.5, device=device, dtype=torch.float32))
        self._chaoticity_index = nn.Parameter(torch.tensor(0.1, device=device, dtype=torch.float32))

        # Learnable evolution parameters
        self.evolution_rate = nn.Parameter(torch.tensor(0.08, device=device, dtype=torch.float32))
        self.syntropic_strength = nn.Parameter(torch.tensor(0.15, device=device, dtype=torch.float32))
        self.quantum_coupling = nn.Parameter(torch.tensor(0.03, device=device, dtype=torch.float32))

        # Phase transition parameters
        self.critical_entanglement = nn.Parameter(torch.tensor(0.618, device=device, dtype=torch.float32))
        self.phase_transition_sharpness = nn.Parameter(torch.tensor(5.0, device=device, dtype=torch.float32))

    def _initialize_tensor(self, size):
        """Return Gaussian noise plus a deterministic sinusoidal pattern of shape ``size``."""
        base_tensor = torch.randn(size, device=device, dtype=torch.float32)
        pattern = torch.zeros(size, device=device, dtype=torch.float32)
        # The sample grid along the last axis does not depend on (i, j).
        x = torch.linspace(0, 2*np.pi, size[2], device=device, dtype=torch.float32)
        for i in range(size[0]):
            for j in range(size[1]):
                pattern[i, j] = (0.3 * torch.sin(x + i * np.pi/4) +
                                 0.2 * torch.cos(2*x + j * np.pi/3) +
                                 0.1 * torch.sin(3*x))
        return base_tensor + pattern

    def compute_phase_order(self, tensor_data):
        """Compute an order parameter for phase transitions.

        The value is the mean Pearson correlation between the flattened data
        and itself shifted by 1 and by 2 positions. A shift whose halves have
        (near-)zero spread contributes 0.

        Args:
            tensor_data: Tensor of any shape.

        Returns:
            0-d tensor on the same device as ``tensor_data``.
        """
        flat_data = tensor_data.flatten()
        zero = flat_data.new_zeros(())
        if flat_data.numel() < 2:
            return zero

        correlations = []
        for shift in (1, 2):
            if flat_data.numel() <= shift:
                continue
            lagging = flat_data[:-shift]
            leading = flat_data[shift:]

            covariance = ((lagging - lagging.mean()) * (leading - leading.mean())).mean()
            # The covariance above is the population form (divide by n), so the
            # standard deviations must use the same normalisation; otherwise a
            # perfect correlation comes out as (n - 1) / n instead of 1.
            std_lagging = lagging.std(unbiased=False)
            std_leading = leading.std(unbiased=False)

            if std_lagging > 1e-8 and std_leading > 1e-8:
                correlations.append(covariance / (std_lagging * std_leading))
            else:
                correlations.append(zero)

        return torch.stack(correlations).mean() if correlations else zero

    @staticmethod
    def _fft_plane(data):
        """Pick the float32 slice of ``data`` that is handed to the 2-D FFT."""
        if data.dim() > 2:
            plane = data[0]  # first slice along the leading axis
        elif data.dim() == 2:
            plane = data
        else:
            # 1-D: fold into a square when possible, otherwise a single row
            num_elements = data.numel()
            side_len = int(np.sqrt(num_elements))
            if side_len * side_len == num_elements:
                plane = data.reshape(side_len, side_len)
            else:
                plane = data.unsqueeze(0)
        if plane.dim() == 1:
            plane = plane.unsqueeze(0)
        return plane.to(torch.float32)

    # Forward pass for the tensor's internal evolution
    def forward(self, current_tensor_data, current_entanglement_level, current_chaoticity_index):
        """Evolve the tensor by one step and record it in the history lists.

        Args:
            current_tensor_data: Tensor to evolve.
            current_entanglement_level: 0-d tensor controlling the phase
                transition factor and the pull toward the mean.
            current_chaoticity_index: 0-d tensor blending the noisy data with
                the syntropic target.

        Returns:
            The evolved tensor (same shape as ``current_tensor_data``). This
            method does not store it in ``_tensor_data``; it only appends a
            detached copy to ``history``.
        """
        current_tensor_data = current_tensor_data.to(torch.float32)
        current_entanglement_level = current_entanglement_level.to(torch.float32)
        current_chaoticity_index = current_chaoticity_index.to(torch.float32)

        # Random perturbation scaled by the learnable evolution rate
        noise = torch.randn_like(current_tensor_data, dtype=torch.float32) * self.evolution_rate
        evolved_data = current_tensor_data + noise

        # Phase transition dynamics
        phase_order = self.compute_phase_order(evolved_data)
        entanglement_diff = current_entanglement_level - self.critical_entanglement
        phase_transition_factor = torch.tanh(
            self.phase_transition_sharpness * entanglement_diff
        )

        # Syntropic target: the mean plus one scalar noise draw
        std_val = evolved_data.std() + 1e-8
        mean_val = evolved_data.mean()
        syntropic_target = (
            mean_val +
            self.syntropic_strength * std_val * torch.randn_like(mean_val, dtype=torch.float32) *
            (1 + phase_transition_factor * 0.5)
        )

        # FFT-derived noise amplitude; stays zero when no 2-D plane is available
        quantum_influence = torch.zeros_like(evolved_data, dtype=torch.float32)
        if evolved_data.numel() > 1:
            fft_input = self._fft_plane(evolved_data)
            if fft_input.dim() == 2 and fft_input.numel() > 0:
                phase_spectrum = torch.angle(torch.fft.fft2(fft_input))
                quantum_influence_scalar = self.quantum_coupling * torch.cos(phase_spectrum).mean()
                quantum_influence = quantum_influence_scalar * torch.randn_like(evolved_data, dtype=torch.float32)

        # Combine all evolution mechanisms
        evolved_data = (
            (1 - current_chaoticity_index) * evolved_data +
            current_chaoticity_index * syntropic_target +
            quantum_influence +
            0.05 * phase_transition_factor * torch.randn_like(evolved_data, dtype=torch.float32)
        )

        # Entanglement-driven stabilisation: pull toward the pre-mix mean
        entanglement_effect = torch.sigmoid((current_entanglement_level - 0.5) * 8)
        evolved_data = (
            (1 - entanglement_effect) * evolved_data +
            entanglement_effect * (0.8 * mean_val + 0.2 * evolved_data)
        )

        # Bounded history of detached snapshots
        if len(self.history) > 150:
            self.history.pop(0)
        if len(self.phase_history) > 100:
            self.phase_history.pop(0)
        self.history.append(evolved_data.detach().cpu().numpy().astype(np.float32))
        self.phase_history.append(phase_order.detach().cpu().numpy().astype(np.float32))

        return evolved_data

    # Getter methods for direct access to the Parameter objects
    def get_current_tensor_data(self):
        """Return the Parameter holding the current tensor."""
        return self._tensor_data

    def get_entanglement_level(self):
        """Return the Parameter holding the entanglement level."""
        return self._entanglement_level

    def get_chaoticity_index(self):
        """Return the Parameter holding the chaoticity index."""
        return self._chaoticity_index

    # Setter methods for updating Parameter data.
    # They rebind ``.data`` instead of writing in place: an in-place write would
    # bump the version counter of a tensor that a pending backward pass may
    # still hold, whereas rebinding leaves previously saved tensors untouched.
    def set_current_tensor_data(self, new_data):
        """Replace the stored tensor with ``new_data`` (cast to float32)."""
        self._tensor_data.data = new_data.data.to(torch.float32)

    def set_entanglement_level(self, new_level):
        """Store ``new_level`` clamped to [0.01, 0.99]."""
        self._entanglement_level.data = torch.clamp(new_level, 0.01, 0.99).data.to(torch.float32)

    def set_chaoticity_index(self, new_index):
        """Store ``new_index`` clamped to [0.001, 0.8]."""
        self._chaoticity_index.data = torch.clamp(new_index, 0.001, 0.8).data.to(torch.float32)

    def _estimate_fractal_dimension(self):
        """Box-counting style estimate over the last 10 history entries.

        Returns 1.5 when there is too little history, the data has no spread,
        or the estimate cannot be computed.
        """
        default_dimension = 1.5
        if len(self.history) <= 10:
            return default_dimension
        try:
            recent = np.array(self.history[-10:], dtype=np.float32).flatten()
            if len(recent) == 0 or not np.std(recent) > 1e-8:
                return default_dimension

            lowest = np.min(recent)
            data_range = np.max(recent) - lowest + 1e-8
            num_boxes = 10
            scales = np.linspace(data_range / num_boxes, data_range, num_boxes, dtype=np.float32)
            counts = [
                len(np.unique(np.floor((recent - lowest) / scale).astype(int)))
                for scale in scales
            ]
            log_scales = np.log(scales)
            log_counts = np.log(np.array(counts, dtype=np.float32) + 1e-8)
            if len(log_scales) > 1 and np.std(log_counts) > 1e-8:
                slope, _ = np.polyfit(log_scales, log_counts, 1)
                return float(-slope)
        except Exception as exc:
            # Best effort: keep the default, but do not hide the failure.
            warnings.warn(
                f"Fractal dimension estimate failed, using default: {exc}",
                RuntimeWarning,
            )
        return default_dimension

    def metrics(self):
        """Return monitoring metrics for the current state as plain Python floats.

        Keys: ``entropy``, ``entanglement_value``, ``chaoticity_value``,
        ``variance``, ``complexity`` (entropy * variance),
        ``fractal_dimension`` and ``phase_coherence`` (clipped to [0, 1]).
        """
        # Detached: metrics are for monitoring, not for gradient computation
        t_data = self._tensor_data.detach()
        ent_level = self._entanglement_level.detach().item()
        cha_index = self._chaoticity_index.detach().item()

        # Entropy of the softmax over all elements
        t_normalized = F.softmax(t_data.flatten(), dim=0) + 1e-8
        entropy_val = -(t_normalized * torch.log(t_normalized)).sum().item()
        variance = t_data.var().item()
        complexity = entropy_val * variance

        fractal_dim = self._estimate_fractal_dimension()

        # Phase coherence: 1 - (spread / mean magnitude) of the last 5 phase orders
        phase_coherence = 0.0
        if len(self.phase_history) > 5:
            recent_phases = np.array(self.phase_history[-5:], dtype=np.float32)
            mean_abs_phase = np.mean(np.abs(recent_phases)) + 1e-8
            # The comparison is false for NaN, which keeps the 0.0 default.
            if mean_abs_phase > 0:
                phase_coherence = 1.0 - np.std(recent_phases) / mean_abs_phase
            # Plain float so the result is JSON-serialisable.
            phase_coherence = float(np.clip(phase_coherence, 0.0, 1.0))

        return {
            'entropy': entropy_val,
            'entanglement_value': ent_level,
            'chaoticity_value': cha_index,
            'variance': variance,
            'complexity': complexity,
            'fractal_dimension': fractal_dim,
            'phase_coherence': phase_coherence
        }

    def reset(self):
        """Re-initialise the tensor, restore the default levels and clear both histories."""
        self._tensor_data.data = self._initialize_tensor(self.initial_size).data
        self._entanglement_level.data = torch.tensor(0.5, device=device, dtype=torch.float32).data
        self._chaoticity_index.data = torch.tensor(0.1, device=device, dtype=torch.float32).data
        self.history.clear()
        self.phase_history.clear()

# ===== ENHANCED CONSCIOUSNESS KERNEL =====
class ConsciousnessKernel(nn.Module):
    """Project a tensor and an entanglement level onto a normalised spectrum.

    Five frequency bands (delta, theta, alpha, beta, gamma) each produce
    ``spectrum_size`` samples from Bessel and trigonometric terms. The bands
    are mixed with softmax-normalised attention weights and the result is
    min-max scaled.

    Args:
        target_spectrum_size: Number of samples in the output spectrum.
    """

    # One entry per band, in the order delta, theta, alpha, beta, gamma:
    # (Bessel function, shift added to its argument or None,
    #  trigonometric function, shift added to the sample phase).
    _BAND_SHAPES = (
        (torch.special.bessel_j0, None, torch.cos, 0.0),
        (torch.special.bessel_j1, None, torch.sin, 0.0),
        (torch.special.bessel_j0, np.pi/4, torch.cos, np.pi/8),
        (torch.special.bessel_j1, np.pi/3, torch.sin, np.pi/6),
        (torch.special.bessel_j0, 0.5, torch.cos, np.pi/4),
    )

    def __init__(self, target_spectrum_size=4):
        super().__init__()
        self.spectrum_size = target_spectrum_size

        def learnable_scalar(initial_value):
            return nn.Parameter(torch.tensor(initial_value, device=device, dtype=torch.float32))

        # Learnable band frequencies
        self.delta_freq = learnable_scalar(0.5)
        self.theta_freq = learnable_scalar(0.8)
        self.alpha_freq = learnable_scalar(1.2)
        self.beta_freq = learnable_scalar(1.8)
        self.gamma_freq = learnable_scalar(2.5)

        # One mixing weight per (band, spectrum sample)
        self.attention_weights = nn.Parameter(
            torch.ones(5, target_spectrum_size, device=device, dtype=torch.float32)
        )

        # Maps the scalar entanglement level to one gain per (band, sample)
        processor_layers = [
            nn.Linear(1, 32),
            nn.SiLU(),
            nn.Dropout(0.1),
            nn.Linear(32, 64),
            nn.SiLU(),
            nn.Linear(64, target_spectrum_size * 5),
            nn.Tanh(),
        ]
        self.entanglement_processor = nn.Sequential(*processor_layers).to(device)

    def forward(self, tensor_data, entanglement_level):
        """Return the min-max normalised spectrum, a tensor of length ``spectrum_size``.

        Args:
            tensor_data: Tensor of any rank (including 0-d).
            entanglement_level: 0-d tensor.

        Returns:
            1-D float32 tensor; all zeros when the combined spectrum is flat.
        """
        data = tensor_data.to(torch.float32)
        level = entanglement_level.to(torch.float32)

        total_norm = torch.norm(data)

        # One norm summary per axis: the mean of the norms taken over all the
        # other axes. Degenerate shapes fall back to the norm of everything.
        if data.dim() == 0:
            axis_norms = [data.unsqueeze(0)]
        else:
            rank = data.dim()
            axis_norms = []
            for axis in range(rank):
                other_axes = [d for d in range(rank) if d != axis]
                if other_axes and data.numel() > 1:
                    per_axis = torch.norm(data, dim=other_axes)
                    axis_norms.append(per_axis.mean().unsqueeze(0))
                else:
                    axis_norms.append(total_norm.unsqueeze(0))
        if not axis_norms:
            axis_norms.append(total_norm.unsqueeze(0))

        # Cycle through (or cut off) the summaries to get spectrum_size entries.
        available = len(axis_norms)
        axis_norms = [axis_norms[k % available] for k in range(self.spectrum_size)]

        # Entanglement gains, rescaled from the Tanh range (-1, 1) to (0, 1)
        raw_gains = self.entanglement_processor(level.unsqueeze(0))
        gains = raw_gains.reshape(5, self.spectrum_size) * 0.5 + 0.5

        band_frequencies = (
            self.delta_freq,
            self.theta_freq,
            self.alpha_freq,
            self.beta_freq,
            self.gamma_freq,
        )

        band_spectra = []
        for band, frequency in enumerate(band_frequencies):
            bessel, arg_shift, trig, phase_shift = self._BAND_SHAPES[band]
            samples = []
            for slot in range(self.spectrum_size):
                phase = (slot / self.spectrum_size) * 2 * np.pi
                spatial = axis_norms[slot].squeeze()
                drive = frequency * gains[band, slot] * (spatial + total_norm * 0.1)
                if arg_shift is not None:
                    drive = drive + arg_shift
                angle = torch.tensor(phase + phase_shift, device=device, dtype=torch.float32)
                samples.append(bessel(drive) * trig(angle))
            band_spectra.append(torch.stack(samples))

        # Attention-weighted sum of the bands (softmax across the band axis)
        mixing = F.softmax(self.attention_weights, dim=0)
        combined = torch.zeros(self.spectrum_size, device=device, dtype=torch.float32)
        for band_weights, band_spectrum in zip(mixing, band_spectra):
            combined = combined + band_weights * band_spectrum

        # Min-max normalisation; a flat spectrum maps to zeros
        lowest = combined.min()
        highest = combined.max()
        if highest - lowest > 1e-8:
            return (combined - lowest) / (highest - lowest)
        return torch.zeros_like(combined, dtype=torch.float32)

# ===== ENHANCED ENVIRONMENT =====
class RDTEnvironment:
    """Episodic environment wrapping a DynamicTensor and a ConsciousnessKernel.

    Each step nudges the tensor's entanglement level and chaoticity index with
    the agent's action, evolves the tensor, projects it to a spectrum and
    scores that spectrum against a target. Episodes last ``max_time`` steps;
    every 40 completed episodes the difficulty is adapted and the target is
    regenerated.

    Args:
        device: Torch device on which the tensor, kernel and target live.
    """

    # Number of flattened tensor states in the observation handed to the agent
    SEQUENCE_LENGTH = 10

    def __init__(self, device):
        self.device = device
        self.tensor = DynamicTensor().to(device)
        self.kernel = ConsciousnessKernel(target_spectrum_size=self.tensor.initial_size[-1]).to(device)

        self.time = 0
        self.max_time = 20
        self.episode_count = 0
        self.difficulty = 1.0  # must exist BEFORE _generate_adaptive_target is called
        self.target = self._generate_adaptive_target(self.tensor.initial_size[-1]).to(device)
        self.performance_history = []
        self.prev_action = None
        self.current_episode_similarities = []  # spectral similarities of the running episode
        self.state_history_buffer = None  # rolling window of flattened states for the agent

    def _generate_adaptive_target(self, size):
        """Return a target spectrum of length ``size``: a sigmoid of summed sinusoids plus noise."""
        x = torch.linspace(0, 6*np.pi, size, device=self.device, dtype=torch.float32)
        target = (
            0.4 * torch.sin(x) +
            0.25 * torch.sin(2*x + np.pi/3) +
            0.15 * torch.sin(3*x + np.pi/2) +
            0.1 * torch.sin(5*x) +
            0.05 * torch.randn(size, device=self.device, dtype=torch.float32)
        )
        return torch.sigmoid(target * self.difficulty)

    def update_curriculum(self):
        """Adapt the difficulty and regenerate the target every 40 completed episodes."""
        if self.episode_count <= 0 or self.episode_count % 40 != 0:
            return

        # With fewer than 40 recorded episodes the performance counts as 0.
        if len(self.performance_history) >= 40:
            recent_performance = np.mean(self.performance_history[-40:])
        else:
            recent_performance = 0

        if recent_performance > 0.7:
            self.difficulty = min(2.5, self.difficulty * 1.15)
        elif recent_performance < 0.3:
            self.difficulty = max(0.5, self.difficulty * 0.95)

        # Regenerate target with the (possibly) new difficulty
        self.target = self._generate_adaptive_target(self.tensor.initial_size[-1])
        self.performance_history.clear()  # start the next 40-episode cycle

    def _blank_history(self, state_dim):
        """Return SEQUENCE_LENGTH independent all-zero float32 state vectors."""
        return [np.zeros(state_dim, dtype=np.float32) for _ in range(self.SEQUENCE_LENGTH)]

    def _observation(self):
        """Stack the state window into a (1, SEQUENCE_LENGTH, state_dim) float32 tensor."""
        window = np.array(self.state_history_buffer, dtype=np.float32)
        return torch.from_numpy(window).unsqueeze(0).to(self.device)

    def _stability_penalty(self, evolved_tensor_data):
        """Mean squared change of the evolved tensor against the previous one or two snapshots.

        ``self.tensor.history[-1]`` is the snapshot of ``evolved_tensor_data``
        itself, so the comparison starts at ``history[-2]``.
        """
        history = self.tensor.history
        if len(history) < 2:
            return torch.tensor(0.0, device=self.device, dtype=torch.float32)

        # Use the detached tensor directly; no trip through NumPy and back.
        current_state = evolved_tensor_data.detach().to(device=self.device, dtype=torch.float32)
        prev_state = torch.as_tensor(history[-2], dtype=torch.float32, device=self.device)
        if len(history) < 3:
            return F.mse_loss(current_state, prev_state)

        third_last_state = torch.as_tensor(history[-3], dtype=torch.float32, device=self.device)
        return (F.mse_loss(current_state, prev_state) * 0.5 +
                F.mse_loss(current_state, third_last_state) * 0.5)

    def step(self, action):
        """Apply ``action``, evolve the tensor and score the result.

        Args:
            action: Tensor that squeezes to two components; the first is added
                to the entanglement level and the second to the chaoticity
                index.

        Returns:
            Tuple ``(observation, reward, done, info)``: the observation is a
            (1, SEQUENCE_LENGTH, state_dim) tensor, ``reward`` a 0-d float32
            tensor, ``done`` a bool and ``info`` a dict of scalar diagnostics.
        """
        action = action.squeeze().to(torch.float32)

        # Smooth the action with momentum from the previous step
        momentum_factor = 0.9
        if self.prev_action is not None:
            action = momentum_factor * self.prev_action + (1 - momentum_factor) * action
        self.prev_action = action.detach().clone()

        scaled_action = action * self.difficulty * 0.8

        # Update entanglement and chaoticity (the setters clamp the values)
        new_entanglement_level = self.tensor.get_entanglement_level() + scaled_action[0]
        new_chaoticity_index = self.tensor.get_chaoticity_index() + scaled_action[1]
        self.tensor.set_entanglement_level(new_entanglement_level)
        self.tensor.set_chaoticity_index(new_chaoticity_index)

        # Evolve the tensor and store the result
        current_entanglement_level = self.tensor.get_entanglement_level()
        evolved_tensor_data = self.tensor(
            self.tensor.get_current_tensor_data(),
            current_entanglement_level,
            self.tensor.get_chaoticity_index()
        )
        self.tensor.set_current_tensor_data(evolved_tensor_data)

        # Project to the spectrum
        projected_spectrum = self.kernel(
            self.tensor.get_current_tensor_data(),
            self.tensor.get_entanglement_level()
        )

        # --- Reward terms ---
        spectral_similarity_loss = F.mse_loss(projected_spectrum, self.target)
        spectral_similarity = torch.exp(-spectral_similarity_loss * 3.0)  # higher is better

        optimal_entanglement = 0.618
        entanglement_deviation = (current_entanglement_level - optimal_entanglement) ** 2

        metrics = self.tensor.metrics()
        complexity_reward = torch.tensor(metrics['complexity'], device=self.device, dtype=torch.float32)
        phase_coherence_reward = torch.tensor(metrics['phase_coherence'], device=self.device, dtype=torch.float32)

        stability_penalty = self._stability_penalty(evolved_tensor_data)

        # Distance of the softmax entropy from 75% of its maximum, log(n)
        probabilities = F.softmax(evolved_tensor_data.flatten(), dim=0) + 1e-8
        entropic_cost = -(probabilities * torch.log(probabilities)).sum()
        optimal_entropy = np.log(len(probabilities)) * 0.75
        entropy_balance = torch.abs(entropic_cost - optimal_entropy).to(torch.float32)

        # Fractal reward: closer to 1.8 is better
        fractal_reward = torch.exp(-torch.abs(
            torch.tensor(metrics['fractal_dimension'], device=self.device, dtype=torch.float32) - 1.8
        ))

        reward = (
            3.0 * spectral_similarity +
            0.5 * complexity_reward +
            0.5 * phase_coherence_reward +
            0.5 * fractal_reward -
            0.1 * entanglement_deviation -
            0.3 * stability_penalty -
            0.2 * entropy_balance
        ).to(torch.float32)

        # Read the similarity once; it feeds both the curriculum and the info dict
        similarity_value = spectral_similarity.item()
        self.current_episode_similarities.append(similarity_value)

        # --- Observation: slide the state window forward by one ---
        current_tensor_flat = (
            self.tensor.get_current_tensor_data().detach().flatten().cpu().numpy().astype(np.float32)
        )
        if self.state_history_buffer is None:
            self.state_history_buffer = self._blank_history(current_tensor_flat.shape[0])
        self.state_history_buffer.pop(0)
        self.state_history_buffer.append(current_tensor_flat)
        observation = self._observation()

        # --- Episode bookkeeping ---
        self.time += 1
        done = self.time >= self.max_time
        if done:
            self.performance_history.append(np.mean(self.current_episode_similarities))
            self.current_episode_similarities = []
            self.episode_count += 1  # incremented AFTER the performance is recorded
            self.update_curriculum()

        info = {
            "spectral_similarity": similarity_value,
            "complexity": complexity_reward.item(),
            "phase_coherence": phase_coherence_reward.item(),
            "fractal_reward": fractal_reward.item(),
            "entanglement_deviation": entanglement_deviation.item(),
            "stability_penalty": stability_penalty.item(),
            "entropy_balance": entropy_balance.item(),
            "metrics": metrics,  # all raw metrics for detailed logging
            "difficulty": self.difficulty  # read after any curriculum update
        }

        return observation, reward, done, info

    def reset(self):
        """Start a new episode and return the initial all-zero observation."""
        self.tensor.reset()
        self.time = 0
        self.prev_action = None
        self.current_episode_similarities = []

        # Nothing has evolved yet, so the window starts as zeros
        state_dim = int(np.prod(self.tensor.initial_size))
        self.state_history_buffer = self._blank_history(state_dim)
        return self._observation()

# ===== TRAINING LOOP =====

def _history_to_jsonable(value):
    """Convert one history entry into a plain Python object that ``json`` accepts."""
    if isinstance(value, torch.Tensor):
        return value.detach().cpu().tolist()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        # NumPy scalars (e.g. np.float32) are not JSON serializable as-is.
        return value.item()
    return value


def train_rdt_system(num_episodes=500, visualize_every=50):
    """Train an ``RCSAgent`` on an ``RDTEnvironment`` and save the results.

    Every environment step performs one optimizer update on the combined
    loss ``-reward + 0.5 * critic_loss - 0.005 * action_entropy``. The
    learning rate follows a cosine schedule that advances once per episode.

    Files written to ``results_dir``: ``rdt_agent_best.pth`` (weights of the
    episode with the highest total reward so far), ``rdt_agent_final.pth``,
    ``dynamic_tensor_final.pth``, ``training_history.json`` and the progress
    plots produced by ``visualize_progress``.

    Args:
        num_episodes: Number of training episodes to run.
        visualize_every: Save a progress plot every this many episodes.
            ``0`` or ``None`` disables the periodic plots; the final plot is
            always saved.

    Returns:
        Tuple ``(agent, env)`` with the trained agent and the environment.
    """
    env = RDTEnvironment(device)
    state_dim = np.prod(env.tensor.initial_size)
    action_dim = 2
    agent = RCSAgent(
        state_dim=state_dim,
        action_dim=action_dim
    ).to(device)

    optimizer = torch.optim.AdamW(agent.parameters(), lr=1e-4, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_episodes)

    history = {
        'rewards': [], 'entanglement': [], 'chaoticity': [], 'entropy': [],
        'spectral_similarity': [], 'entanglement_deviation_abs': [],
        'entropic_cost': [], 'complexity': [], 'fractal_dimension': [],
        'phase_coherence': [], 'learning_rate': [], 'loss': [],
        'value_predictions': [], 'uncertainties': [], 'difficulty': []
    }

    best_reward = float('-inf')
    best_model_path = os.path.join(results_dir, "rdt_agent_best.pth")

    print(f"Starting RDT System Training on {device}")

    for episode in tqdm(range(num_episodes), desc="Training RDT System"):
        state = env.reset()  # Initial state sequence for the new episode

        episode_metrics = {
            'reward_sum': 0, 'loss_sum': 0, 'spectral_similarity_sum': 0,
            'entanglement_deviation_sum': 0, 'entropic_cost_sum': 0,
            'complexity_sum': 0, 'phase_coherence_sum': 0, 'fractal_sum': 0,
            'value_sum': 0, 'uncertainty_sum': 0, 'step_count': 0
        }

        # Run at most env.max_time steps; the environment's `done` flag ends
        # the episode early.
        for _ in range(env.max_time):
            # Get action from agent (the state embedding is not used here)
            action, value_prediction, _state_embedding, uncertainty = agent(state)

            # Environment step; the reward comes back as a tensor so it can be
            # used directly in the loss below.
            next_state, reward_tensor, done, info = env.step(action)

            # Accumulate metrics for current episode
            episode_metrics['reward_sum'] += reward_tensor.item()
            episode_metrics['spectral_similarity_sum'] += info['spectral_similarity']
            episode_metrics['entanglement_deviation_sum'] += abs(info['entanglement_deviation'])
            episode_metrics['entropic_cost_sum'] += info['entropy_balance']  # Use entropy_balance as cost
            episode_metrics['complexity_sum'] += info['complexity']
            episode_metrics['phase_coherence_sum'] += info['phase_coherence']
            episode_metrics['fractal_sum'] += info['metrics']['fractal_dimension']
            episode_metrics['value_sum'] += value_prediction.item()
            episode_metrics['uncertainty_sum'] += uncertainty.item()
            episode_metrics['step_count'] += 1

            # Calculate losses
            actor_loss = -reward_tensor
            critic_loss = F.mse_loss(value_prediction, reward_tensor.detach().unsqueeze(0))  # Unsqueeze for batch dim

            # Action entropy: encourages exploration
            action_dist = torch.distributions.Normal(action.mean(), action.std().clamp(min=1e-8))
            action_entropy = action_dist.entropy().mean()
            exploration_bonus = 0.005 * action_entropy  # Small bonus for entropy

            total_loss = actor_loss + 0.5 * critic_loss - exploration_bonus
            episode_metrics['loss_sum'] += total_loss.item()

            # Optimization step
            optimizer.zero_grad()
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(agent.parameters(), max_norm=1.0)  # Prevent exploding gradients
            optimizer.step()

            # Update state for next step
            state = next_state

            if done:
                break

        # Learning rate that was actually applied during this episode. Read it
        # before the scheduler advances, otherwise the history would hold the
        # NEXT episode's rate (and report 0 for the last episode).
        episode_lr = optimizer.param_groups[0]['lr']

        # Step scheduler after each episode
        scheduler.step()

        # Record episode averages into history
        avg_steps = max(1, episode_metrics['step_count'])
        history['rewards'].append(episode_metrics['reward_sum'])
        history['entanglement'].append(env.tensor.get_entanglement_level().detach().item())
        history['chaoticity'].append(env.tensor.get_chaoticity_index().detach().item())
        history['entropy'].append(env.tensor.metrics()['entropy'])  # Current entropy from tensor
        history['spectral_similarity'].append(episode_metrics['spectral_similarity_sum'] / avg_steps)
        history['entanglement_deviation_abs'].append(episode_metrics['entanglement_deviation_sum'] / avg_steps)
        history['entropic_cost'].append(episode_metrics['entropic_cost_sum'] / avg_steps)
        history['complexity'].append(episode_metrics['complexity_sum'] / avg_steps)
        history['fractal_dimension'].append(episode_metrics['fractal_sum'] / avg_steps)
        history['phase_coherence'].append(episode_metrics['phase_coherence_sum'] / avg_steps)
        history['learning_rate'].append(episode_lr)
        history['loss'].append(episode_metrics['loss_sum'] / avg_steps)
        history['value_predictions'].append(episode_metrics['value_sum'] / avg_steps)
        history['uncertainties'].append(episode_metrics['uncertainty_sum'] / avg_steps)
        history['difficulty'].append(env.difficulty)

        # Save best model
        current_reward = episode_metrics['reward_sum']
        if current_reward > best_reward:
            best_reward = current_reward
            torch.save(agent.state_dict(), best_model_path)

        # Enhanced logging
        if (episode + 1) % 25 == 0 or episode == num_episodes - 1:
            tqdm.write(f"Ep {episode+1}/{num_episodes} | R: {history['rewards'][-1]:.3f} | "
                      f"E: {history['entanglement'][-1]:.3f} | C: {history['chaoticity'][-1]:.3f} | "
                      f"Sim: {history['spectral_similarity'][-1]:.3f} | Comp: {history['complexity'][-1]:.3f} | "
                      f"Uncert: {history['uncertainties'][-1]:.3f} | LR: {history['learning_rate'][-1]:.6f} | "
                      f"Diff: {history['difficulty'][-1]:.2f}")

        # Visualize progress at the cadence requested by the caller
        # (previously `visualize_every` was ignored and plots were tied to
        # the 25-episode logging interval).
        if visualize_every and visualize_every > 0 and (episode + 1) % visualize_every == 0:
            visualize_progress(env, agent, episode, history['rewards'], history, final_save=False)

    # Final visualization and save
    visualize_progress(env, agent, num_episodes, history['rewards'], history, final_save=True)

    # Save final model and history
    torch.save(agent.state_dict(), os.path.join(results_dir, "rdt_agent_final.pth"))
    # Save DynamicTensor state if needed (e.g., its parameters)
    torch.save(env.tensor.state_dict(), os.path.join(results_dir, "dynamic_tensor_final.pth"))

    history_path = os.path.join(results_dir, "training_history.json")
    # Convert NumPy arrays/scalars and torch tensors to plain Python values
    # so that json.dump does not raise on them.
    serializable_history = {
        key: [_history_to_jsonable(item) for item in values]
        for key, values in history.items()
    }
    with open(history_path, "w") as f:
        json.dump(serializable_history, f, indent=4)

    print(f"Training complete. Results saved to {results_dir}")

    # Return the trained agent and environment
    return agent, env

# ===== VISUALIZATION UTILITIES =====

def visualize_progress(env, agent, episode, episode_rewards, history, final_save=False):
    """Render a 2x3 dashboard of training progress and save it as a PNG.

    Panels: total reward per episode, recent spectral similarity / complexity /
    phase coherence, the latest tensor snapshot, recent fractal dimension,
    recent phase coherence, and an entanglement-vs-chaoticity scatter. The
    "recent" panels show at most the last 100 history entries.

    Args:
        env: Environment; ``env.tensor.history[-1]`` is drawn in the tensor
            state panel when the history is non-empty.
        agent: Not used by this function; kept so existing callers keep working.
        episode: Episode index; ``episode + 1`` appears in the file name of
            non-final snapshots.
        episode_rewards: Sequence of total reward per episode.
        history: Dict of metric lists. Keys read here: ``spectral_similarity``,
            ``complexity``, ``phase_coherence``, ``fractal_dimension``,
            ``entanglement`` and ``chaoticity``.
        final_save: When true, save as ``final_training_progress.png`` at
            300 dpi instead of a per-episode file at 200 dpi.
    """
    window = 100  # Maximum number of most recent history entries per trend panel

    fig = plt.figure(figsize=(18, 12))
    try:
        # Panel 1: reward progression
        plt.subplot(2, 3, 1)
        if episode_rewards:
            plt.plot(episode_rewards)
            plt.title(f"Episode Rewards (Current: {episode_rewards[-1]:.2f})")
        else:
            plt.title("Episode Rewards (No data)")
        plt.xlabel("Episode")
        plt.ylabel("Total Reward")
        plt.grid(True, alpha=0.6)

        # Panel 2: key metrics over the most recent episodes. The caller in
        # this file appends one history entry per episode, so the axis is
        # labelled in episodes (it used to say "Steps").
        plt.subplot(2, 3, 2)
        metrics_to_plot = ['spectral_similarity', 'complexity', 'phase_coherence']
        recent_count = min(len(history['spectral_similarity']), window)

        if recent_count > 0:
            for metric in metrics_to_plot:
                plt.plot(history[metric][-recent_count:], label=metric)
            plt.title(f"Recent Metric Values (Last {recent_count} Episodes)")
            plt.xlabel("Relative Episode in Window")
            plt.ylabel("Value")
            plt.legend()
        else:
            plt.title("Recent Metric Values (No data)")
        plt.grid(True, alpha=0.6)

        # Panel 3: latest tensor snapshot (image for 2D/3D, line plot otherwise)
        plt.subplot(2, 3, 3)
        if env.tensor.history:
            snapshot = env.tensor.history[-1]
            rank = len(snapshot.shape)
            if rank == 3:
                plt.imshow(snapshot.mean(axis=0), cmap='viridis')  # Mean across first dim if 3D
                plt.title("Current Tensor State (Mean Slice)")
                plt.colorbar(label="Value")
            elif rank == 2:
                plt.imshow(snapshot, cmap='viridis')
                plt.title("Current Tensor State (2D)")
                plt.colorbar(label="Value")
            else:  # Flattened 1D representation
                plt.plot(snapshot.flatten())
                plt.title("Current Tensor State (Flattened)")
                plt.ylabel("Value")
        else:
            plt.title("Current Tensor State (No data)")
        plt.xlabel("Index")
        plt.grid(True, alpha=0.6)

        # Panel 4: fractal dimension evolution
        plt.subplot(2, 3, 4)
        recent_fd = history['fractal_dimension'][-window:]
        if recent_fd:
            plt.plot(recent_fd)
            plt.axhline(y=1.8, color='r', linestyle='--', label='Target FD (1.8)')
            plt.title("Fractal Dimension Evolution")
            plt.xlabel("Relative Episode in Window")
            plt.ylabel("Dimension")
            plt.legend()
        else:
            plt.title("Fractal Dimension Evolution (No data)")
        plt.grid(True, alpha=0.6)

        # Panel 5: phase coherence
        plt.subplot(2, 3, 5)
        recent_pc = history['phase_coherence'][-window:]
        if recent_pc:
            plt.plot(recent_pc)
            plt.title("Phase Coherence")
            plt.xlabel("Relative Episode in Window")
            plt.ylabel("Coherence (0-1)")
        else:
            plt.title("Phase Coherence (No data)")
        plt.grid(True, alpha=0.6)

        # Panel 6: entanglement vs chaoticity, coloured by position in window
        plt.subplot(2, 3, 6)
        pair_count = min(len(history['entanglement']), window)
        if pair_count > 0:
            # Slice both series with the same count so the points stay paired.
            entanglements = history['entanglement'][-pair_count:]
            chaoticities = history['chaoticity'][-pair_count:]
            plt.scatter(entanglements, chaoticities, c=range(len(entanglements)), cmap='viridis', s=20)
            plt.title("Entanglement vs Chaoticity")
            plt.xlabel("Entanglement")
            plt.ylabel("Chaoticity")
            plt.colorbar(label="Episode (Relative)")
        else:
            plt.title("Entanglement vs Chaoticity (No data)")
        plt.grid(True, alpha=0.6)

        plt.tight_layout()

        filename_suffix = "final_training_progress" if final_save else f"training_progress_episode_{episode+1}"
        plot_path = os.path.join(results_dir, f"{filename_suffix}.png")
        plt.savefig(plot_path, dpi=300 if final_save else 200, bbox_inches='tight')
    finally:
        # Always release the figure, even when a panel or the save raises;
        # otherwise each failed call would leave a figure open in pyplot.
        plt.close(fig)

# ===== MAIN EXECUTION =====

if __name__ == "__main__":
    # train_rdt_system() already prints the start banner and the
    # "Training complete" line with the results directory; printing them here
    # as well made both messages appear twice in the output.
    trained_agent, trained_env = train_rdt_system(num_episodes=500)

Starting RDT System Training on cuda
Starting RDT System Training on cuda


Training RDT System:   5%|▍         | 24/500 [00:14<04:39,  1.70it/s]

Ep 25/500 | R: 41.897 | E: 0.010 | C: 0.800 | Sim: 0.659 | Comp: 0.148 | Uncert: 0.890 | LR: 0.000099 | Diff: 1.00


Training RDT System:  10%|▉         | 49/500 [00:29<04:00,  1.87it/s]

Ep 50/500 | R: 41.955 | E: 0.010 | C: 0.800 | Sim: 0.655 | Comp: 0.176 | Uncert: 0.930 | LR: 0.000098 | Diff: 1.00


Training RDT System:  15%|█▍        | 74/500 [00:43<03:37,  1.95it/s]

Ep 75/500 | R: 42.256 | E: 0.010 | C: 0.800 | Sim: 0.658 | Comp: 0.146 | Uncert: 0.972 | LR: 0.000095 | Diff: 1.00


Training RDT System:  20%|█▉        | 99/500 [00:59<03:28,  1.93it/s]

Ep 100/500 | R: 41.632 | E: 0.010 | C: 0.800 | Sim: 0.647 | Comp: 0.149 | Uncert: 0.979 | LR: 0.000090 | Diff: 1.00


Training RDT System:  25%|██▍       | 124/500 [01:14<03:50,  1.63it/s]

Ep 125/500 | R: 41.988 | E: 0.010 | C: 0.800 | Sim: 0.646 | Comp: 0.177 | Uncert: 0.987 | LR: 0.000085 | Diff: 1.00


Training RDT System:  30%|██▉       | 149/500 [01:29<03:08,  1.86it/s]

Ep 150/500 | R: 40.130 | E: 0.010 | C: 0.800 | Sim: 0.622 | Comp: 0.165 | Uncert: 0.990 | LR: 0.000079 | Diff: 1.00


Training RDT System:  35%|███▍      | 174/500 [01:44<02:47,  1.95it/s]

Ep 175/500 | R: 43.339 | E: 0.010 | C: 0.800 | Sim: 0.665 | Comp: 0.167 | Uncert: 0.995 | LR: 0.000073 | Diff: 1.00


Training RDT System:  40%|███▉      | 199/500 [01:59<02:32,  1.98it/s]

Ep 200/500 | R: 44.643 | E: 0.010 | C: 0.800 | Sim: 0.663 | Comp: 0.192 | Uncert: 0.996 | LR: 0.000065 | Diff: 1.00


Training RDT System:  45%|████▍     | 224/500 [02:15<02:55,  1.58it/s]

Ep 225/500 | R: 40.247 | E: 0.010 | C: 0.800 | Sim: 0.630 | Comp: 0.178 | Uncert: 0.996 | LR: 0.000058 | Diff: 1.00


Training RDT System:  50%|████▉     | 249/500 [02:31<02:13,  1.88it/s]

Ep 250/500 | R: 43.811 | E: 0.010 | C: 0.800 | Sim: 0.661 | Comp: 0.190 | Uncert: 0.996 | LR: 0.000050 | Diff: 1.00


Training RDT System:  55%|█████▍    | 274/500 [02:46<01:56,  1.94it/s]

Ep 275/500 | R: 43.427 | E: 0.010 | C: 0.800 | Sim: 0.658 | Comp: 0.191 | Uncert: 0.998 | LR: 0.000042 | Diff: 1.00


Training RDT System:  60%|█████▉    | 299/500 [03:02<01:54,  1.76it/s]

Ep 300/500 | R: 43.367 | E: 0.010 | C: 0.800 | Sim: 0.655 | Comp: 0.175 | Uncert: 0.996 | LR: 0.000035 | Diff: 1.00


Training RDT System:  65%|██████▍   | 324/500 [03:18<01:39,  1.78it/s]

Ep 325/500 | R: 39.888 | E: 0.010 | C: 0.800 | Sim: 0.603 | Comp: 0.170 | Uncert: 0.997 | LR: 0.000027 | Diff: 1.00


Training RDT System:  70%|██████▉   | 349/500 [03:33<01:19,  1.90it/s]

Ep 350/500 | R: 43.042 | E: 0.010 | C: 0.800 | Sim: 0.639 | Comp: 0.149 | Uncert: 0.998 | LR: 0.000021 | Diff: 1.00


Training RDT System:  75%|███████▍  | 374/500 [03:48<01:08,  1.85it/s]

Ep 375/500 | R: 43.182 | E: 0.010 | C: 0.800 | Sim: 0.671 | Comp: 0.190 | Uncert: 0.999 | LR: 0.000015 | Diff: 1.00


Training RDT System:  80%|███████▉  | 399/500 [04:04<01:04,  1.56it/s]

Ep 400/500 | R: 44.056 | E: 0.010 | C: 0.800 | Sim: 0.669 | Comp: 0.213 | Uncert: 0.999 | LR: 0.000010 | Diff: 1.00


Training RDT System:  85%|████████▍ | 424/500 [04:20<00:41,  1.82it/s]

Ep 425/500 | R: 44.379 | E: 0.010 | C: 0.800 | Sim: 0.666 | Comp: 0.196 | Uncert: 0.998 | LR: 0.000005 | Diff: 1.00


Training RDT System:  90%|████████▉ | 449/500 [04:35<00:26,  1.93it/s]

Ep 450/500 | R: 42.738 | E: 0.010 | C: 0.800 | Sim: 0.659 | Comp: 0.176 | Uncert: 0.998 | LR: 0.000002 | Diff: 1.00


Training RDT System:  95%|█████████▍| 474/500 [04:50<00:13,  1.97it/s]

Ep 475/500 | R: 41.940 | E: 0.010 | C: 0.800 | Sim: 0.660 | Comp: 0.155 | Uncert: 0.998 | LR: 0.000001 | Diff: 1.00


Training RDT System: 100%|█████████▉| 499/500 [05:06<00:00,  1.71it/s]

Ep 500/500 | R: 43.069 | E: 0.010 | C: 0.800 | Sim: 0.657 | Comp: 0.242 | Uncert: 0.999 | LR: 0.000000 | Diff: 1.00


Training RDT System: 100%|██████████| 500/500 [05:08<00:00,  1.62it/s]


Training complete. Results saved to rdt_results_20250720_234442
Training complete. Results saved to rdt_results_20250720_234442
