In [ ]:
# Example integration code
# The guide below is emitted verbatim to stdout and to integration_guide.md.
# Section 1 loads the fine-tuned checkpoint with strict=False because the saved
# state dict also contains the classification head that this notebook attaches
# to the model for supervised training; a freshly built StructureAnalyzer would
# otherwise reject those extra keys.
integration_example = """
# Integration with AlgoSpace MARL System

## 1. Load the trained model in your trading system:

```python
from agents.marl.agents.structure_analyzer import StructureAnalyzer
import torch

# Initialize structure analyzer
config = {
    'window': 48,
    'input_features': 8,
    'hidden_dim': 256,
    'n_heads': 8,
    'n_layers': 4,
    'dropout': 0.1
}

structure_agent = StructureAnalyzer(config)

# Load pre-trained weights (map_location lets the checkpoint load on CPU-only hosts)
checkpoint = torch.load('models/agents/structure_analyzer_finetuned.pt', map_location='cpu')
# strict=False: the checkpoint also stores the training-only classification head
structure_agent.load_state_dict(checkpoint['model_state_dict'], strict=False)
structure_agent.eval()
```

## 2. Use in MARL consensus:

```python
from training.marl_trainer import MARLTrainer

# Configure MARL system
marl_config = {
    'agents': {
        'structure_analyzer': structure_agent,
        'regime_detector': regime_agent,
        'tactical_agent': tactical_agent
    },
    'consensus_weights': {
        'structure_analyzer': 0.4,
        'regime_detector': 0.3,
        'tactical_agent': 0.3
    }
}

# Initialize MARL trainer
trainer = MARLTrainer(marl_config)
```

## 3. Real-time trading integration:

```python
# Process incoming market data
market_data_30m = matrix_assembler.process_30m_data(raw_data)

# Get structure analysis
with torch.no_grad():
    structure_output = structure_agent({
        'market_matrix': market_data_30m,
        'regime_embedding': regime_embedding,
        'synergy_context': synergy_context
    })

# Extract trading signals
action = structure_output['action']  # [pass, long, short]
confidence = structure_output['confidence']
structure_score = structure_agent._calculate_structure_score(synergy_context)

# Use in decision making
if confidence > 0.7 and structure_score > 0.6:
    execute_trade(action, position_size=calculate_size(confidence))
```

## 4. Monitoring and logging:

```python
# Log structure metrics
logger.info(
    "Structure Analysis",
    action=action,
    confidence=confidence,
    structure_score=structure_score,
    structure_type=get_structure_type(market_data_30m),
    synergy_alignment=synergy_context['synergy_type']
)
```
"""

print(integration_example)

# Save integration guide (explicit UTF-8 so the file is identical on every platform)
with open(RESULTS_PATH / 'integration_guide.md', 'w', encoding='utf-8') as f:
    f.write(integration_example)

print("\n✅ Structure Agent Training notebook complete!")
print("\n📚 Next Steps:")
print("1. Review the training summary and metrics")
print("2. Test the model with live market data")
print("3. Integrate with MARL consensus mechanism")
print("4. Monitor performance in production")
print("5. Retrain periodically with new data")

## 10. Model Deployment Integration

This section shows how to integrate the trained Structure Analyzer into the AlgoSpace MARL system.

In [ ]:
# Generate comprehensive training summary
# Final RL metrics are averaged over the last 10 episodes. They are computed
# once and cast to built-in floats so the same values feed both the text report
# and the JSON export (json cannot serialise every NumPy scalar type).
final_avg_return = float(np.mean(episode_returns[-10:]))
final_win_rate = float(np.mean(win_rates[-10:]))
final_avg_trades = float(np.mean(episode_lengths[-10:]))

summary = f"""
# Structure Analyzer Training Summary

## Training Configuration
- Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- Device: {device}
- Model Parameters: {total_params:,}
- Window Size: {config['window']} bars (30-minute)
- Input Features: {config['input_features']}

## Supervised Pre-training Results
- Epochs: {len(history['train_loss'])}
- Best Validation F1: {best_val_f1:.4f}
- Test Set Performance:
  - Accuracy: {test_acc:.2f}%
  - F1 Score: {test_f1:.4f}

## Structure Detection Performance
"""

# Add per-class performance (convert the prediction lists to arrays once)
test_labels_arr = np.array(test_labels)
test_preds_arr = np.array(test_preds)
for i, name in enumerate(structure_names):
    class_mask = test_labels_arr == i
    # Skip classes that do not occur in the test split
    if class_mask.sum() > 0:
        class_acc = (test_preds_arr[class_mask] == i).mean() * 100
        summary += f"- {name}: {class_acc:.1f}% accuracy\n"

summary += f"""
## Reinforcement Learning Fine-tuning
- Episodes: {n_episodes}
- Final Average Return: {final_avg_return*100:.2f}%
- Final Win Rate: {final_win_rate*100:.1f}%
- Average Trades per Episode: {final_avg_trades:.1f}

## Model Architecture
- Embedder: Transformer-based (8 features → 256 dim)
- Attention: {config['n_heads']} heads, {config['n_layers']} layers
- Policy Head: 3 actions (hold, long, short)
- Classification Head: 4 structure types

## Training Features
- Volume Profile Analysis: ✓
- Market Depth Proxy: ✓
- Microstructure Features: ✓
- Support/Resistance Detection: ✓
- Trend Structure Evaluation: ✓

## Output Files
- Pre-trained Model: {MODELS_PATH}/structure_analyzer_pretrained.pt
- Fine-tuned Model: {MODELS_PATH}/structure_analyzer_finetuned.pt
- Training History: {RESULTS_PATH}/structure_training_history.png
- Confusion Matrix: {RESULTS_PATH}/structure_confusion_matrix.png
- RL Metrics: {RESULTS_PATH}/rl_training_metrics.png

## Integration Notes
- Compatible with MatrixAssembler30m output
- Integrates with SynergyDetector patterns
- Provides structure scores for MARL consensus
- Weights 40% in multi-agent decision making
"""

print(summary)

# Save summary. The text contains non-ASCII symbols (→, ✓), so the encoding is
# pinned to UTF-8 instead of relying on the platform's locale default.
with open(RESULTS_PATH / 'training_summary.txt', 'w', encoding='utf-8') as f:
    f.write(summary)

# Export model configuration
model_config = {
    'architecture': 'StructureAnalyzer',
    'config': config,
    # 30m bars × features, taken from the config so the two cannot drift apart
    'input_shape': (config['window'], config['input_features']),
    'output_heads': {
        'action': 3,
        'confidence': 1,
        'reasoning': 64,
        'structure_class': 4
    },
    'training_metrics': {
        'pretrain_f1': float(best_val_f1),
        'test_accuracy': float(test_acc),
        'rl_return': final_avg_return,
        'rl_win_rate': final_win_rate
    },
    'synergy_integration': True,
    'agent_weight': 0.4
}

with open(MODELS_PATH / 'structure_analyzer_config.json', 'w', encoding='utf-8') as f:
    json.dump(model_config, f, indent=2)

print("\n✅ Training complete! All models and results saved.")
print(f"📁 Models directory: {MODELS_PATH}")
print(f"📊 Results directory: {RESULTS_PATH}")

## 9. Training Summary and Model Export

In [ ]:
# Analyze learned features
def analyze_structure_features(model, dataset, n_samples=100):
    """Collect pooled embeddings and attention weights per structure class.

    Runs the model's embedder over the first ``min(n_samples, len(dataset))``
    samples with gradients disabled.

    Args:
        model: Object exposing ``eval()`` and an ``embedder`` callable.
        dataset: Indexable collection whose items are dicts with
            ``'market_data'`` and ``'label'`` tensors.
        n_samples: Upper bound on the number of samples inspected.

    Returns:
        A pair ``(feature_importance, attention_patterns)``:
        ``feature_importance`` maps each structure name to a list of pooled
        embeddings (NumPy arrays); ``attention_patterns`` maps each class
        index 0-3 to a list of attention arrays, populated only when the
        embedder exposes a non-``None`` ``attention_weights`` attribute.
    """
    model.eval()

    pooled_by_structure = {
        name: []
        for name in ('Strong Trend', 'Range Bound', 'Breakout', 'Reversal')
    }
    attention_by_class = {0: [], 1: [], 2: [], 3: []}

    sample_count = min(n_samples, len(dataset))

    with torch.no_grad():
        for position in range(sample_count):
            record = dataset[position]
            # Add a leading batch axis of size 1 before moving to the device
            batch = record['market_data'].unsqueeze(0).to(device)
            class_id = record['label'].item()

            # Swap axes 1 and 2 before embedding, as the training code does
            embedded = model.embedder(batch.transpose(1, 2))

            # Attention weights are optional; keep them only when present
            weights = getattr(model.embedder, 'attention_weights', None)
            if weights is not None:
                attention_by_class[class_id].append(weights.cpu().numpy())

            # Mean over axis 1 gives one vector per sample
            pooled = embedded.mean(dim=1)

            # File the pooled vector under the sample's structure name
            pooled_by_structure[structure_names[class_id]].append(pooled.cpu().numpy())

    return pooled_by_structure, attention_by_class

# Run analysis
print("🔍 Analyzing learned features...")
feature_importance, attention_patterns = analyze_structure_features(model, test_dataset)

# Visualize feature importance: one panel per structure type showing the
# embedding dimensions with the largest mean absolute value.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

for idx, (structure_type, features) in enumerate(feature_importance.items()):
    # Classes with no collected samples leave their panel empty
    if features:
        features_array = np.vstack(features)
        mean_features = features_array.mean(axis=0)
        std_features = features_array.std(axis=0)

        # Plot top 20 features (fewer when the representation has < 20 dims,
        # so the bar positions always match the number of selected features)
        top_indices = np.argsort(np.abs(mean_features))[-20:]
        n_top = len(top_indices)

        ax = axes[idx]
        ax.barh(range(n_top), mean_features[top_indices], xerr=std_features[top_indices])
        ax.set_yticks(range(n_top))
        ax.set_yticklabels([f'F{i}' for i in top_indices])
        ax.set_xlabel('Feature Importance')
        ax.set_title(f'{structure_type} - Top Features', fontweight='bold')
        ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(RESULTS_PATH / 'structure_feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

## 8. Feature Analysis and Interpretability

In [ ]:
# Plot RL training results
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Episode returns. The environment reports total_return as a fraction of
# initial capital, so scale by 100 to match the "Return (%)" axis label.
returns_pct = np.asarray(episode_returns, dtype=float) * 100
ax1 = axes[0]
ax1.plot(returns_pct, alpha=0.3, label='Episode Return')
ax1.plot(pd.Series(returns_pct).rolling(10).mean(), linewidth=2, label='10-Episode MA')
ax1.axhline(y=0, color='k', linestyle='--', alpha=0.5)  # break-even reference
ax1.set_xlabel('Episode')
ax1.set_ylabel('Return (%)')
ax1.set_title('Episode Returns', fontsize=14, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Win rates (plotted as fractions in [0, 1])
ax2 = axes[1]
ax2.plot(win_rates, alpha=0.3, label='Win Rate')
ax2.plot(pd.Series(win_rates).rolling(10).mean(), linewidth=2, label='10-Episode MA')
ax2.axhline(y=0.5, color='k', linestyle='--', alpha=0.5)  # coin-flip reference
ax2.set_xlabel('Episode')
ax2.set_ylabel('Win Rate')
ax2.set_title('Trading Win Rate', fontsize=14, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Trade frequency (episode_lengths holds the number of trades per episode)
ax3 = axes[2]
ax3.plot(episode_lengths, alpha=0.3, label='Trades per Episode')
ax3.plot(pd.Series(episode_lengths).rolling(10).mean(), linewidth=2, label='10-Episode MA')
ax3.set_xlabel('Episode')
ax3.set_ylabel('Number of Trades')
ax3.set_title('Trading Activity', fontsize=14, fontweight='bold')
ax3.legend()
ax3.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(RESULTS_PATH / 'rl_training_metrics.png', dpi=300, bbox_inches='tight')
plt.show()

In [ ]:
# RL training loop
# Each episode rolls the policy through the environment once, then performs a
# PPO update on the collected trajectory and records return / trade metrics.
n_episodes = 100
episode_returns = []
episode_lengths = []
win_rates = []

# Use test data for RL fine-tuning
# NOTE(review): data[0] looks like a single sample; the environment indexes
# rows at and beyond its observation window, so confirm this array is longer
# than the window.
env_data = test_dataset.data[0]  # Use first sample for demonstration
env = StructureTradingEnv(env_data)

print("\n🚀 Starting RL fine-tuning...\n")

for episode in range(n_episodes):
    # Reset environment
    obs = env.reset()
    done = False
    trajectory = {
        'states': [],
        'actions': [],
        'rewards': [],
        'log_probs': [],
        'values': []
    }

    # Collect trajectory
    while not done:
        # Convert observation to tensor with a leading batch axis of size 1
        state_tensor = torch.FloatTensor(obs).unsqueeze(0).to(device)

        # Get action (no gradients needed while sampling)
        with torch.no_grad():
            # Forward pass
            x = state_tensor.transpose(1, 2)
            embedded = model.embedder(x)
            representation = embedded.mean(dim=1)

            # Get action from policy; regime/synergy inputs are zero placeholders
            output = model.policy_head({
                'embedded': embedded,
                'regime_embedding': torch.zeros(1, 8).to(device),
                'synergy_features': torch.zeros(1, 32).to(device)
            })

            action_logits = output['action']
            dist = torch.distributions.Categorical(logits=action_logits)
            action = dist.sample()
            log_prob = dist.log_prob(action)

            # Get value
            value = ppo.value_head(representation)

        # Step environment
        next_obs, reward, done, info = env.step(action.item())

        # Store trajectory. The singleton batch axis is dropped from every
        # per-step tensor so that stacking yields (T, ...) tensors: ppo.update
        # transposes axes 1 and 2 of the stacked states and pairs actions /
        # log-probs / rewards element-wise, which requires 3-D states and 1-D
        # vectors rather than (T, 1, ...) shapes.
        trajectory['states'].append(state_tensor.squeeze(0))
        trajectory['actions'].append(action.squeeze(0))
        trajectory['rewards'].append(torch.as_tensor(reward, dtype=torch.float32))
        trajectory['log_probs'].append(log_prob.squeeze(0))
        trajectory['values'].append(value.squeeze())

        obs = next_obs

    # Process trajectory: turn each non-empty list into one stacked tensor
    for key in trajectory:
        if trajectory[key]:
            trajectory[key] = torch.stack(trajectory[key]).to(device)

    # Update policy
    if len(trajectory['states']) > 0:
        policy_loss, value_loss = ppo.update([trajectory])

    # Record metrics (episode_lengths stores the number of trades, not steps)
    episode_return = info['total_return']
    episode_returns.append(episode_return)
    episode_lengths.append(len(env.trades))

    # Calculate win rate. This is computed over per-step rewards: the share of
    # steps with a positive reward among steps with a non-zero reward.
    winning_trades = sum(1 for r in env.episode_rewards if r > 0)
    total_trades = sum(1 for r in env.episode_rewards if r != 0)
    win_rate = winning_trades / total_trades if total_trades > 0 else 0
    win_rates.append(win_rate)

    # Log progress
    if episode % 10 == 0:
        avg_return = np.mean(episode_returns[-10:])
        avg_win_rate = np.mean(win_rates[-10:])
        print(f"Episode {episode}:")
        print(f"  Avg Return: {avg_return*100:.2f}%")
        print(f"  Avg Win Rate: {avg_win_rate*100:.1f}%")
        print(f"  Trades: {len(env.trades)}")

# Save fine-tuned model
torch.save({
    'model_state_dict': model.state_dict(),
    'value_head_state_dict': ppo.value_head.state_dict(),
    'episode': episode,
    # Built-in float: NumPy scalars are rejected by torch.load(weights_only=True)
    'avg_return': float(np.mean(episode_returns[-10:])),
    'config': config
}, MODELS_PATH / 'structure_analyzer_finetuned.pt')

print("\n✅ RL fine-tuning complete!")
print(f"   Final average return: {np.mean(episode_returns[-10:])*100:.2f}%")
print(f"   Final win rate: {np.mean(win_rates[-10:])*100:.1f}%")

In [ ]:
# RL Environment for Structure Analysis
class StructureTradingEnv:
    """Simplified trading environment for structure-based trading.

    The environment walks forward one row of ``data`` per step. Column 3 of
    each row is read as the close price. The agent is either flat, long or
    short with its whole capital; capital is marked to market every step using
    the move from the current close to the next close.

    Args:
        data: 2-D array-like indexed as ``data[row, column]`` and sliceable by
            row; must contain more than ``window`` rows.
        initial_capital: Capital at the start of every episode.
        transaction_cost: Proportional fee charged each time a position is
            opened and each time one is closed.
        window: Number of rows in each observation; the episode starts at row
            ``window``.

    Raises:
        ValueError: If ``window`` is not positive or ``data`` has too few rows
            to take a single step.
    """

    def __init__(self, data, initial_capital=100000, transaction_cost=0.001, window=48):
        if window < 1:
            raise ValueError("window must be a positive number of bars")
        # step() reads row `window` immediately, so that row must exist
        if len(data) <= window:
            raise ValueError(
                f"data must contain more than {window} rows, got {len(data)}"
            )
        self.data = data
        self.initial_capital = initial_capital
        self.transaction_cost = transaction_cost
        self.window = window
        self.reset()

    def reset(self):
        """Reset environment to initial state and return the first observation."""
        self.capital = self.initial_capital
        self.position = 0  # -1: short, 0: neutral, 1: long
        self.current_idx = self.window  # Start after first window
        self.episode_rewards = []
        self.trades = []

        # Get initial observation
        return self._get_observation()

    def _get_observation(self):
        """Return the ``window`` rows that precede the current index."""
        return self.data[self.current_idx - self.window:self.current_idx]

    def step(self, action):
        """Execute action and return next state.

        Args:
            action: 0 = hold, 1 = buy/long, 2 = sell/short. Buying while long
                or selling while short leaves the position unchanged.

        Returns:
            ``(next_obs, reward, done, info)`` where ``reward`` is the
            fractional change in capital over this step, ``next_obs`` is
            ``None`` once ``done`` is true, and ``info`` holds ``capital``,
            ``position`` and ``total_return``.
        """
        prev_capital = self.capital

        # Get current and next price (fall back to the current price on the last row)
        current_price = self.data[self.current_idx, 3]  # Close price
        has_next_bar = self.current_idx + 1 < len(self.data)
        next_price = self.data[self.current_idx + 1, 3] if has_next_bar else current_price
        bar_return = next_price / current_price
        fee_factor = 1 - self.transaction_cost

        # Execute trade. Capital is already marked to market up to the current
        # price, so closing an existing position only incurs the fee; the
        # coming bar's move belongs solely to the position held after the trade.
        if action == 1 and self.position <= 0:  # Buy
            if self.position < 0:
                self.capital *= fee_factor  # fee for closing the short
            self.position = 1
            self.capital *= fee_factor  # fee for opening the long
            self.trades.append(('BUY', self.current_idx, current_price))

        elif action == 2 and self.position >= 0:  # Sell/Short
            if self.position > 0:
                self.capital *= fee_factor  # fee for closing the long
            self.position = -1
            self.capital *= fee_factor  # fee for opening the short
            self.trades.append(('SELL', self.current_idx, current_price))

        # Update position value over the coming bar
        if self.position == 1:  # Long
            self.capital = self.capital * bar_return
        elif self.position == -1:  # Short gains when the price falls
            self.capital = self.capital * (2 - bar_return)

        # Calculate reward as the fractional change in capital
        reward = (self.capital - prev_capital) / prev_capital
        self.episode_rewards.append(reward)

        # Move to next step
        self.current_idx += 1
        done = self.current_idx >= len(self.data) - 1

        # Get next observation
        next_obs = self._get_observation() if not done else None

        info = {
            'capital': self.capital,
            'position': self.position,
            'total_return': (self.capital - self.initial_capital) / self.initial_capital
        }

        return next_obs, reward, done, info

# PPO implementation for fine-tuning
class StructurePPO:
    """PPO algorithm for structure-based trading.

    Wraps a model that exposes ``embedder`` and ``policy_head`` and adds a
    separate value head with its own optimizer.

    Args:
        model: Module providing ``embedder``, ``policy_head`` and ``parameters()``.
        lr: Learning rate used for both the model and value-head optimizers.
        gamma: Discount factor for returns.
        eps_clip: Clipping range for the PPO probability ratio.
    """

    def __init__(self, model, lr=3e-4, gamma=0.99, eps_clip=0.2):
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.value_loss_coef = 0.5
        self.entropy_coef = 0.01

        # Add value head for RL (consumes the 256-wide pooled representation)
        self.value_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        ).to(device)

        self.value_optimizer = optim.Adam(self.value_head.parameters(), lr=lr)

    def get_action(self, state, deterministic=False):
        """Get action from policy for a single state.

        Args:
            state: Tensor with a leading batch axis of size 1; axes 1 and 2
                are swapped before it is passed to the embedder.
            deterministic: Pick the arg-max action instead of sampling.

        Returns:
            ``(action, value)`` as a Python int and float.
        """
        with torch.no_grad():
            # Forward pass
            x = state.transpose(1, 2)
            embedded = self.model.embedder(x)
            representation = embedded.mean(dim=1)

            # Get action logits from policy head
            output = self.model.policy_head({
                'embedded': embedded,
                'regime_embedding': torch.zeros(1, 8).to(device),  # Dummy regime
                'synergy_features': torch.zeros(1, 32).to(device)  # Dummy synergy
            })

            action_logits = output['action']

            # Sample action
            if deterministic:
                action = torch.argmax(action_logits, dim=1)
            else:
                dist = torch.distributions.Categorical(logits=action_logits)
                action = dist.sample()

            # Get value
            value = self.value_head(representation)

            return action.item(), value.item()

    def compute_returns(self, rewards, values, dones):
        """Compute discounted returns.

        Args:
            rewards: Per-step rewards (sequence or tensor); flattened to 1-D.
            values: Unused; kept for interface compatibility.
            dones: Per-step episode-end flags (1 stops the discounted sum from
                flowing across that step); flattened to 1-D.

        Returns:
            1-D float32 tensor of discounted returns, on the same device as
            ``rewards`` when ``rewards`` is a tensor.
        """
        rewards = torch.as_tensor(rewards, dtype=torch.float32).reshape(-1)
        dones = torch.as_tensor(
            dones, dtype=torch.float32, device=rewards.device
        ).reshape(-1)

        # Fill a preallocated tensor back-to-front instead of inserting at the
        # head of a list, and keep everything on the rewards' device.
        returns = torch.zeros_like(rewards)
        running = torch.zeros((), dtype=torch.float32, device=rewards.device)
        for step in reversed(range(rewards.shape[0])):
            running = rewards[step] + self.gamma * running * (1 - dones[step])
            returns[step] = running

        return returns

    def update(self, trajectories):
        """Update policy using collected trajectories.

        Args:
            trajectories: List of dicts with tensor entries ``'states'``,
                ``'actions'``, ``'rewards'``, ``'log_probs'`` and ``'values'``.
                Per-step entries may carry an extra singleton batch axis; it
                is removed here.

        Returns:
            ``(policy_loss, value_loss)`` from the final PPO epoch, as floats.
        """
        # Prepare data
        states = torch.cat([t['states'] for t in trajectories])
        actions = torch.cat([t['actions'] for t in trajectories])
        rewards = torch.cat([t['rewards'] for t in trajectories])
        old_log_probs = torch.cat([t['log_probs'] for t in trajectories])
        values = torch.cat([t['values'] for t in trajectories])

        # Normalise shapes: states become (steps, rows, cols) and every
        # per-step scalar a flat vector, so a stray (steps, 1) column cannot
        # broadcast against a (steps,) vector into a (steps, steps) matrix.
        states = states.reshape(-1, states.shape[-2], states.shape[-1])
        actions = actions.reshape(-1)
        rewards = rewards.reshape(-1).float()
        old_log_probs = old_log_probs.reshape(-1)
        values = values.reshape(-1)

        # Compute returns (no episode boundaries are signalled: dones are all zero)
        returns = self.compute_returns(rewards, values, torch.zeros_like(rewards))
        returns = returns.to(values.device)
        advantages = returns - values

        # Normalize advantages; the std of a single sample is NaN, so skip then
        if advantages.numel() > 1:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        # PPO update
        for _ in range(4):  # PPO epochs
            # Forward pass
            x = states.transpose(1, 2)
            embedded = self.model.embedder(x)
            representation = embedded.mean(dim=1)

            # Policy output (regime/synergy inputs are zero placeholders)
            output = self.model.policy_head({
                'embedded': embedded,
                'regime_embedding': torch.zeros(len(states), 8).to(device),
                'synergy_features': torch.zeros(len(states), 32).to(device)
            })

            action_logits = output['action']
            dist = torch.distributions.Categorical(logits=action_logits)
            new_log_probs = dist.log_prob(actions)
            entropy = dist.entropy().mean()

            # Value output; squeeze only the last axis so a batch of one stays 1-D
            new_values = self.value_head(representation).squeeze(-1)

            # PPO loss
            ratio = torch.exp(new_log_probs - old_log_probs)
            surr1 = ratio * advantages
            surr2 = torch.clamp(ratio, 1 - self.eps_clip, 1 + self.eps_clip) * advantages

            policy_loss = -torch.min(surr1, surr2).mean()
            value_loss = F.mse_loss(new_values, returns)

            total_loss = policy_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy

            # Update
            self.optimizer.zero_grad()
            self.value_optimizer.zero_grad()
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            self.optimizer.step()
            self.value_optimizer.step()

        return policy_loss.item(), value_loss.item()

# Build the PPO trainer around the current model, using a learning rate of 1e-4
ppo = StructurePPO(model, lr=1e-4)

# Report the fine-tuning hyper-parameters, one line each
for setup_line in (
    "✅ RL fine-tuning setup complete",
    "   Algorithm: PPO",
    "   Learning rate: 1e-4",
    "   Gamma: 0.99",
    "   Epsilon clip: 0.2",
):
    print(setup_line)

## 7. Reinforcement Learning Fine-tuning

In [ ]:
# Load best model
checkpoint = torch.load(MODELS_PATH / 'structure_analyzer_pretrained.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# The checkpoint stores a zero-based epoch index, hence the +1 for display
print(f"📊 Evaluating best model from epoch {checkpoint['epoch'] + 1}")
print(f"   Validation F1: {checkpoint['val_f1']:.4f}")
print(f"   Validation Accuracy: {checkpoint['val_acc']:.2f}%")

# Test set evaluation
test_loss, test_acc, test_f1, test_preds, test_labels, test_probs = validate(
    model, test_loader, criterion, device
)

print(f"\n📈 Test Set Performance:")
print(f"   Loss: {test_loss:.4f}")
print(f"   Accuracy: {test_acc:.2f}%")
print(f"   F1 Score: {test_f1:.4f}")

# Pin the label set to every known structure class. Without it, a class that
# is absent from both labels and predictions shrinks the confusion matrix
# (misaligning the tick labels) and makes classification_report reject
# target_names because the class counts differ.
class_ids = list(range(len(structure_names)))

# Confusion matrix
cm = confusion_matrix(test_labels, test_preds, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=structure_names, yticklabels=structure_names)
plt.title('Structure Detection Confusion Matrix', fontsize=16, fontweight='bold')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.savefig(RESULTS_PATH / 'structure_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

# Classification report
print("\n📋 Classification Report:")
print(classification_report(test_labels, test_preds, labels=class_ids,
                            target_names=structure_names))

# Per-class performance analysis (convert the lists to arrays once)
print("\n📊 Per-Class Performance:")
test_labels_arr = np.array(test_labels)
test_preds_arr = np.array(test_preds)
for i, name in enumerate(structure_names):
    class_mask = test_labels_arr == i
    # Classes with no test samples are skipped rather than reported as NaN
    if class_mask.sum() > 0:
        class_acc = (test_preds_arr[class_mask] == i).mean() * 100
        class_samples = class_mask.sum()
        print(f"   {name}: {class_acc:.1f}% accuracy ({class_samples} samples)")

## 6. Model Evaluation

In [ ]:
# Plot training history: three side-by-side panels, each overlaying the
# training and validation curve of one metric.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# (train key, validation key, y-axis label, panel title), left to right
history_panels = [
    ('train_loss', 'val_loss', 'Loss', 'Training Loss'),
    ('train_acc', 'val_acc', 'Accuracy (%)', 'Structure Detection Accuracy'),
    ('train_f1', 'val_f1', 'F1 Score', 'F1 Score (Weighted)'),
]

for panel, (train_key, val_key, y_label, panel_title) in zip(axes, history_panels):
    panel.plot(history[train_key], label='Train', linewidth=2)
    panel.plot(history[val_key], label='Validation', linewidth=2)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title, fontsize=14, fontweight='bold')
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(RESULTS_PATH / 'structure_training_history.png', dpi=300, bbox_inches='tight')
plt.show()

In [ ]:
# Training functions
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Module exposing ``embedder`` and ``classification_head``.
        loader: Iterable of batches, each a dict with ``'market_data'`` and
            ``'label'`` tensors.
        criterion: Loss applied to ``(logits, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batch tensors are moved to.

    Returns:
        ``(avg_loss, accuracy, f1)``: mean per-batch loss, accuracy in percent
        and the weighted F1 score over every sample seen in the epoch.
    """
    model.train()
    running_loss = 0
    predicted = []
    expected = []

    batches = tqdm(loader, desc="Training", leave=False)
    for batch in batches:
        inputs = batch['market_data'].to(device)
        targets = batch['label'].to(device)

        # Swap axes 1 and 2 for the embedder, then average its output over
        # axis 1 to get one vector per sample.
        pooled = model.embedder(inputs.transpose(1, 2)).mean(dim=1)

        logits = model.classification_head(pooled)
        loss = criterion(logits, targets)

        # Gradient step, with the gradient norm clipped to 1.0
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Accumulate the batch loss and the arg-max predictions
        batch_loss = loss.item()
        running_loss += batch_loss
        predicted.extend(logits.argmax(dim=1).cpu().numpy())
        expected.extend(targets.cpu().numpy())

        batches.set_postfix({'loss': f'{batch_loss:.4f}'})

    # Epoch-level metrics
    accuracy = 100 * np.mean(np.array(predicted) == np.array(expected))
    avg_loss = running_loss / len(loader)

    from sklearn.metrics import f1_score
    f1 = f1_score(expected, predicted, average='weighted')

    return avg_loss, accuracy, f1

def validate(model, loader, criterion, device):
    """Validate the model.

    Runs ``model`` in eval mode over ``loader`` with gradients disabled.

    Args:
        model: Module exposing ``embedder`` and ``classification_head``.
        loader: Iterable of batches, each a dict with ``'market_data'`` and
            ``'label'`` tensors.
        criterion: Loss applied to ``(logits, labels)``.
        device: Device the batch tensors are moved to.

    Returns:
        ``(avg_loss, accuracy, f1, all_preds, all_labels, all_probs)``: mean
        per-batch loss, accuracy in percent, weighted F1 score, and per-sample
        lists of predicted classes, true labels and softmax probabilities.
    """
    # Imported locally, as train_epoch does, so this function does not depend
    # on a module-level import of f1_score being present.
    from sklearn.metrics import f1_score

    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for batch in tqdm(loader, desc="Validating", leave=False):
            market_data = batch['market_data'].to(device)
            labels = batch['label'].to(device)

            # Forward pass: swap axes 1 and 2, embed, then mean-pool over axis 1
            x = market_data.transpose(1, 2)
            embedded = model.embedder(x)
            representation = embedded.mean(dim=1)
            logits = model.classification_head(representation)

            # Calculate loss
            loss = criterion(logits, labels)
            total_loss += loss.item()

            # Get predictions (class probabilities and arg-max class)
            probs = F.softmax(logits, dim=1)
            _, preds = torch.max(logits, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    # Calculate metrics
    accuracy = 100 * np.mean(np.array(all_preds) == np.array(all_labels))
    avg_loss = total_loss / len(loader)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return avg_loss, accuracy, f1, all_preds, all_labels, all_probs

# Training loop
# Trains for up to n_epochs, checkpointing whenever the validation F1 improves
# and stopping early after `patience` consecutive epochs without improvement.
n_epochs = 50
best_val_f1 = 0
patience = 10
patience_counter = 0

print("\n🚀 Starting supervised pre-training...\n")

for epoch in range(n_epochs):
    # Train
    train_loss, train_acc, train_f1 = train_epoch(model, train_loader, criterion, optimizer, device)

    # Validate
    val_loss, val_acc, val_f1, _, _, _ = validate(model, val_loader, criterion, device)

    # Update scheduler
    scheduler.step()

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['train_f1'].append(train_f1)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['val_f1'].append(val_f1)

    # Save best model
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        patience_counter = 0
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Stored as built-in floats: the metrics are NumPy scalars, which
            # torch.load refuses to unpickle when weights_only=True.
            'val_f1': float(val_f1),
            'val_acc': float(val_acc),
            'config': config
        }, MODELS_PATH / 'structure_analyzer_pretrained.pt')
        print(f"  💾 New best model saved (F1: {val_f1:.4f})")
    else:
        patience_counter += 1

    # Print progress
    print(f"Epoch {epoch+1}/{n_epochs}:")
    print(f"  Train - Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%, F1: {train_f1:.4f}")
    print(f"  Val   - Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%, F1: {val_f1:.4f}")
    print(f"  LR: {scheduler.get_last_lr()[0]:.6f}")

    # Early stopping
    if patience_counter >= patience:
        print(f"\n⚠️ Early stopping triggered after {epoch+1} epochs")
        break

print(f"\n✅ Pre-training complete! Best validation F1: {best_val_f1:.4f}")

In [ ]:
# Build one DataLoader per split; only the training split is shuffled
loader_options = {'batch_size': config['batch_size'], 'num_workers': 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Loss, optimizer and cosine learning-rate schedule for supervised pre-training
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    model.parameters(),
    lr=config['learning_rate'],
    weight_decay=config['weight_decay'],
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-5)

# Per-epoch metric history, one empty list per tracked series
history = {
    series: []
    for series in (
        'train_loss', 'train_acc', 'train_f1',
        'val_loss', 'val_acc', 'val_f1',
    )
}

# Echo the resulting setup
for setup_line in (
    "📚 Training setup complete:",
    f"   Optimizer: AdamW (lr={config['learning_rate']}, wd={config['weight_decay']})",
    "   Scheduler: CosineAnnealingLR",
    f"   Batch size: {config['batch_size']}",
    f"   Train batches: {len(train_loader)}",
    f"   Val batches: {len(val_loader)}",
):
    print(setup_line)

## 5. Supervised Pre-training

In [ ]:
# Hyper-parameters shared by the model, the data loaders and the optimizer
config = dict(
    window=48,  # 48 30-minute bars = 24 hours
    input_features=8,  # OHLCV + EMA21 + EMA50 + ATR
    hidden_dim=256,
    n_heads=8,
    n_layers=4,
    dropout=0.1,
    learning_rate=1e-3,
    weight_decay=1e-5,
    batch_size=32,
    n_structure_classes=4,  # Number of structure types
)

# Build the Structure Analyzer from the config, then move it to the target device
print("🏗️ Initializing Structure Analyzer model...")
model = StructureAnalyzer(config)
model = model.to(device)

# Add structure classification head for supervised training
class StructureClassificationHead(nn.Module):
    """Classification head for structure type prediction.

    A three-layer MLP (``input_dim -> hidden_dim -> hidden_dim // 2 ->
    n_classes``) with ReLU and dropout after each hidden layer. The layers
    live in ``self.layers`` so parameter names stay ``layers.<index>.*``.

    Args:
        input_dim: Size of the incoming representation.
        hidden_dim: Width of the first hidden layer; the second is half of it.
        n_classes: Number of output logits.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_dim=256, hidden_dim=128, n_classes=4, dropout=0.1):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, n_classes)
        )

    def forward(self, x):
        """Return unnormalised class logits for ``x``."""
        return self.layers(x)

# Create the classification head, move it to the device and register it on the model
classification_head = StructureClassificationHead(
    input_dim=256,
    n_classes=config['n_structure_classes'],
)
model.classification_head = classification_head.to(device)

# Count parameters in one pass: every parameter, and those that require gradients
total_params = 0
trainable_params = 0
for parameter in model.parameters():
    parameter_size = parameter.numel()
    total_params += parameter_size
    if parameter.requires_grad:
        trainable_params += parameter_size

# Report the model size and the architecture settings taken from the config
for summary_line in (
    "\n✅ Model initialized successfully:",
    f"   Total parameters: {total_params:,}",
    f"   Trainable parameters: {trainable_params:,}",
    f"   Device: {device}",
    "\n📊 Model Architecture:",
    f"   - Window size: {config['window']} bars",
    f"   - Input features: {config['input_features']}",
    f"   - Hidden dimension: {config['hidden_dim']}",
    f"   - Attention heads: {config['n_heads']}",
    f"   - Transformer layers: {config['n_layers']}",
):
    print(summary_line)

## 4. Initialize Structure Analyzer Model

In [ ]:
# Structure classes, in label-index order
structure_names = ['Strong Trend', 'Range Bound', 'Breakout', 'Reversal']

# One bar chart per split showing how many samples fall in each class
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

label_splits = [
    (train_dataset, 'Train'),
    (val_dataset, 'Validation'),
    (test_dataset, 'Test'),
]
bar_colors = ['#2ecc71', '#3498db', '#e74c3c', '#f39c12']

for split_ax, (split_data, split_title) in zip(axes, label_splits):
    # Per-class sample counts, padded so all four classes always appear
    split_labels = split_data.labels
    class_counts = np.bincount(split_labels, minlength=4)

    class_bars = split_ax.bar(range(4), class_counts, color=bar_colors)
    split_ax.set_title(f'{split_title} Set Distribution', fontsize=14, fontweight='bold')
    split_ax.set_xlabel('Structure Type')
    split_ax.set_ylabel('Count')
    split_ax.set_xticks(range(4))
    split_ax.set_xticklabels(structure_names, rotation=45, ha='right')

    # Annotate each bar with its count and its share of the split
    for class_bar, class_count in zip(class_bars, class_counts):
        bar_top = class_bar.get_height()
        bar_center = class_bar.get_x() + class_bar.get_width()/2.
        share = class_count/len(split_labels)*100
        split_ax.text(bar_center, bar_top,
                      f'{class_count}\n({share:.1f}%)',
                      ha='center', va='bottom')

plt.tight_layout()
plt.savefig(RESULTS_PATH / 'structure_label_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

# Mean and standard deviation of every structure-feature column, per split
feature_names = ['Price Change', 'Trend Consistency', 'Volatility', 'Volume Mean', 
                 'Volume Trend', 'Price Position', 'Volume Imbalance', 'Avg Bar Range', 'Close Location']

print("\n📊 Feature Statistics:")
for split_name, split_data in [('Train', train_dataset), ('Val', val_dataset), ('Test', test_dataset)]:
    split_features = split_data.structure_features
    print(f"\n{split_name} Set:")

    for column_index, column_name in enumerate(feature_names):
        column = split_features[:, column_index]
        print(f"  {column_name}: μ={column.mean():.4f}, σ={column.std():.4f}")

## 3. Visualize Data Distribution

In [ ]:
# Create custom dataset for structure analysis
class StructureDataset(Dataset):
    """Dataset of market windows with rule-based structure labels.

    Windows are read from an HDF5 file when ``data_file`` exists; otherwise a
    synthetic random-walk dataset is generated. After loading, every window
    is reduced to the summary features listed in ``FEATURE_NAMES`` and
    assigned one of four structure classes (see ``_classify_structure``).

    Attributes populated during construction:
        data: Array of market windows; synthetic data has shape
            (n_samples, 48, 8).
        prices: Per-window price series used alongside the windows.
        dates: Decoded date strings when the file provides them, else None.
        metadata: Dict of file-level attributes, or ``{'synthetic': True}``.
        labels: int64 array with one class id (0-3) per window.
        structure_features: float array of shape
            (n_samples, len(FEATURE_NAMES)).
    """

    # Column order of the vectors returned by ``_extract_structure_features``.
    FEATURE_NAMES = (
        'price_change',
        'trend_consistency',
        'volatility',
        'volume_mean',
        'volume_trend',
        'price_position',
        'volume_imbalance',
        'avg_bar_range',
        'close_location',
    )

    # Number of synthetic windows generated per split when no file exists.
    SYNTHETIC_SAMPLES = {'train': 10000, 'val': 2000, 'test': 2000}

    def __init__(self, data_file, split='train', transform=None):
        """Initialize dataset.

        Args:
            data_file: Path to HDF5 data file
            split: Dataset split ('train', 'val', 'test')
            transform: Optional callable applied to each sample's
                ``market_data`` tensor in ``__getitem__``
        """
        self.data_file = Path(data_file)
        self.split = split
        self.transform = transform

        # Load data
        self._load_data()

        # Generate structure labels
        self._generate_labels()

        logger.info(f"Loaded {split} dataset with {len(self)} samples")

    def _load_data(self):
        """Load data from the HDF5 file, or synthesize it if the file is missing.

        Sets ``data``, ``dates``, ``prices`` and ``metadata``. A missing
        ``data_file`` triggers ``_generate_synthetic_data`` instead.
        """
        if not self.data_file.exists():
            # Generate synthetic data for demonstration
            print("Generating synthetic data...")
            self._generate_synthetic_data()
            return

        with h5py.File(self.data_file, 'r') as f:
            # Load 30m data for structure analyzer
            if f'structure/{self.split}/data' in f:
                self.data = f[f'structure/{self.split}/data'][:]
                self.dates = [d.decode('utf-8') for d in f[f'structure/{self.split}/dates'][:]]
            else:
                # Fallback to generic data structure
                self.data = f[f'{self.split}/30m'][:]
                self.dates = None

            # Load prices for label generation
            if f'prices/{self.split}' in f:
                self.prices = f[f'prices/{self.split}'][:]
            else:
                # Extract from data (assuming OHLC format)
                self.prices = self.data[:, :, 3]  # Close prices

            # Copy the attributes of the optional 'metadata' node
            self.metadata = {}
            if 'metadata' in f:
                for key in f['metadata'].attrs:
                    self.metadata[key] = f['metadata'].attrs[key]

    def _generate_synthetic_data(self):
        """Generate synthetic market data for demonstration.

        Every window is a 48-bar multiplicative random walk with a random
        start price, drift and volatility. The 8 feature columns are:
        open, high, low, close, volume, ema_21, ema_50, atr.

        The two moving-average columns are simple trailing means of the
        close over at most 22 and 51 bars respectively. Bars with less
        history average whatever is available, so neither column contains
        zero padding (a 51-bar lookback can never be filled inside a 48-bar
        window). All draws use NumPy's global random state.

        Raises:
            KeyError: If ``self.split`` has no entry in ``SYNTHETIC_SAMPLES``.
        """
        n_samples = self.SYNTHETIC_SAMPLES[self.split]
        n_bars, n_features = 48, 8
        grid = (n_samples, n_bars)

        # Per-window walk parameters
        base_price = 1.0 + np.random.randn(n_samples) * 0.1
        trend = np.random.choice([-1, 0, 1], size=n_samples) * 0.001
        volatility = 0.01 + np.random.rand(n_samples) * 0.02

        # Close prices: the first bar is the base price, each later bar
        # multiplies the previous one by (1 + change).
        changes = trend[:, None] + np.random.randn(n_samples, n_bars - 1) * volatility[:, None]
        growth = np.concatenate(
            [np.ones((n_samples, 1)), np.cumprod(1.0 + changes, axis=1)], axis=1
        )
        prices = base_price[:, None] * growth

        data = np.zeros((n_samples, n_bars, n_features))
        data[:, :, 3] = prices                                        # Close
        data[:, :, 0] = prices * (1 - np.random.rand(*grid) * 0.002)  # Open
        data[:, :, 1] = prices * (1 + np.random.rand(*grid) * 0.003)  # High
        data[:, :, 2] = prices * (1 - np.random.rand(*grid) * 0.003)  # Low
        data[:, :, 4] = 1000 + np.random.rand(*grid) * 1000           # Volume

        # Trailing means; max(0, ...) shortens the window during warm-up
        for t in range(n_bars):
            data[:, t, 5] = prices[:, max(0, t - 21):t + 1].mean(axis=1)  # EMA 21
            data[:, t, 6] = prices[:, max(0, t - 50):t + 1].mean(axis=1)  # EMA 50
        data[:, :, 7] = volatility[:, None] * prices                  # ATR proxy

        self.data = data
        self.prices = self.data[:, :, 3]  # Close prices
        self.dates = None
        self.metadata = {'synthetic': True}

    def _generate_labels(self):
        """Compute the feature vector and structure label of every window.

        Sets ``structure_features`` (float, one row per window) and
        ``labels`` (int64). Dtype and shape are pinned so that an empty
        dataset still yields a 2-D feature array and an integer label array.
        """
        feature_rows = [self._extract_structure_features(window) for window in self.data]
        label_ids = [self._classify_structure(row) for row in feature_rows]

        self.labels = np.array(label_ids, dtype=np.int64)
        self.structure_features = np.array(feature_rows, dtype=np.float64).reshape(
            -1, len(self.FEATURE_NAMES)
        )

    def _extract_structure_features(self, window):
        """Extract structure-relevant features from data window.

        Args:
            window: 2-D array (bars x features) whose columns 1, 2, 3 and 4
                are read as high, low, close and volume.

        Returns:
            1-D float array with one value per entry of ``FEATURE_NAMES``,
            in that order.
        """
        # Price and volume series
        close_prices = window[:, 3]
        highs = window[:, 1]
        lows = window[:, 2]
        volumes = window[:, 4]
        half = len(volumes) // 2

        # Trend features. The up-move count is divided by the number of
        # bars (not the number of bar-to-bar changes).
        price_change = (close_prices[-1] - close_prices[0]) / close_prices[0]
        trend_consistency = np.sum(np.diff(close_prices) > 0) / len(close_prices)

        # Volatility features
        returns = np.diff(close_prices) / close_prices[:-1]
        volatility = np.std(returns) if len(returns) > 0 else 0

        # Volume profile: level and linear slope across the window
        volume_mean = np.mean(volumes)
        volume_trend = np.polyfit(np.arange(len(volumes)), volumes, 1)[0] if len(volumes) > 1 else 0

        # Support/Resistance features: last close relative to the window's
        # extreme high and low (epsilon guards a zero range)
        pivot_high = np.max(highs)
        pivot_low = np.min(lows)
        price_position = (close_prices[-1] - pivot_low) / (pivot_high - pivot_low + 1e-8)

        # Market depth proxy: second-half volume relative to first-half volume
        volume_imbalance = np.sum(volumes[half:]) / (np.sum(volumes[:half]) + 1e-8)

        # Microstructure features
        avg_bar_range = np.mean(highs - lows)
        close_location = np.mean((close_prices - lows) / (highs - lows + 1e-8))

        # Order must match FEATURE_NAMES
        return np.array(
            [
                price_change,
                trend_consistency,
                volatility,
                volume_mean,
                volume_trend,
                price_position,
                volume_imbalance,
                avg_bar_range,
                close_location,
            ],
            dtype=np.float64,
        )

    def _classify_structure(self, features):
        """Classify market structure type.

        Structure types:
        0: Strong Trend (Clear directional movement)
        1: Range Bound (Consolidation between support/resistance)
        2: Breakout (High volatility with directional bias)
        3: Reversal (Trend exhaustion patterns)

        Args:
            features: Feature vector ordered as ``FEATURE_NAMES``.

        Returns:
            Integer class id in 0-3. The rules are checked in the order
            listed above and the first match wins.
        """
        price_change = features[0]
        trend_consistency = features[1]
        volatility = features[2]
        price_position = features[5]
        abs_change = abs(price_change)

        # Classification logic
        if abs_change > 0.02 and trend_consistency > 0.7:
            return 0  # Strong Trend
        if volatility < 0.01 and abs_change < 0.005:
            return 1  # Range Bound
        if volatility > 0.02 and abs_change > 0.01:
            return 2  # Breakout
        if (price_position > 0.9 or price_position < 0.1) and trend_consistency < 0.5:
            return 3  # Reversal

        # Default based on dominant characteristic
        return 0 if trend_consistency > 0.6 else 1

    def __len__(self):
        """Return the number of windows in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Get a single sample.

        Args:
            idx: Index of the window.

        Returns:
            Dict with float32 tensors ``market_data`` (after the optional
            transform), ``structure_features`` and ``prices``, plus a
            scalar int64 tensor ``label``.
        """
        # torch.tensor copies, so a transform cannot modify the stored arrays
        market_data = torch.tensor(self.data[idx], dtype=torch.float32)
        structure_features = torch.tensor(self.structure_features[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.long)

        # Apply transform if provided
        if self.transform:
            market_data = self.transform(market_data)

        return {
            'market_data': market_data,
            'structure_features': structure_features,
            'label': label,
            'prices': torch.tensor(self.prices[idx], dtype=torch.float32)
        }

# Build the three dataset splits from one data file
print("\n📊 Creating datasets...")

# StructureDataset falls back to synthetic data when this file is absent
data_file = DATA_PATH / "structure_training_data.h5"

train_dataset, val_dataset, test_dataset = [
    StructureDataset(data_file, split=split_key)
    for split_key in ('train', 'val', 'test')
]

# Report the size of each split
print("\n✅ Datasets created:")
for split_label, split_dataset in (('Train', train_dataset),
                                   ('Val', val_dataset),
                                   ('Test', test_dataset)):
    print(f"   {split_label}: {len(split_dataset)} samples")

In [ ]:
# Directory that holds the processed training data
DATA_PATH = BASE_PATH / "data" / "processed"

# List the HDF5 files already present, or create the directory if it is missing
print("📂 Checking available data files...")
if not DATA_PATH.exists():
    print(f"❌ Data path does not exist: {DATA_PATH}")
    print("Creating mock data for demonstration...")
    # Only the directory is created here; no data file is written
    DATA_PATH.mkdir(parents=True, exist_ok=True)
else:
    data_files = list(DATA_PATH.glob("*.h5"))
    print(f"Found {len(data_files)} HDF5 files:")
    for h5_file in data_files:
        print(f"  - {h5_file.name}")

## 2. Load Training Data

In [ ]:
# Import dependencies
# All imports are attempted in one try block; the first ImportError (from any
# of them) jumps to the handler below, which installs scipy and scikit-learn
# with pip and then re-imports only the scipy/sklearn names.
try:
    # Core ML libraries
    import torch.nn as nn
    import torch.optim as optim
    import torch.nn.functional as F
    from torch.utils.data import DataLoader, TensorDataset, Dataset
    
    # Scientific computing
    from scipy import stats
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import StandardScaler
    
    # Visualization
    import matplotlib.patches as mpatches
    from matplotlib.gridspec import GridSpec
    
    # Local imports
    from agents.marl.agents.structure_analyzer import StructureAnalyzer
    from agents.synergy.detector import SynergyDetector
    from training.data_prep import MarketDataPipeline, DataLoader as MARLDataLoader
    from training.rewards.reward_functions import StructureReward
    
    print("✅ All dependencies loaded successfully")
    
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Installing missing dependencies...")
    
    # Install missing packages
    # NOTE(review): this runs for every ImportError, including a failing
    # local import that installing scipy/scikit-learn cannot resolve.
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "scipy", "scikit-learn"])
    
    # Retry imports
    # NOTE(review): names imported after the failing statement in the try
    # block (other than the three below) are not re-imported here and stay
    # undefined for later cells.
    from scipy import stats
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import StandardScaler

In [ ]:
# Environment setup and imports
import torch
import os
import sys
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
from tqdm import tqdm
import structlog
from pathlib import Path

# Structured logging: stdlib-backed structlog pipeline ending in a console renderer
log_processors = [
    structlog.stdlib.filter_by_level,
    structlog.stdlib.add_logger_name,
    structlog.stdlib.add_log_level,
    structlog.stdlib.PositionalArgumentsFormatter(),
    structlog.processors.TimeStamper(fmt="iso"),
    structlog.processors.StackInfoRenderer(),
    structlog.processors.format_exc_info,
    structlog.dev.ConsoleRenderer()
]
structlog.configure(
    processors=log_processors,
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    cache_logger_on_first_use=True,
)

logger = structlog.get_logger()

# Pick the compute device and report what was found
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type != 'cuda':
    print("⚠️ No GPU available, using CPU")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"✅ GPU: {gpu_name}")
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"   Memory: {gpu_memory_gb:.2f} GB")

# Project root; it and its src/ directory are put at the front of sys.path
# (src/ is inserted last, so it is searched first)
BASE_PATH = Path("/home/QuantNova/AlgoSpace")
for import_root in (BASE_PATH, BASE_PATH / "src"):
    sys.path.insert(0, str(import_root))

# Output locations for model checkpoints and result artifacts
MODELS_PATH = BASE_PATH / "models" / "agents"
RESULTS_PATH = BASE_PATH / "results" / "structure_agent"
for output_dir in (MODELS_PATH, RESULTS_PATH):
    output_dir.mkdir(parents=True, exist_ok=True)

print(f"✅ Base path: {BASE_PATH}")
print(f"✅ Models path: {MODELS_PATH}")
print(f"✅ Results path: {RESULTS_PATH}")

## 1. Environment Setup

# Structure Analyzer Agent Training

This notebook trains the Structure Analyzer agent individually before multi-agent training.

## Agent Overview:
The Structure Analyzer identifies major market trends, support/resistance levels, and overall market structure using:
- Volume profiles and market depth analysis
- Microstructure feature extraction
- Support/resistance level detection
- Trend structure evaluation

## Training Strategy:
- Pre-train on historical structure patterns
- Fine-tune with reinforcement learning
- Optimize for structure pattern recognition and trend analysis

## Key Features:
- 30-minute timeframe analysis (48×8 matrix)
- Integration with synergy detection patterns
- Focus on market microstructure and volume analysis