---
## Step 1: Setup and Imports

In [None]:
import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score

# Add parent directory for MAPS import
sys.path.insert(0, '..')

# Clear cached copies of the MAPS package so the imports below re-execute it
# (useful after editing the package between runs). Only modules whose
# top-level package is named MAPS/maps are evicted; a bare substring test
# would also evict unrelated modules that merely contain "maps" somewhere in
# their dotted name.
for mod_name in list(sys.modules):
    if mod_name.split('.', 1)[0].lower() == 'maps':
        del sys.modules[mod_name]

from MAPS.cell_phenotyping import Trainer, Predictor
from MAPS.cell_phenotyping.datasets import CellExpressionCSV

print("‚úÖ All imports successful!")
print(f"üìÇ Working directory: {os.getcwd()}")

‚úÖ All imports successful!
üìÇ Working directory: c:\Users\mahee\OneDrive\Documents\Semester-12\FYDP-2\MAPS\Experiments_my_ideas


In [None]:
# Report the available hardware and choose the torch device used by later cells
print("üñ•Ô∏è Hardware Check:")
cuda_ok = torch.cuda.is_available()
print(f"   CUDA available: {cuda_ok}")
if not cuda_ok:
    print("   Running on CPU")
    device = torch.device('cpu')
else:
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   CUDA version: {torch.version.cuda}")
    device = torch.device('cuda')

üñ•Ô∏è Hardware Check:
   CUDA available: True
   GPU: NVIDIA GeForce GTX 1650
   CUDA version: 12.1


---
## Step 2: Define Custom 6-Layer MLP

### Architecture Comparison:

**MAPS 4-Layer MLP:**
```
Input → Linear(512) → ReLU → Dropout
      → Linear(512) → ReLU → Dropout
      → Linear(512) → ReLU → Dropout
      → Linear(512) → ReLU → Dropout
      → Classifier(num_classes)
```

**Our 6-Layer MLP:**
```
Input → Linear(512) → ReLU → Dropout
      → Linear(512) → ReLU → Dropout
      → Linear(512) → ReLU → Dropout
      → Linear(512) → ReLU → Dropout
      → Linear(512) → ReLU → Dropout  ← NEW
      → Linear(512) → ReLU → Dropout  ← NEW
      → Classifier(num_classes)
```

In [None]:
class MLP_6Layer(nn.Module):
    """
    Multi-Layer Perceptron with a configurable number of hidden layers
    (6 by default).

    Every hidden layer is the triple Linear -> ReLU -> Dropout, followed by a
    final linear classifier. The hidden stack is stored in ``self.fc`` as an
    ``nn.Sequential`` whose positional indices (0, 3, 6, ...) are the Linear
    layers, so with the default depth the state-dict keys are ``fc.0.*``,
    ``fc.3.*``, ..., ``fc.15.*`` plus ``classifier.*``.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of every hidden layer.
        num_classes: Number of output classes (size of the logit vector).
        dropout: Dropout probability applied after each hidden activation.
        num_hidden_layers: Number of hidden Linear/ReLU/Dropout groups.

    Raises:
        ValueError: If ``num_hidden_layers`` is smaller than 1.
    """
    def __init__(self, input_dim=50, hidden_dim=512, num_classes=16, dropout=0.10,
                 num_hidden_layers=6):
        super(MLP_6Layer, self).__init__()

        if num_hidden_layers < 1:
            raise ValueError(
                f"num_hidden_layers must be >= 1, got {num_hidden_layers}"
            )

        # Hidden layers are created first-to-last so parameter initialisation
        # consumes the random generator in a fixed, depth-ordered sequence.
        layers = []
        in_features = input_dim
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(in_features, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p=dropout))
            in_features = hidden_dim  # only the first layer reads input_dim
        self.fc = nn.Sequential(*layers)

        # Output classifier
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def forward(self, batch):
        """
        Forward pass

        Args:
            batch: Input tensor of shape (batch_size, input_dim)

        Returns:
            logits: Raw output scores
            probs: Softmax probabilities over the last dimension
        """
        features = self.fc(batch)
        logits = self.classifier(features)
        probs = torch.softmax(logits, dim=-1)
        return logits, probs

print("‚úÖ 6-Layer MLP defined!")

# Test instantiation
test_model = MLP_6Layer(input_dim=50, hidden_dim=512, num_classes=16, dropout=0.10)
total_params = sum(p.numel() for p in test_model.parameters())

# Parameter count of a 4-hidden-layer MLP with the same widths (50 inputs,
# 512 hidden units, 16 classes): first layer + 3 hidden-to-hidden layers +
# classifier. The previously hard-coded "~1.3M" was this 6-layer model's own
# size, not the 4-layer baseline.
# NOTE(review): assumes the real MAPS network matches the 4-layer layout
# described in this notebook — confirm against the MAPS source.
baseline_params = (50 * 512 + 512) + 3 * (512 * 512 + 512) + (512 * 16 + 16)

print(f"\nüìä Model Statistics:")
print(f"   Total parameters: {total_params:,}")
print(f"\n   (4-layer MAPS has ~{baseline_params / 1e6:.1f}M params, this has more!)")

‚úÖ 6-Layer MLP defined!

üìä Model Statistics:
   Total parameters: 1,347,600

   (4-layer MAPS has ~1.3M params, this has more!)


---
## Step 3: Load Preprocessed Data

In [None]:
# Use the data preprocessed from previous notebook
data_dir = './cHL_CODEX_processed'
train_path = os.path.join(data_dir, 'train.csv')
valid_path = os.path.join(data_dir, 'valid.csv')
class_path = os.path.join(data_dir, 'class_names.csv')

# class_path and train_path are read in this cell and valid_path is read when
# the datasets are built, so verify all three files instead of only train.csv.
missing_paths = [p for p in (train_path, valid_path, class_path) if not os.path.exists(p)]

if missing_paths:
    print("‚ùå Preprocessed data not found!")
    print("   Please run 'cHL_CODEX_training_comparison.ipynb' first to generate the data.")
    print(f"   Missing: {', '.join(missing_paths)}")
else:
    print("‚úÖ Found preprocessed data!")
    
    # Load class info; the number of rows defines the number of classes
    class_df = pd.read_csv(class_path)
    NUM_CLASSES = len(class_df)
    
    # Load to check dimensions
    train_df = pd.read_csv(train_path)
    NUM_FEATURES = len(train_df.columns) - 1  # Exclude 'cell_label'
    
    print(f"\nüìä Dataset Info:")
    print(f"   Training samples: {len(train_df):,}")
    print(f"   Features: {NUM_FEATURES}")
    print(f"   Classes: {NUM_CLASSES}")
    print(f"\n   Class names: {', '.join(class_df['class_name'].tolist())}")

‚úÖ Found preprocessed data!

üìä Dataset Info:
   Training samples: 114,984
   Features: 50
   Classes: 16

   Class names: B, CD4, CD8, DC, Endothelial, Epithelial, Lymphatic, M1, M2, Mast, Monocyte, NK, Neutrophil, Other, TReg, Tumor


---
## Step 4: Custom Training Loop for 6-Layer MLP

Since we're using a custom architecture, we need a custom training loop.

In [None]:
# Hyper-parameters shared by the training cells below
SEED = 42
DROPOUT = 0.10
LEARNING_RATE = 0.001
# Larger batches only when a CUDA device is available
BATCH_SIZE = 128 if not torch.cuda.is_available() else 512
# Epoch budget and early-stopping settings
MAX_EPOCHS = 100
MIN_EPOCHS = 20
PATIENCE = 20

print("‚öôÔ∏è Training Configuration:")
for _label, _value in (
    ("Batch size", BATCH_SIZE),
    ("Learning rate", LEARNING_RATE),
    ("Dropout", DROPOUT),
    ("Max epochs", MAX_EPOCHS),
    ("Patience", PATIENCE),
    ("Device", device),
):
    print(f"   {_label}: {_value}")

‚öôÔ∏è Training Configuration:
   Batch size: 512
   Learning rate: 0.001
   Dropout: 0.1
   Max epochs: 100
   Patience: 20
   Device: cuda


In [None]:
# Seed Python, NumPy and PyTorch with the same value so runs are repeatable
import random

for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

# CUDA generators are seeded only when a GPU is present
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and disable its auto-tuner
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

print("‚úÖ Seeds set for reproducibility")

‚úÖ Seeds set for reproducibility


In [None]:
# Build the train/validation datasets and their loaders
print("üìÇ Loading datasets...")

# Worker processes are only requested when CUDA is available
loader_workers = 4 if torch.cuda.is_available() else 0

train_dataset = CellExpressionCSV(train_path, is_train=True)
# The validation dataset receives the training dataset's mean/std
# (presumably for feature normalisation — see CellExpressionCSV)
valid_dataset = CellExpressionCSV(
    valid_path,
    is_train=False,
    mean=train_dataset.mean,
    std=train_dataset.std,
)

train_loader = CellExpressionCSV.get_data_loader(
    train_dataset,
    batch_size=BATCH_SIZE,
    is_train=True,
    num_workers=loader_workers,
)
valid_loader = CellExpressionCSV.get_data_loader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    is_train=False,
    num_workers=loader_workers,
)

print(f"‚úÖ Data loaded!")
print(f"   Train batches: {len(train_loader)}")
print(f"   Valid batches: {len(valid_loader)}")

üìÇ Loading datasets...
‚úÖ Data loaded!
   Train batches: 224
   Valid batches: 57


In [None]:
# Create the network, loss and optimizer used by the training loop
print("üöÄ Initializing 6-Layer MLP...")

# Build the model and move it to the selected device in double precision
model = MLP_6Layer(
    input_dim=NUM_FEATURES,
    hidden_dim=512,
    num_classes=NUM_CLASSES,
    dropout=DROPOUT,
).to(device, dtype=torch.float64)

# Cross-entropy on the raw logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

param_total = sum(p.numel() for p in model.parameters())
first_param_device = next(model.parameters()).device

print("‚úÖ Model initialized!")
print(f"   Parameters: {param_total:,}")
print(f"   Device: {first_param_device}")

üöÄ Initializing 6-Layer MLP...
‚úÖ Model initialized!
   Parameters: 1,347,600
   Device: cuda:0


In [None]:
# Training function
def train_epoch(model, loader, criterion, optimizer, device):
    """
    Run one optimisation pass over ``loader``.

    Args:
        model: Module whose forward returns ``(logits, probs)``.
        loader: Iterable of ``(features, labels)`` batches.
        criterion: Loss taking ``(logits, labels)``; expected to return the
            mean loss of the batch (the default reduction of
            ``nn.CrossEntropyLoss``).
        optimizer: Optimizer stepping the model's parameters.
        device: Device the batches are moved to.

    Returns:
        avg_loss: Mean loss per sample over the epoch. Each batch mean is
            weighted by its batch size so a shorter final batch does not
            count as much as a full one.
        accuracy: Fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0

    for features, labels in loader:
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits, probs = model(features)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Undo the per-batch mean so the epoch average is per sample
        loss_sum += loss.item() * batch_size
        preds = torch.argmax(probs, dim=1)
        num_correct += (preds == labels).sum().item()
        num_samples += batch_size

    if num_samples == 0:
        raise ValueError("train_epoch received a loader with no samples")

    avg_loss = loss_sum / num_samples
    accuracy = num_correct / num_samples
    return avg_loss, accuracy

# Validation function
def validate_epoch(model, loader, criterion, device):
    """
    Evaluate ``model`` on every batch of ``loader`` without gradient tracking.

    Args:
        model: Module whose forward returns ``(logits, probs)``.
        loader: Iterable of ``(features, labels)`` batches.
        criterion: Loss taking ``(logits, labels)``; expected to return the
            mean loss of the batch (the default reduction of
            ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        avg_loss: Mean loss per sample; each batch mean is weighted by its
            batch size so a shorter final batch is not over-weighted.
        accuracy: Fraction of samples whose arg-max prediction equals the label.
        all_preds: List with one predicted class index per sample, in loader order.
        all_labels: List with one ground-truth label per sample, in loader order.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for features, labels in loader:
            features = features.to(device)
            labels = labels.to(device)

            logits, probs = model(features)
            loss = criterion(logits, labels)

            batch_size = labels.size(0)
            # Undo the per-batch mean so the final average is per sample
            loss_sum += loss.item() * batch_size
            num_samples += batch_size

            preds = torch.argmax(probs, dim=1)
            num_correct += (preds == labels).sum().item()
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if num_samples == 0:
        raise ValueError("validate_epoch received a loader with no samples")

    avg_loss = loss_sum / num_samples
    accuracy = num_correct / num_samples
    return avg_loss, accuracy, all_preds, all_labels

print("‚úÖ Training functions defined!")

‚úÖ Training functions defined!


---
## Step 5: Train the 6-Layer Model

In [None]:
print("\n" + "="*60)
print("TRAINING 6-LAYER MLP")
print("="*60)

# Training tracking
results_dir = './results_6layer_model/'
os.makedirs(results_dir, exist_ok=True)

# Per-epoch metrics, one list entry per completed epoch
history = {
    'train_loss': [], 'train_acc': [],
    'valid_loss': [], 'valid_acc': []
}

best_valid_loss = float('inf')
patience_counter = 0  # consecutive epochs without a new best validation loss
best_model_path = os.path.join(results_dir, 'best_6layer_model.pt')
training_log_path = os.path.join(results_dir, 'training_logs.csv')

start_time = time.time()

for epoch in range(MAX_EPOCHS):
    epoch_start = time.time()
    
    # Train
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    
    # Validate
    valid_loss, valid_acc, _, _ = validate_epoch(model, valid_loader, criterion, device)
    
    # Record
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['valid_loss'].append(valid_loss)
    history['valid_acc'].append(valid_acc)
    
    epoch_time = time.time() - epoch_start
    
    # Print progress
    print(f"Epoch {epoch+1:3d}/{MAX_EPOCHS} | "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Valid Loss: {valid_loss:.4f} Acc: {valid_acc:.4f} | "
          f"Time: {epoch_time:.1f}s")
    
    # Save best model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        patience_counter = 0
        # The checkpoint also stores the training dataset's mean/std and the
        # metrics of the epoch that produced it
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_mean': train_dataset.mean,
            'train_std': train_dataset.std,
            'valid_loss': valid_loss,
            'valid_acc': valid_acc
        }, best_model_path)
        print(f"  ‚Üí üíæ Saved best model (valid_loss: {valid_loss:.4f})")
    else:
        patience_counter += 1
        # `epoch` is 0-based, so the number of completed epochs is epoch + 1;
        # comparing `epoch` itself would demand one epoch more than MIN_EPOCHS
        if patience_counter >= PATIENCE and epoch + 1 >= MIN_EPOCHS:
            print(f"\n‚èπÔ∏è  Early stopping triggered after {epoch+1} epochs")
            break

training_time = time.time() - start_time

print("\n" + "="*60)
print("‚úÖ TRAINING COMPLETE!")
print("="*60)
print(f"\n‚è±Ô∏è  Total time: {training_time:.2f} seconds ({training_time/60:.2f} minutes)")
print(f"üìä Epochs: {len(history['train_loss'])}")
print(f"üéØ Best valid loss: {best_valid_loss:.4f}")

# Save training history
history_df = pd.DataFrame(history)
history_df.to_csv(training_log_path, index=False)
# Report the path that was actually written rather than concatenating
# strings, which silently depended on results_dir ending with a separator
print(f"üíæ Training logs saved to: {training_log_path}")


TRAINING 6-LAYER MLP
Epoch   1/100 | Train Loss: 2.4323 Acc: 0.1515 | Valid Loss: 2.1758 Acc: 0.2343 | Time: 48.1s
  ‚Üí üíæ Saved best model (valid_loss: 2.1758)
Epoch   2/100 | Train Loss: 1.6592 Acc: 0.4250 | Valid Loss: 1.4830 Acc: 0.4708 | Time: 44.8s
  ‚Üí üíæ Saved best model (valid_loss: 1.4830)
Epoch   3/100 | Train Loss: 1.0859 Acc: 0.6524 | Valid Loss: 1.2219 Acc: 0.5892 | Time: 46.7s
  ‚Üí üíæ Saved best model (valid_loss: 1.2219)
Epoch   4/100 | Train Loss: 0.9274 Acc: 0.7087 | Valid Loss: 1.0693 Acc: 0.6444 | Time: 47.4s
  ‚Üí üíæ Saved best model (valid_loss: 1.0693)
Epoch   5/100 | Train Loss: 0.8178 Acc: 0.7434 | Valid Loss: 0.8966 Acc: 0.7027 | Time: 45.2s
  ‚Üí üíæ Saved best model (valid_loss: 0.8966)
Epoch   6/100 | Train Loss: 0.7249 Acc: 0.7734 | Valid Loss: 0.9113 Acc: 0.7071 | Time: 45.2s
Epoch   7/100 | Train Loss: 0.6642 Acc: 0.7926 | Valid Loss: 0.7626 Acc: 0.7539 | Time: 46.2s
  ‚Üí üíæ Saved best model (valid_loss: 0.7626)
Epoch   8/100 | Train Loss

---
## Step 6: Visualize Training Progress

In [None]:
# Draw loss and accuracy curves side by side, then print a short summary
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

epochs = range(1, len(history['train_loss']) + 1)

# One panel per metric: (training key, validation key, y-axis label, title)
curve_panels = [
    ('train_loss', 'valid_loss', 'Loss',
     '6-Layer MLP: Training vs Validation Loss'),
    ('train_acc', 'valid_acc', 'Accuracy',
     '6-Layer MLP: Training vs Validation Accuracy'),
]

for ax, (train_key, valid_key, y_label, panel_title) in zip(axes, curve_panels):
    ax.plot(epochs, history[train_key], 'b-', label='Training', linewidth=2)
    ax.plot(epochs, history[valid_key], 'r-', label='Validation', linewidth=2)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# "Final" refers to the last recorded epoch, "best" to the maximum over all epochs
print("\nüìä Training Summary:")
print(f"   Final train accuracy: {history['train_acc'][-1]:.4f}")
print(f"   Final valid accuracy: {history['valid_acc'][-1]:.4f}")
print(f"   Best valid accuracy: {max(history['valid_acc']):.4f}")
print(f"   Time per epoch: {training_time/len(epochs):.2f}s")

---
## Step 7: Evaluate 6-Layer Model

In [None]:
# Load best model
print("üì¶ Loading best 6-layer model...")
# map_location keeps the load working when the checkpoint was written on a
# different device than the one in use now. weights_only=False is set
# explicitly because the checkpoint stores more than tensors (the dataset
# mean/std and scalar metrics); newer PyTorch releases default to
# weights_only=True. This is acceptable only because the file was written by
# this notebook; do not use it on checkpoints from untrusted sources.
checkpoint = torch.load(best_model_path, map_location=device, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# The stored epoch index is 0-based, hence the +1 for display
print(f"‚úÖ Loaded model from epoch {checkpoint['epoch']+1}")
print(f"   Valid loss: {checkpoint['valid_loss']:.4f}")
print(f"   Valid acc: {checkpoint['valid_acc']:.4f}")

In [None]:
# Run the restored model over the validation loader and keep its predictions
print("\nüîÆ Making predictions...")
eval_result = validate_epoch(model, valid_loader, criterion, device)
# Only the predicted and ground-truth labels are needed from the 4-tuple
_, _, pred_labels_6layer, gt_labels = eval_result

accuracy_6layer = accuracy_score(gt_labels, pred_labels_6layer)
print(f"\nüéØ 6-LAYER MODEL ACCURACY: {accuracy_6layer:.4f} ({accuracy_6layer*100:.2f}%)")

In [None]:
# Classification report
print("\nüìã CLASSIFICATION REPORT (6-Layer Model):")
print("="*70)
report_class_names = class_df['class_name'].tolist()
# Passing `labels` explicitly keeps the report aligned with `target_names`
# even when a class never occurs in the validation labels or predictions;
# without it scikit-learn infers the label set from the data and the names
# no longer line up.
# NOTE(review): assumes labels are integer-encoded 0..N-1 in class_df order,
# as the per-class comparison later in this notebook also does — confirm.
print(classification_report(gt_labels, pred_labels_6layer,
                            labels=list(range(len(report_class_names))),
                            target_names=report_class_names,
                            digits=3,
                            zero_division=0))

In [None]:
# Confusion matrix
cm_class_names = class_df['class_name'].tolist()
# Fix the label set so the matrix always has one row/column per class; if a
# class is absent from both labels and predictions, the inferred matrix would
# be smaller and the tick labels below would be attached to the wrong cells.
# NOTE(review): assumes labels are integer-encoded 0..N-1 in class_df order — confirm.
cm_6layer = confusion_matrix(gt_labels, pred_labels_6layer,
                             labels=list(range(len(cm_class_names))))

plt.figure(figsize=(14, 12))
sns.heatmap(cm_6layer, annot=True, fmt='d', cmap='Greens', 
            xticklabels=cm_class_names,
            yticklabels=cm_class_names)
plt.title('Confusion Matrix - 6-Layer MLP', fontsize=14, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

---
## Step 8: Compare with 4-Layer and Pretrained Models

In [None]:
# Load results from previous 4-layer experiment
prev_results_path = './results_new_model/training_logs.csv'
prev_checkpoint_path = './results_new_model/best_checkpoint.pt'

# The Trainer below is pointed at the checkpoint file, so that file must exist
# as well; checking only the training log let a missing checkpoint through.
if os.path.exists(prev_results_path) and os.path.exists(prev_checkpoint_path):
    print("üìä Loading 4-layer model results...")
    
    # Build a Trainer for the saved 4-layer checkpoint and use it to predict
    # on the validation file
    trainer_4layer = Trainer(
        model_checkpoint_path=prev_checkpoint_path,
        results_dir='./results_new_model/',
        num_features=NUM_FEATURES,
        num_classes=NUM_CLASSES,
        batch_size=BATCH_SIZE
    )
    
    pred_labels_4layer, _ = trainer_4layer.predict(valid_path)
    # NOTE(review): gt_labels were collected while iterating valid_loader;
    # this comparison assumes predict() returns rows in that same order — confirm.
    accuracy_4layer = accuracy_score(gt_labels, pred_labels_4layer)
    
    print(f"‚úÖ 4-layer model accuracy: {accuracy_4layer:.4f}")
else:
    print("‚ö†Ô∏è 4-layer model results not found. Run the previous notebook first.")
    accuracy_4layer = None
    pred_labels_4layer = None

In [None]:
# Score the shipped MAPS checkpoint on the same validation file
pretrained_path = '../models/cell_phenotyping/cHL_CODEX.pt'

if not os.path.exists(pretrained_path):
    # Downstream cells test these names against None before using them
    print("‚ö†Ô∏è Pretrained model not found.")
    accuracy_pretrained = None
    pred_labels_pretrained = None
else:
    print("üì¶ Loading MAPS pretrained model...")

    pretrained_predictor = Predictor(
        model_checkpoint_path=pretrained_path,
        num_features=NUM_FEATURES,
        num_classes=NUM_CLASSES,
        batch_size=BATCH_SIZE,
    )

    # predict() yields a pair; only its first element (the labels) is used
    pred_labels_pretrained, _ = pretrained_predictor.predict(valid_path)
    # NOTE(review): assumes predict() keeps the row order of gt_labels — confirm
    accuracy_pretrained = accuracy_score(gt_labels, pred_labels_pretrained)

    print(f"‚úÖ Pretrained model accuracy: {accuracy_pretrained:.4f}")

In [None]:
# Comprehensive comparison
print("\n" + "="*70)
print("MODEL COMPARISON")
print("="*70)

print(f"\nüÜï 6-Layer MLP (your model):         {accuracy_6layer:.4f} ({accuracy_6layer*100:.2f}%)")

if accuracy_4layer is not None:
    print(f"üìä 4-Layer MLP (standard MAPS):      {accuracy_4layer:.4f} ({accuracy_4layer*100:.2f}%)")
    print(f"   ‚Üí Difference (6L vs 4L): {(accuracy_6layer - accuracy_4layer)*100:+.2f}%")

if accuracy_pretrained is not None:
    print(f"üì¶ Pretrained MAPS (500 epochs):     {accuracy_pretrained:.4f} ({accuracy_pretrained*100:.2f}%)")
    print(f"   ‚Üí Difference (6L vs pretrained): {(accuracy_6layer - accuracy_pretrained)*100:+.2f}%")

print("\nüìà Analysis:")
if accuracy_4layer is not None:
    # Use the same +/-0.01 accuracy tolerance as the final summary cell, so a
    # tiny positive difference is reported as "similar" here too instead of
    # "BETTER" in one place and "comparable" in the other.
    accuracy_gap = accuracy_6layer - accuracy_4layer
    if accuracy_gap > 0.01:
        print("   ‚úÖ The deeper 6-layer architecture performs BETTER!")
        print("   üí° More layers helped capture complex patterns.")
    elif accuracy_gap > -0.01:
        print("   ‚öñÔ∏è  Both architectures perform similarly.")
        print("   üí° Adding layers didn't hurt, but didn't help much either.")
    else:
        print("   ‚ö†Ô∏è The 4-layer model performed slightly better.")
        print("   üí° Deeper isn't always better - may need more regularization.")

---
## Step 9: Per-Class Comparison

In [None]:
# Per-class (one-vs-rest) F1 for the 6-layer and 4-layer predictions
if accuracy_4layer is not None:
    print("\nüìä PER-CLASS F1 SCORE COMPARISON:")
    print("="*100)

    # Convert the label sequences to arrays once, outside the class loop
    gt_array = np.array(gt_labels)
    pred_6l_array = np.array(pred_labels_6layer)
    pred_4l_array = np.array(pred_labels_4layer)

    comparison_data = []
    for i, class_name in enumerate(class_df['class_name']):
        # Binary "is class i" indicators for the truth and both models
        gt_binary = (gt_array == i).astype(int)
        pred_6l_binary = (pred_6l_array == i).astype(int)
        pred_4l_binary = (pred_4l_array == i).astype(int)

        f1_6layer = f1_score(gt_binary, pred_6l_binary, zero_division=0)
        f1_4layer = f1_score(gt_binary, pred_4l_binary, zero_division=0)

        row = {}
        row['Class'] = class_name
        row['Count'] = (gt_array == i).sum()
        row['6-Layer F1'] = f1_6layer
        row['4-Layer F1'] = f1_4layer
        row['Improvement'] = f1_6layer - f1_4layer
        comparison_data.append(row)

    # Largest gain of the 6-layer model first
    comp_df = pd.DataFrame(comparison_data).sort_values('Improvement', ascending=False)

    print(comp_df.to_string(index=False, float_format=lambda x: f"{x:.3f}"))

    # Grouped bar chart, classes in the sorted order of comp_df
    plt.figure(figsize=(15, 6))
    x = np.arange(len(comp_df))
    width = 0.35
    half_width = width/2

    bars1 = plt.bar(x - half_width, comp_df['6-Layer F1'], width, label='6-Layer MLP', alpha=0.8, color='green')
    bars2 = plt.bar(x + half_width, comp_df['4-Layer F1'], width, label='4-Layer MLP', alpha=0.8, color='blue')

    plt.xlabel('Cell Type', fontsize=12)
    plt.ylabel('F1 Score', fontsize=12)
    plt.title('Per-Class Performance: 6-Layer vs 4-Layer MLP', fontsize=14, fontweight='bold')
    plt.xticks(x, comp_df['Class'], rotation=45, ha='right')
    plt.legend()
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Classes whose F1 moved by more than 0.02 in either direction
    improvements = comp_df[comp_df['Improvement'] > 0.02]
    degradations = comp_df[comp_df['Improvement'] < -0.02]

    if len(improvements) > 0:
        print("\nüéâ Classes where 6-layer improves significantly:")
        for _, row in improvements.iterrows():
            print(f"   {row['Class']}: +{row['Improvement']:.3f}")

    if len(degradations) > 0:
        print("\n‚ö†Ô∏è Classes where 6-layer performs worse:")
        for _, row in degradations.iterrows():
            print(f"   {row['Class']}: {row['Improvement']:.3f}")

---
## Step 10: Final Summary & Conclusions

In [None]:
# Final recap: architecture, training cost, accuracy, saved files, next steps
summary_rule = "="*70
print("\n" + summary_rule)
print("üèÅ EXPERIMENT SUMMARY: 6-LAYER MLP")
print(summary_rule)

summary_param_count = sum(p.numel() for p in model.parameters())
print(f"\nüèóÔ∏è Architecture:")
print(f"   Layers: 6 hidden layers (vs 4 in standard MAPS)")
print(f"   Hidden units: 512 per layer")
print(f"   Dropout: {DROPOUT}")
print(f"   Total parameters: {summary_param_count:,}")

summary_epochs = len(history['train_loss'])
print(f"\n‚è±Ô∏è Training:")
print(f"   Time: {training_time:.2f} seconds ({training_time/60:.2f} minutes)")
print(f"   Epochs: {summary_epochs}")
print(f"   Device: {device}")
print(f"   Batch size: {BATCH_SIZE}")

# The 4-layer figures are shown only when that comparison was available
have_4layer = accuracy_4layer is not None

print(f"\nüéØ Performance:")
print(f"   6-Layer MLP: {accuracy_6layer:.4f} ({accuracy_6layer*100:.2f}%)")
if have_4layer:
    print(f"   4-Layer MLP: {accuracy_4layer:.4f} ({accuracy_4layer*100:.2f}%)")
    print(f"   Improvement: {(accuracy_6layer - accuracy_4layer)*100:+.2f}%")

print(f"\nüíæ Saved Files:")
print(f"   Model: {best_model_path}")
print(f"   Logs: {results_dir}training_logs.csv")

print("\nüî¨ Conclusions:")
if have_4layer:
    # Differences within +/-0.01 accuracy are treated as comparable
    diff = accuracy_6layer - accuracy_4layer
    if diff > 0.01:
        print("   ‚úÖ Deeper architecture (6 layers) provides measurable improvement")
        print("   üí° Consider trying even deeper networks (8 layers?)")
    elif diff > -0.01:
        print("   ‚öñÔ∏è  Performance is comparable - depth didn't significantly help")
        print("   üí° 4 layers may be sufficient for this task")
    else:
        print("   ‚ö†Ô∏è Deeper network underperformed slightly")
        print("   üí° May need different hyperparameters (learning rate, dropout)")

print("\nüí° Next Experiments to Try:")
next_experiments = (
    "Try 8-layer MLP (go even deeper)",
    "Try different hidden dimensions (256, 768, 1024)",
    "Try different dropout rates (0.05, 0.15, 0.20)",
    "Try batch normalization between layers",
    "Try residual connections (ResNet-style)",
    "Try different activation functions (LeakyReLU, ELU)",
)
for idea_number, idea in enumerate(next_experiments, start=1):
    print(f"   {idea_number}. {idea}")

print("\n‚úÖ Experiment complete!")