In [None]:
# ===============================================================
# Comprehensive Implementation with Modifications for Debugging
# ===============================================================

# -----------------------------
# 1. Import Libraries
# -----------------------------
import math
import json
import logging
import numpy as np
import torch
import random
import threading
import time
import subprocess
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from dataclasses import dataclass
from torchvision import models
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torchvision.models import resnet18, ResNet18_Weights
from torch.utils.tensorboard import SummaryWriter
import matplotlib
matplotlib.use('TkAgg')  # Ensure the correct backend is used
import matplotlib.pyplot as plt
import os
import tkinter as tk
from tkinter import ttk
import queue
from PIL import Image, ImageTk
import sys
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

# Set default font to reduce font scanning time
matplotlib.rcParams['font.family'] = 'DejaVu Sans'

# -----------------------------
# 2. Configure Logging and Seed
# -----------------------------

# Configure logging: send INFO-and-above records to stdout via the root logger.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
# Module-level logger shared by the functions defined in this file.
logger = logging.getLogger(__name__)

# Set random seeds for reproducibility
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    The CUDA generators are seeded as well when CUDA is available.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


# Seed once at import time so everything below starts from a known state.
set_seed()

# -----------------------------
# 3. Define Constants
# -----------------------------

# Side length of the square grid that every sample is padded or truncated to.
GRID_SIZE = 30        # Fixed grid size (adjust as needed)
# Number of classes predicted per grid cell.
NUM_CLASSES = 11      # 0-10, where 10 represents dead squares

# -----------------------------
# 4. Define Data Structures and Loading Functions
# -----------------------------

# Data Class for Grid Pairs
@dataclass
class GridPair:
    """One input/output example belonging to an ARC task."""
    # Identifier of the task this example was taken from.
    task_id: str
    # Input grid; built in this file as a nested list of cell values.
    input_grid: list
    # Output grid paired with input_grid, in the same nested-list layout.
    output_grid: list

# Load ARC Data
def load_arc_data():
    """Read the four ARC JSON files from the working directory.

    Returns:
        dict: dataset key -> parsed JSON content. A file that is missing or
        not valid JSON is logged as an error and mapped to an empty dict.
    """
    file_paths = {
        "arc-agi_training-challenges": "arc-agi_training_challenges.json",
        "arc-agi_evaluation-challenges": "arc-agi_evaluation_challenges.json",
        "arc-agi_training-solutions": "arc-agi_training_solutions.json",
        "arc-agi_evaluation-solutions": "arc-agi_evaluation_solutions.json",
    }
    arc_data = {}
    for key, path in file_paths.items():
        try:
            with open(path, 'r') as handle:
                content = json.load(handle)
        except (FileNotFoundError, json.JSONDecodeError) as err:
            logger.error(f"Error loading {path}: {err}")
            content = {}
        else:
            logger.info(f"Loaded {key} from {path}.")
        arc_data[key] = content
    return arc_data

# Reshape to Fixed Square Grid
def reshape_to_square_grid(flat_list, grid_size=30):
    """Fit a flat sequence of cell values into a ``grid_size`` x ``grid_size`` grid.

    Args:
        flat_list: One-dimensional sequence (list or array) of cell values.
        grid_size (int): Side length of the resulting square grid.

    Returns:
        list: ``grid_size`` rows of ``grid_size`` values each. Input longer
        than ``grid_size ** 2`` is truncated; shorter input is right-padded
        with -1.
    """
    required_length = grid_size * grid_size
    current_length = len(flat_list)

    # Convert up front: a plain Python list has no ``.reshape``, so the
    # truncation path used to fail with AttributeError for list input.
    cells = np.asarray(flat_list)

    if current_length > required_length:
        # Truncate if the grid is larger than grid_size x grid_size
        cells = cells[:required_length]
    elif current_length < required_length:
        # Pad with -1 to reach the required length
        cells = np.pad(cells, (0, required_length - current_length), 'constant', constant_values=-1)

    return cells.reshape(grid_size, grid_size).tolist()

# Extract and Reshape Grid
def extract_and_reshape_grid(grid, grid_size=30):
    """Flatten ``grid`` (if nested) and reshape it to a fixed square grid.

    Returns the reshaped grid as nested lists, or None when anything goes
    wrong while processing (the error is logged).
    """
    try:
        # Only the first element is inspected to decide whether the grid is nested.
        is_nested = isinstance(grid[0], list)
        cells = [value for row in grid for value in row] if is_nested else grid
        return reshape_to_square_grid(cells, grid_size)
    except Exception as e:
        logger.error(f"Error processing grid: {e}")
        return None

# Flatten and Reshape Grid Data
def flatten_and_reshape(task_data, grid_size=30):
    """Collect a GridPair for every usable 'train' example in ``task_data``.

    Args:
        task_data (dict): task id -> task content; only the 'train' list of
            each task is read.
        grid_size (int): Side length passed on to the grid reshaper.

    Returns:
        list: GridPair objects for the examples whose input and output grids
        were both processed successfully.
    """
    flattened_pairs = []
    for task_id, task_content in task_data.items():
        logger.info(f"Parsing task {task_id}...")
        for example in task_content.get('train', []):
            input_grid = extract_and_reshape_grid(example.get("input"), grid_size)
            output_grid = extract_and_reshape_grid(example.get("output"), grid_size)
            if not (input_grid and output_grid):
                logger.warning(f"Task ID: {task_id} has invalid input/output grids.")
                continue
            flattened_pairs.append(GridPair(task_id, input_grid, output_grid))
    logger.info(f"Total valid grid pairs extracted: {len(flattened_pairs)}")
    return flattened_pairs

# -----------------------------
# 5. Data Augmentation Functions
# -----------------------------

def augment_grid(grid, noise_prob=0.2, dead_square_prob=0.1):
    """Return a noisy copy of ``grid`` as nested lists.

    Each cell is independently replaced by a random value in 0-9 with
    probability ``noise_prob`` and then set to -1 (dead square) with
    probability ``dead_square_prob``. The input grid is not modified.
    """
    noisy = np.array(grid, copy=True)

    for row_idx in range(noisy.shape[0]):
        for col_idx in range(noisy.shape[1]):
            # Two independent draws per cell, in this order.
            inject_noise = random.random() < noise_prob
            if inject_noise:
                noisy[row_idx, col_idx] = random.randint(0, NUM_CLASSES - 2)  # Add noise within 0-9
            kill_cell = random.random() < dead_square_prob
            if kill_cell:
                noisy[row_idx, col_idx] = -1  # Dead square
    return noisy.tolist()

def rotate_grid(grid):
    """Rotate ``grid`` by a random multiple of 90 degrees and return nested lists."""
    quarter_turns = random.choice([0, 1, 2, 3])
    rotated = np.rot90(grid, k=quarter_turns)
    return rotated.tolist()

def flip_grid(grid):
    """Randomly leave ``grid`` as is, flip it vertically, or flip it horizontally.

    When no flip is chosen the original object is returned unchanged;
    otherwise the flipped grid is returned as nested lists.
    """
    mode = random.choice(['none', 'vertical', 'horizontal'])
    if mode == 'vertical':
        flipper = np.flipud  # Vertical flip
    elif mode == 'horizontal':
        flipper = np.fliplr  # Horizontal flip
    else:
        return grid  # No flip
    return flipper(grid).tolist()

# Generate Multiple Augmented Datasets
def generate_multiple_augmented_datasets(grid_pairs, num_augmented_sets=3):
    """Build ``num_augmented_sets`` augmented copies of every pair in ``grid_pairs``.

    Each copy has its input grid passed through noise injection, a random
    rotation and a random flip, in that order. The output grid is reused
    as is.

    NOTE(review): the rotation/flip is applied to the input only, so the
    spatial correspondence with the untouched output grid changes - confirm
    this is intended.
    """
    def _augment_input(grid):
        return flip_grid(rotate_grid(augment_grid(grid)))

    return [
        GridPair(pair.task_id, _augment_input(pair.input_grid), pair.output_grid)
        for _ in range(num_augmented_sets)
        for pair in grid_pairs
    ]

# -----------------------------
# 6. Custom Collate Function
# -----------------------------

def collate_fn(batch):
    """
    Combine a list of (input, output) tensor pairs into two batch tensors.

    Every grid is expected to have been padded to GRID_SIZE x GRID_SIZE
    already, so plain stacking along a new leading dimension is enough.
    """
    input_tensors, output_tensors = zip(*batch)  # Split pairs into two tuples
    stacked_inputs = torch.stack(input_tensors)    # Shape: (batch_size, 1, GRID_SIZE, GRID_SIZE)
    stacked_outputs = torch.stack(output_tensors)  # Shape: (batch_size, GRID_SIZE, GRID_SIZE)
    return stacked_inputs, stacked_outputs

# -----------------------------
# 7. PyTorch Dataset Class
# -----------------------------

class AugmentedARCDataset(Dataset):
    """Dataset of grid pairs yielding (input_tensor, output_tensor) samples.

    With ``augment=True`` the input grid is augmented on the fly (noise,
    random rotation, random flip) each time a sample is fetched.
    """

    def __init__(self, grid_pairs, augment=False):
        self.augment = augment
        self.grid_pairs = grid_pairs

    def __len__(self):
        return len(self.grid_pairs)

    def __getitem__(self, idx):
        sample = self.grid_pairs[idx]
        source_grid, target_grid = sample.input_grid, sample.output_grid

        if self.augment:
            for transform in (augment_grid, rotate_grid, flip_grid):
                source_grid = transform(source_grid)

        # Input becomes a float tensor with a leading channel axis.
        input_tensor = torch.tensor(source_grid, dtype=torch.float32).unsqueeze(0)  # Shape: (1, GRID_SIZE, GRID_SIZE)

        # Cells equal to -1 in the target are relabelled as the last class index (NUM_CLASSES - 1).
        remapped_target = []
        for row in target_grid:
            remapped_target.append([NUM_CLASSES - 1 if cell == -1 else cell for cell in row])
        output_tensor = torch.tensor(remapped_target, dtype=torch.long)  # Shape: (GRID_SIZE, GRID_SIZE)

        return input_tensor, output_tensor

# -----------------------------
# 8. Define the Deep Neural Network Model
# -----------------------------

class CNNGridMapper(nn.Module):
    """ResNet18 feature extractor followed by a row-wise bidirectional LSTM.

    The forward pass returns per-cell class logits of shape
    (batch_size, grid_size * grid_size, num_classes).

    NOTE(review): the backbone reduces spatial resolution before the result
    is upsampled back to grid_size x grid_size; for 30x30 inputs the
    intermediate feature map is presumably very coarse - confirm this is
    acceptable.
    """

    def __init__(self, num_classes=NUM_CLASSES, grid_size=GRID_SIZE):
        super().__init__()
        self.grid_size = grid_size
        self.num_classes = num_classes

        # CNN backbone: ResNet18 with its default pretrained weights.
        self.cnn = resnet18(weights=ResNet18_Weights.DEFAULT)
        # Replace the stem convolution so the network accepts single-channel
        # input; the fresh layer is Kaiming-initialised.
        single_channel_stem = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        nn.init.kaiming_normal_(single_channel_stem.weight, mode='fan_out', nonlinearity='relu')
        self.cnn.conv1 = single_channel_stem
        # Keep everything except the last two children of the ResNet.
        backbone_stages = list(self.cnn.children())[:-2]
        self.cnn_layers = nn.Sequential(*backbone_stages)  # Output: (batch_size, 512, H, W)

        # Bring the feature map back to grid_size x grid_size.
        self.interpolate = nn.Upsample(size=(grid_size, grid_size), mode='bilinear', align_corners=False)

        # Row-wise sequence model: every grid row is one sequence of cells.
        self.rnn = nn.LSTM(
            input_size=512,  # Number of features per cell from CNN
            hidden_size=128,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )

        # Per-cell classifier on the concatenated forward/backward states.
        self.fc = nn.Linear(128 * 2, num_classes)  # *2 for bidirectional

    def forward(self, x):
        n_samples = x.size(0)
        side = self.grid_size

        # CNN features, upsampled to the grid resolution.
        feature_map = self.interpolate(self.cnn_layers(x))  # (batch_size, 512, GRID_SIZE, GRID_SIZE)

        # Channels last, then fold the rows into the batch axis for the LSTM.
        per_cell = feature_map.permute(0, 2, 3, 1).contiguous()  # (batch_size, GRID_SIZE, GRID_SIZE, 512)
        row_sequences = per_cell.view(n_samples * side, side, 512)

        rnn_out, _ = self.rnn(row_sequences)  # (batch_size * GRID_SIZE, GRID_SIZE, hidden_size * 2)
        cell_logits = self.fc(rnn_out)        # (batch_size * GRID_SIZE, GRID_SIZE, num_classes)

        # Unfold the rows again: one logit vector per grid cell.
        return cell_logits.contiguous().view(n_samples, side * side, self.num_classes)

# -----------------------------
# 9. GUI Class
# -----------------------------

class TrainingGUI:
    """
    Tkinter window showing training progress: epoch counter, loss/accuracy
    labels, a progress bar and a live matplotlib plot of the metrics.

    Producers put metric dicts on ``self.queue``; the window polls that queue
    every 100 ms and applies the updates.
    """
    def __init__(self, root, total_epochs):
        self.root = root
        self.root.title("Model Training Progress Tracker")
        self.queue = queue.Queue()

        # Container for every widget
        self.frame = tk.Frame(root)
        self.frame.pack(fill=tk.BOTH, expand=1)

        def add_label(text, font_size, pady):
            """Create a Helvetica label inside the main frame and pack it."""
            widget = tk.Label(self.frame, text=text, font=("Helvetica", font_size))
            widget.pack(pady=pady)
            return widget

        # Text read-outs, packed top to bottom in creation order
        self.epoch_label = add_label(f"Epoch: 0/{total_epochs}", 14, 5)
        self.loss_label = add_label("Loss: 0.0000", 12, 2)
        self.accuracy_label = add_label("Accuracy: 0.0000", 12, 2)
        self.val_loss_label = add_label("Validation Loss: 0.0000", 12, 2)
        self.val_accuracy_label = add_label("Validation Accuracy: 0.0000", 12, 2)

        self.progress_bar = ttk.Progressbar(self.frame, orient="horizontal", length=400, mode="determinate")
        self.progress_bar.pack(pady=10)

        # Live plot with one (initially empty) line per metric
        self.fig, self.ax = plt.subplots(figsize=(6, 4))
        self.line_loss = self.ax.plot([], [], label='Training Loss', color='blue')[0]
        self.line_val_loss = self.ax.plot([], [], label='Validation Loss', color='orange')[0]
        self.line_acc = self.ax.plot([], [], label='Training Accuracy', color='green')[0]
        self.line_val_acc = self.ax.plot([], [], label='Validation Accuracy', color='red')[0]
        self.ax.set_xlabel('Epochs')
        self.ax.set_ylabel('Metrics')
        self.ax.legend()
        self.ax.grid(True)
        self.canvas_plot = FigureCanvasTkAgg(self.fig, master=self.frame)
        self.canvas_plot.draw()
        self.canvas_plot.get_tk_widget().pack()

        # Metric history backing the plotted lines
        self.loss_data = []
        self.val_loss_data = []
        self.acc_data = []
        self.val_acc_data = []

        # Kick off the periodic queue polling
        self.root.after(100, self.process_queue)

    def process_queue(self):
        """
        Drain pending messages and apply the dict ones to the GUI, then
        reschedule itself to run again in 100 ms.
        """
        pending = self.queue
        while True:
            if pending.empty():
                break
            item = pending.get()
            if not isinstance(item, dict):
                continue
            self.update_gui(item)
        self.root.after(100, self.process_queue)

    def update_gui(self, data):
        """
        Refresh labels, progress bar and plot from one epoch's metrics dict.
        """
        self.epoch_label.configure(text=f"Epoch: {data['epoch']}/{data['total_epochs']}")
        self.loss_label.configure(text=f"Loss: {data['loss']:.4f}")
        self.accuracy_label.configure(text=f"Accuracy: {data['accuracy']:.4f}")
        self.val_loss_label.configure(text=f"Validation Loss: {data['val_loss']:.4f}")
        self.val_accuracy_label.configure(text=f"Validation Accuracy: {data['val_accuracy']:.4f}")

        # Progress as a percentage of completed epochs
        percent_done = (data['epoch'] / data['total_epochs']) * 100
        self.progress_bar.configure(value=percent_done)
        self.root.update_idletasks()

        # Extend each metric history, then push the histories into the lines
        history_by_key = (
            (self.loss_data, 'loss'),
            (self.val_loss_data, 'val_loss'),
            (self.acc_data, 'accuracy'),
            (self.val_acc_data, 'val_accuracy'),
        )
        for history, key in history_by_key:
            history.append(data[key])

        line_by_history = (
            (self.line_loss, self.loss_data),
            (self.line_val_loss, self.val_loss_data),
            (self.line_acc, self.acc_data),
            (self.line_val_acc, self.val_acc_data),
        )
        for line, history in line_by_history:
            line.set_data(range(1, len(history) + 1), history)

        self.ax.relim()
        self.ax.autoscale_view()
        self.canvas_plot.draw()

# -----------------------------
# 10. Training Function with GUI Integration
# -----------------------------

def _run_epoch(model, loader, criterion, device, use_amp, optimizer=None, scaler=None, log_tag=None):
    """Run one pass over ``loader`` and return ``(summed_batch_loss, correct, total)``.

    The pass is a training pass when ``optimizer`` is given (gradients are
    computed and a step is taken per batch) and a gradient-free evaluation
    pass otherwise. ``scaler`` is used for the backward/step when provided.
    """
    from contextlib import nullcontext

    training = optimizer is not None
    # Only enter CUDA autocast when mixed precision is actually in use.
    amp_context = autocast if use_amp else nullcontext

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training):
        for batch_idx, (inputs, targets) in enumerate(loader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            if training:
                optimizer.zero_grad()

            with amp_context():
                outputs = model(inputs)
                targets_flat = targets.view(-1)
                outputs_flat = outputs.view(-1, NUM_CLASSES)
                loss = criterion(outputs_flat, targets_flat)

            if training:
                if scaler is not None:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    loss.backward()
                    optimizer.step()

            loss_sum += loss.item()

            _, predicted = torch.max(outputs_flat.detach(), 1)
            correct += (predicted == targets_flat).sum().item()
            total += targets_flat.size(0)

            if training and log_tag is not None and batch_idx % 10 == 0:
                logger.info(f"{log_tag}, Batch [{batch_idx}/{len(loader)}], Loss: {loss.item():.4f}")

    return loss_sum, correct, total


def train_deep_model(model, train_loader, val_loader, epochs, lr, device, gui, patience=5):
    """Train ``model`` with AdamW and cross-entropy, validating after every epoch.

    Per-epoch metrics are pushed onto ``gui.queue`` as a dict. The weights
    with the lowest validation loss so far are saved to
    "best_deep_model.pth". Training stops early once the validation loss has
    not improved for ``patience`` consecutive epochs.

    Args:
        model (nn.Module): Model producing logits viewable as (-1, NUM_CLASSES).
        train_loader (DataLoader): Batches of (inputs, targets) for training.
        val_loader (DataLoader): Batches of (inputs, targets) for validation.
        epochs (int): Maximum number of epochs.
        lr (float): Learning rate for AdamW.
        device (str | torch.device): Device to train on.
        gui: Object exposing a ``queue`` with a ``put`` method.
        patience (int): Early-stopping patience, in epochs.
    """
    logger.info("Starting the training process.")
    # Debugging aid; enabled globally for the rest of the process.
    torch.autograd.set_detect_anomaly(True)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    model.to(device)

    # Mixed precision follows the device actually used for training, not
    # merely whether CUDA exists on the machine: CUDA autocast/GradScaler
    # must not be engaged when the caller asked for CPU training.
    use_amp = torch.device(device).type == 'cuda'
    scaler = GradScaler() if use_amp else None

    best_val_loss = float('inf')
    epochs_no_improve = 0

    for epoch in range(1, epochs + 1):
        logger.info(f"Starting epoch {epoch}/{epochs}.")

        # Training phase
        model.train()
        running_loss, correct, total = _run_epoch(
            model, train_loader, criterion, device, use_amp,
            optimizer=optimizer, scaler=scaler, log_tag=f"Epoch [{epoch}/{epochs}]",
        )
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total
        logger.info(f"Epoch [{epoch}/{epochs}] completed. Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.4f}")

        # Validation phase
        model.eval()
        val_loss, val_correct, val_total = _run_epoch(
            model, val_loader, criterion, device, use_amp,
        )
        avg_val_loss = val_loss / len(val_loader)
        val_acc = val_correct / val_total
        logger.info(f"Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

        # Update GUI
        gui.queue.put({
            'epoch': epoch,
            'total_epochs': epochs,
            'loss': epoch_loss,
            'accuracy': epoch_acc,
            'val_loss': avg_val_loss,
            'val_accuracy': val_acc
        })

        # Check for improvement
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
            # Save the best model
            torch.save(model.state_dict(), "best_deep_model.pth")
            logger.info(f"Epoch {epoch}/{epochs} - Validation loss decreased. Saving model.")
        else:
            epochs_no_improve += 1
            logger.info(f"Epoch {epoch}/{epochs} - No improvement in validation loss for {epochs_no_improve} epochs.")

        # Early Stopping
        if epochs_no_improve >= patience:
            logger.info("Early stopping triggered.")
            break

    logger.info("Training completed.")

# -----------------------------
# 11. Evaluation and Prediction Functions
# -----------------------------

def evaluate_model(model, test_loader, device='cpu'):
    """
    Evaluates the model on the test dataset.

    The loss is averaged per sample over the samples actually yielded by
    the loader; accuracy is computed per grid cell.

    Args:
        model (nn.Module): Trained model.
        test_loader (DataLoader): DataLoader for the test dataset.
        device (str): Device to run evaluation on.

    Returns:
        tuple: (average_loss, accuracy). Both are NaN when the loader yields
        no samples (a warning is logged instead of dividing by zero).
    """
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    model.eval()

    total_loss = 0.0
    correct = 0
    total_cells = 0
    num_samples = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            batch_size = inputs.size(0)

            outputs_flat = model(inputs).view(-1, NUM_CLASSES)  # (batch_size * 900, num_classes)
            targets_flat = targets.view(-1)

            loss = criterion(outputs_flat, targets_flat)
            # Weight the batch-mean loss by batch size so the final average is per sample.
            total_loss += loss.item() * batch_size
            num_samples += batch_size

            _, predicted = torch.max(outputs_flat, 1)
            correct += (predicted == targets_flat).sum().item()
            total_cells += targets_flat.size(0)

    # An empty evaluation set is possible (e.g. a data file failed to load);
    # report that explicitly rather than raising ZeroDivisionError.
    if num_samples == 0 or total_cells == 0:
        logger.warning("Evaluation skipped: the test loader produced no samples.")
        return float("nan"), float("nan")

    avg_loss = total_loss / num_samples
    accuracy = correct / total_cells

    logger.info(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy

def visualize_predictions(model, test_loader, device='cpu', num_samples=5):
    """
    Plots input, predicted and actual grids side by side for a few samples.

    One figure with three panels is shown per sample; the function returns
    as soon as ``num_samples`` figures have been shown.

    Args:
        model (nn.Module): Trained model.
        test_loader (DataLoader): DataLoader for the test dataset.
        device (str): Device to run inference on.
        num_samples (int): Number of samples to visualize.
    """
    model.to(device)
    model.eval()

    panel_titles = ("Input Grid", "Predicted Grid", "Actual Grid")
    shown = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)  # Shape: (batch_size, 900, num_classes)
            predicted = torch.max(logits, 2)[1]  # Shape: (batch_size, 900)

            for sample_input, sample_pred, sample_target in zip(inputs, predicted, targets):
                panels = (
                    sample_input.cpu().numpy().squeeze(),  # Shape: (GRID_SIZE, GRID_SIZE)
                    sample_pred.cpu().numpy().reshape(GRID_SIZE, GRID_SIZE),
                    sample_target.cpu().numpy().reshape(GRID_SIZE, GRID_SIZE),
                )

                _fig, axs = plt.subplots(1, 3, figsize=(15, 5))
                for axis, panel, title in zip(axs, panels, panel_titles):
                    axis.imshow(panel, cmap='viridis', interpolation='nearest')
                    axis.set_title(title)
                    axis.axis('off')

                plt.tight_layout()
                plt.show()

                shown += 1
                if shown >= num_samples:
                    return

# -----------------------------
# 12. Main Workflow with Modifications
# -----------------------------

def main():
    """Run the debugging pipeline end to end.

    Loads the ARC data, builds small train/validation/evaluation loaders,
    smoke-tests one forward/backward pass, then trains, evaluates and
    visualises predictions before entering the Tk main loop.
    """
    # Pick the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f"Using device: {device}")

    # Load and reshape the raw data
    arc_data = load_arc_data()
    train_grid_pairs = flatten_and_reshape(arc_data.get("arc-agi_training-challenges", {}), grid_size=GRID_SIZE)
    eval_grid_pairs = flatten_and_reshape(arc_data.get("arc-agi_evaluation-challenges", {}), grid_size=GRID_SIZE)

    logger.info(f"Number of training grid pairs: {len(train_grid_pairs)}")
    logger.info(f"Number of evaluation grid pairs: {len(eval_grid_pairs)}")

    # Originals plus three augmented copies form the training pool
    augmented_pairs = generate_multiple_augmented_datasets(train_grid_pairs, num_augmented_sets=3)
    combined_train_pairs = train_grid_pairs + augmented_pairs

    # 80-20 split, then cut down hard for quick test runs
    split_train, split_val = train_test_split(combined_train_pairs, test_size=0.2, random_state=42)
    train_pairs = split_train[:50]  # Use 50 samples for quick testing
    val_pairs = split_val[:10]      # Use 10 samples for validation

    # Datasets (no on-the-fly augmentation: the pool is already augmented)
    train_dataset = AugmentedARCDataset(train_pairs, augment=False)
    val_dataset = AugmentedARCDataset(val_pairs, augment=False)
    eval_dataset = AugmentedARCDataset(eval_grid_pairs, augment=False)

    def build_loader(dataset, shuffle):
        """Small-batch loader; num_workers=0 avoids multiprocessing issues on Windows."""
        return DataLoader(
            dataset,
            batch_size=4,  # Reduced batch size for debugging
            shuffle=shuffle,
            collate_fn=collate_fn,
            num_workers=0,
        )

    train_loader = build_loader(train_dataset, True)
    val_loader = build_loader(val_dataset, False)
    eval_loader = build_loader(eval_dataset, False)

    logger.info(f"Training DataLoader size: {len(train_loader)} batches")
    logger.info(f"Validation DataLoader size: {len(val_loader)} batches")
    logger.info(f"Number of training samples: {len(train_dataset)}")
    logger.info(f"Number of validation samples: {len(val_dataset)}")
    logger.info(f"Number of evaluation samples: {len(eval_dataset)}")

    # Build the model
    model = CNNGridMapper(num_classes=NUM_CLASSES, grid_size=GRID_SIZE).to(device)
    logger.info("Model initialized successfully.")

    # Smoke test: one batch through forward and backward before real training
    try:
        model.train()
        sample_inputs, sample_targets = next(iter(train_loader))
        sample_inputs = sample_inputs.to(device)
        sample_targets = sample_targets.to(device)

        sample_logits = model(sample_inputs).view(-1, NUM_CLASSES)
        smoke_loss = nn.CrossEntropyLoss()(sample_logits, sample_targets.view(-1))
        smoke_loss.backward()
        logger.info("Single batch forward and backward pass successful.")
    except Exception:
        logger.exception("Model forward or backward pass failed.")
        return

    # GUI setup
    root = tk.Tk()
    total_epochs = 2  # Reduced for quick testing
    gui = TrainingGUI(root, total_epochs)

    def train_thread():
        """Train, then evaluate and visualise on the validation and evaluation sets."""
        logger.info("Training thread started.")
        try:
            train_deep_model(
                model, train_loader, val_loader,
                epochs=total_epochs, lr=1e-3, device=device, gui=gui, patience=10,
            )
            logger.info("Training completed successfully.")
            # Validation set first
            evaluate_model(model, val_loader, device=device)
            visualize_predictions(model, val_loader, device=device, num_samples=5)
            # Then the held-out evaluation set
            logger.info("Evaluating on evaluation set.")
            evaluate_model(model, eval_loader, device=device)
            visualize_predictions(model, eval_loader, device=device, num_samples=5)
            logger.info("All processes completed successfully.")
        except Exception:
            logger.exception("An error occurred in the training thread.")

    # Despite its name, train_thread is called synchronously here (debug mode).
    # NOTE(review): because this runs before root.mainloop(), the GUI queue is
    # presumably only drained after training has finished - confirm intended.
    train_thread()

    root.mainloop()

# -----------------------------
# 13. Execute the Main Function
# -----------------------------

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


In [None]:
# ===============================================================
# Comprehensive Implementation for ARC Dataset Training with TensorBoard
# ===============================================================

# -----------------------------
# 1. Install Necessary Packages
# -----------------------------
# Note: Uncomment the following lines if running for the first time.
# %pip install torch torchvision tensorboard tqdm matplotlib

# -----------------------------
# 2. Import Libraries
# -----------------------------
import math
import json
import logging
import numpy as np
import torch
import random
import threading
import time
import subprocess
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from dataclasses import dataclass
from torchvision import models
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torchvision.models import ResNet18_Weights
from torch.utils.tensorboard import SummaryWriter
from tqdm.notebook import tqdm  # For progress bars in Jupyter
import matplotlib.pyplot as plt

# -----------------------------
# 3. Configure Logging and Seed
# -----------------------------

# Configure logging: INFO-and-above records go through the root logger's default handler.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the functions defined in this cell.
logger = logging.getLogger(__name__)

# Set random seeds for reproducibility
def set_seed(seed=42):
    """Apply ``seed`` to Python's, NumPy's and PyTorch's random generators.

    All CUDA generators are seeded too when CUDA is available.
    """
    seeders = [random.seed, np.random.seed, torch.manual_seed]
    if torch.cuda.is_available():
        seeders.append(torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)


# Seed immediately so the rest of the cell is reproducible.
set_seed()

# -----------------------------
# 4. Define Constants
# -----------------------------

# Side length of the square grid that every sample is padded or truncated to.
GRID_SIZE = 30        # Fixed grid size (adjust as needed)
# Number of classes predicted per grid cell.
NUM_CLASSES = 11      # 0-10, where 10 represents dead squares

# -----------------------------
# 5. Define Data Structures and Loading Functions
# -----------------------------

# Data Class for Grid Pairs
@dataclass
class GridPair:
    """A single input/output grid example tagged with its ARC task id."""
    # Id of the task the example belongs to.
    task_id: str
    # Input grid; produced in this file as a nested list of cell values.
    input_grid: list
    # Matching output grid, in the same nested-list layout.
    output_grid: list

# Load ARC Data
def load_arc_data():
    """Load the ARC challenge and solution JSON files from the working directory.

    Returns:
        dict: dataset key -> parsed JSON. Any file that is missing or fails
        to parse is reported through the logger and represented by ``{}``.
    """
    sources = (
        ("arc-agi_training-challenges", "arc-agi_training_challenges.json"),
        ("arc-agi_evaluation-challenges", "arc-agi_evaluation_challenges.json"),
        ("arc-agi_training_solutions", "arc-agi_training_solutions.json"),
        ("arc-agi_evaluation_solutions", "arc-agi_evaluation_solutions.json"),
    )
    loaded = {}
    for name, filename in sources:
        # Start from the fallback; overwritten below when the file loads cleanly.
        loaded[name] = {}
        try:
            with open(filename, 'r') as fh:
                loaded[name] = json.load(fh)
        except (FileNotFoundError, json.JSONDecodeError) as exc:
            logger.error(f"Error loading {filename}: {exc}")
            continue
        logger.info(f"Loaded {name} from {filename}.")
    return loaded

# Reshape to Fixed Square Grid
def reshape_to_square_grid(flat_list, grid_size=30):
    """Reshape a flat sequence of cells into a ``grid_size`` x ``grid_size`` nested list.

    Args:
        flat_list: One-dimensional sequence (list or array) of cell values.
        grid_size (int): Side length of the square grid to produce.

    Returns:
        list: ``grid_size`` rows with ``grid_size`` values each. Extra cells
        beyond ``grid_size ** 2`` are dropped; missing cells are filled with
        -1 at the end.
    """
    required_length = grid_size * grid_size

    # Work on an ndarray from the start. Slicing a plain list yields another
    # list, which has no ``.reshape`` - that made over-long input fail.
    values = np.asarray(flat_list)[:required_length]

    shortfall = required_length - len(values)
    if shortfall > 0:
        # Pad with -1 to reach the required length
        values = np.pad(values, (0, shortfall), 'constant', constant_values=-1)

    return values.reshape(grid_size, grid_size).tolist()

# Extract and Reshape Grid
def extract_and_reshape_grid(grid, grid_size=30):
    """Flatten ``grid`` when it is nested and reshape it to a fixed square grid.

    Args:
        grid: A list of row lists, an already-flat list of cell values, or a
            single non-list value (treated as a one-cell grid).
        grid_size (int): Side length of the resulting square grid.

    Returns:
        list | None: The reshaped grid as nested lists, or None when the grid
        is missing (None) or cannot be processed; the reason is logged.
    """
    # A missing grid must be rejected here: wrapping None into [None] would
    # yield a seemingly valid grid that contains None.
    if grid is None:
        logger.error("Error processing grid: grid is None")
        return None
    try:
        if isinstance(grid, list):
            if grid and isinstance(grid[0], list):
                # Flatten the grid if it's a list of lists
                flat_list = [item for sublist in grid for item in sublist]
            else:
                # Already flat (or empty): use as is
                flat_list = grid
        else:
            flat_list = [grid]
        return reshape_to_square_grid(flat_list, grid_size)
    except Exception as e:
        logger.error(f"Error processing grid: {e}")
        return None

# Flatten and Reshape Grid Data
def flatten_and_reshape(task_data, grid_size=30):
    """Turn the 'train' examples of every task into reshaped GridPair objects.

    Examples whose input or output grid could not be processed are skipped
    with a warning.

    Args:
        task_data (dict): task id -> task content holding a 'train' list.
        grid_size (int): Side length handed to the grid reshaper.

    Returns:
        list: The GridPair objects that were extracted successfully.
    """
    collected = []
    for task_id, task_content in task_data.items():
        logger.info(f"Parsing task {task_id}...")
        for example in task_content.get('train', []):
            # Both grids are always processed, even when the first one fails.
            grids = [
                extract_and_reshape_grid(example.get(field), grid_size)
                for field in ("input", "output")
            ]
            if all(grids):
                collected.append(GridPair(task_id, grids[0], grids[1]))
            else:
                logger.warning(f"Task ID: {task_id} has invalid input/output grids.")
    logger.info(f"Total valid grid pairs extracted: {len(collected)}")
    return collected

# -----------------------------
# 6. Data Augmentation Functions
# -----------------------------

def augment_grid(grid, noise_prob=0.2, dead_square_prob=0.1):
    """Return a copy of ``grid`` with random noise and dead squares added.

    Every cell is visited once. With probability ``noise_prob`` it is replaced
    by a random value in ``0 .. NUM_CLASSES - 2``; then, independently, with
    probability ``dead_square_prob`` it is set to -1 (dead square).
    """
    noisy = np.array(grid).copy()

    for row in range(noisy.shape[0]):
        for col in range(noisy.shape[1]):
            replace_with_noise = random.random() < noise_prob
            if replace_with_noise:
                # Noise stays within the regular colour range 0-9
                noisy[row, col] = random.randint(0, NUM_CLASSES - 2)
            mark_dead = random.random() < dead_square_prob
            if mark_dead:
                noisy[row, col] = -1
    return noisy.tolist()

def rotate_grid(grid):
    """Rotate the grid by a randomly chosen multiple of 90 degrees (0 to 3 turns)."""
    quarter_turns = random.choice([0, 1, 2, 3])
    rotated = np.rot90(grid, quarter_turns)
    return rotated.tolist()

def flip_grid(grid):
    """Flip the grid vertically or horizontally, each with probability one half."""
    # Draw above 0.5 -> vertical flip (up/down); otherwise horizontal (left/right).
    flip = np.flipud if random.random() > 0.5 else np.fliplr
    return flip(grid).tolist()

# -----------------------------
# 7. Custom Collate Function
# -----------------------------

def collate_fn(batch):
    """Combine ``(input, output)`` tensor pairs into two batched tensors.

    Each side is stacked along a new leading batch dimension, so every input
    (and every output) in the batch must share one shape.
    """
    input_items, output_items = zip(*batch)
    return torch.stack(input_items), torch.stack(output_items)

# -----------------------------
# 8. PyTorch Dataset Class
# -----------------------------

class AugmentedARCDataset(Dataset):
    """Dataset of grid pairs with optional on-the-fly input augmentation.

    Items are ``(input_tensor, output_tensor)`` tuples: the input is a float32
    tensor with a leading channel dimension, the output a long tensor of class
    indices in which -1 cells are remapped to ``NUM_CLASSES - 1``.
    """

    def __init__(self, grid_pairs, augment=True):
        self.augment = augment
        self.grid_pairs = grid_pairs

    def __len__(self):
        return len(self.grid_pairs)

    def __getitem__(self, idx):
        sample = self.grid_pairs[idx]
        source_grid, target_grid = sample.input_grid, sample.output_grid

        if self.augment:
            # NOTE(review): noise, rotation and flip are applied to the input
            # only; the target grid keeps its original orientation. Confirm
            # that this is intended.
            for transform in (augment_grid, rotate_grid, flip_grid):
                source_grid = transform(source_grid)

        # Input: add a channel axis -> (1, rows, cols)
        input_tensor = torch.tensor(source_grid, dtype=torch.float32).unsqueeze(0)

        # Target: -1 (dead/padding) becomes the last class index
        remapped_rows = []
        for row in target_grid:
            remapped_rows.append([NUM_CLASSES - 1 if cell == -1 else cell for cell in row])
        output_tensor = torch.tensor(remapped_rows, dtype=torch.long)

        return input_tensor, output_tensor

# -----------------------------
# 9. Define the Deep Neural Network Model
# -----------------------------

class CNNGridMapper(nn.Module):
    """ResNet18 feature extractor followed by a row-wise bidirectional LSTM.

    The network takes single-channel grids and returns per-cell class logits
    of shape ``(batch_size, grid_size * grid_size, num_classes)``.
    """

    def __init__(self, num_classes=NUM_CLASSES, grid_size=GRID_SIZE):
        super().__init__()
        self.grid_size = grid_size
        self.num_classes = num_classes

        # Backbone: ImageNet-pretrained ResNet18 whose stem is replaced by a
        # freshly initialised single-channel convolution.
        backbone = models.resnet18(weights=ResNet18_Weights.DEFAULT)
        backbone.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        nn.init.kaiming_normal_(backbone.conv1.weight, mode='fan_out', nonlinearity='relu')
        # Keep everything except the final average pool and classifier, so the
        # backbone emits a 512-channel feature map.
        trunk_layers = list(backbone.children())[:-2]
        self.cnn = nn.Sequential(*trunk_layers)

        # Resample the feature map back to one feature vector per grid cell.
        self.interpolate = nn.Upsample(size=(grid_size, grid_size), mode='bilinear', align_corners=False)

        # Each grid row is fed to the LSTM as a sequence of 512-d cell features.
        self.rnn = nn.LSTM(input_size=512,
                           hidden_size=128,
                           num_layers=2,
                           batch_first=True,
                           bidirectional=True)

        # Both LSTM directions are concatenated, hence 2 * 128 input features.
        self.fc = nn.Linear(128 * 2, num_classes)

    def forward(self, x):
        # x: (batch_size, 1, GRID_SIZE, GRID_SIZE)
        n_samples = x.size(0)
        side = self.grid_size

        # CNN features resampled to (batch_size, 512, side, side)
        feature_map = self.interpolate(self.cnn(x))

        # Channels last, then one sequence per grid row: (batch_size*side, side, 512)
        row_sequences = feature_map.permute(0, 2, 3, 1).reshape(n_samples * side, side, 512)

        rnn_out, _ = self.rnn(row_sequences)  # (batch_size*side, side, 256)
        cell_logits = self.fc(rnn_out)        # (batch_size*side, side, num_classes)

        # Regroup rows per sample: (batch_size, side*side, num_classes)
        return cell_logits.reshape(n_samples, side * side, self.num_classes)

# -----------------------------
# 10. Training Function with TensorBoard Integration and Progress Bars
# -----------------------------

def train_deep_model(model, train_loader, val_loader, epochs, lr, device, patience=5):
    """Train ``model`` with mixed precision, TensorBoard logging and early stopping.

    Each epoch runs a training pass and a validation pass, logs loss and
    per-cell accuracy to TensorBoard under ``runs/deep_model``, and writes the
    weights to ``best_deep_model.pth`` whenever the validation loss improves.
    Training stops once the validation loss has not improved for ``patience``
    consecutive epochs.

    Args:
        model: Network whose output can be viewed as ``(-1, NUM_CLASSES)`` logits.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Maximum number of epochs to run.
        lr: Learning rate passed to AdamW.
        device: Device the model and the batches are moved to.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        None. The model is trained in place and keeps the weights of the last
        epoch that ran; the best checkpoint is only written to disk.
    """
    # NOTE(review): ``tqdm`` is used below but is not imported in the visible
    # part of this cell - confirm it is in scope before running.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    scaler = GradScaler()  # Initialize GradScaler for mixed precision

    model.to(device)

    # Initialize TensorBoard writer
    writer = SummaryWriter('runs/deep_model')

    # Early-stopping state
    best_val_loss = float('inf')
    epochs_no_improve = 0

    for epoch in range(1, epochs + 1):
        logger.info(f"Starting Epoch {epoch}/{epochs}")
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # Initialize tqdm progress bar for training
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch} Training", leave=False)
        for batch_idx, (inputs, targets) in enumerate(train_bar, 1):
            inputs = inputs.to(device)    # Shape: (batch_size, 1, GRID_SIZE, GRID_SIZE)
            targets = targets.to(device)  # Shape: (batch_size, GRID_SIZE, GRID_SIZE)

            optimizer.zero_grad()

            with autocast():  # Enable autocasting for mixed precision
                outputs = model(inputs)  # Shape: (batch_size, 900, num_classes)
                # Flatten so every grid cell is one classification example
                targets_flat = targets.view(-1)  # Shape: (batch_size * 900)
                outputs_flat = outputs.view(-1, NUM_CLASSES)  # Shape: (batch_size * 900, num_classes)
                loss = criterion(outputs_flat, targets_flat)

            # Scaled backward pass; the scaler performs the optimizer step and
            # then updates its scale factor.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

            # Per-cell accuracy: highest-scoring class against the target class
            _, predicted = torch.max(outputs_flat, 1)
            correct += (predicted == targets_flat).sum().item()
            total += targets_flat.size(0)

            # Update progress bar with loss
            train_bar.set_postfix({'Loss': loss.item()})

        # Mean of the per-batch losses (not weighted by batch size)
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total

        logger.info(f"Epoch {epoch}/{epochs} - Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f} - Starting Validation Phase.")

        # Validation Phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        val_bar = tqdm(val_loader, desc=f"Epoch {epoch} Validation", leave=False)
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(val_bar, 1):
                inputs = inputs.to(device)
                targets = targets.to(device)

                with autocast():  # Enable autocasting for mixed precision
                    outputs = model(inputs)
                    targets_flat = targets.view(-1)
                    outputs_flat = outputs.view(-1, NUM_CLASSES)
                    loss = criterion(outputs_flat, targets_flat)
                    val_loss += loss.item()

                    _, predicted = torch.max(outputs_flat, 1)
                    val_correct += (predicted == targets_flat).sum().item()
                    val_total += targets_flat.size(0)

                # Update validation progress bar with loss
                val_bar.set_postfix({'Val Loss': loss.item()})

        # Mean of the per-batch validation losses
        avg_val_loss = val_loss / len(val_loader)
        val_acc = val_correct / val_total

        logger.info(f"Epoch {epoch}/{epochs} - Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}")

        # Log metrics to TensorBoard
        writer.add_scalar('Loss/Train', epoch_loss, epoch)
        writer.add_scalar('Loss/Validation', avg_val_loss, epoch)
        writer.add_scalar('Accuracy/Train', epoch_acc, epoch)
        writer.add_scalar('Accuracy/Validation', val_acc, epoch)

        # Check for improvement
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
            # Save the best model
            torch.save(model.state_dict(), "best_deep_model.pth")
            logger.info("Validation loss decreased. Saving model.")
        else:
            epochs_no_improve += 1
            logger.info(f"No improvement in validation loss for {epochs_no_improve} epochs.")

        # Early Stopping
        if epochs_no_improve >= patience:
            logger.info("Early stopping triggered.")
            break

    writer.close()
    logger.info("Training completed.")

# -----------------------------
# 11. Evaluation and Prediction Functions
# -----------------------------

def evaluate_model(model, test_loader, device='cpu'):
    """
    Computes the average loss and per-cell accuracy of the model on a dataset.

    Args:
        model (nn.Module): Trained model.
        test_loader (DataLoader): DataLoader for the dataset to evaluate.
        device (str): Device to run evaluation on.

    Returns:
        tuple: (average_loss, accuracy)
    """
    loss_fn = nn.CrossEntropyLoss()
    model.to(device)
    model.eval()

    loss_sum, hits, seen = 0.0, 0, 0

    with torch.no_grad():
        for inputs, targets in tqdm(test_loader, desc="Evaluating", leave=False):
            inputs = inputs.to(device)
            targets = targets.to(device)

            # One row of logits and one target class per grid cell
            flat_logits = model(inputs).view(-1, NUM_CLASSES)
            batch_loss = loss_fn(flat_logits, targets.view(-1))
            # Weight the batch mean by the batch size to get a per-sample average
            loss_sum += batch_loss.item() * inputs.size(0)

            predicted = torch.max(flat_logits, 1)[1]
            hits += (predicted == targets.view(-1)).sum().item()
            seen += targets.view(-1).size(0)

    avg_loss = loss_sum / len(test_loader.dataset)
    accuracy = hits / seen

    logger.info(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy

def visualize_predictions(model, test_loader, device='cpu', num_samples=5):
    """
    Shows input, predicted and actual grids side by side for a few samples.

    Args:
        model (nn.Module): Trained model.
        test_loader (DataLoader): DataLoader providing (inputs, targets) batches.
        device (str): Device to run inference on.
        num_samples (int): Number of samples after which plotting stops.
    """
    model.to(device)
    model.eval()

    shown = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)                # (batch_size, 900, num_classes)
            predicted = torch.max(logits, 2)[1]   # (batch_size, 900)

            for idx in range(inputs.size(0)):
                # Title and 2-D data of the three panels, left to right
                panels = (
                    ("Input Grid", inputs[idx].cpu().numpy().squeeze()),
                    ("Predicted Grid", predicted[idx].cpu().numpy().reshape(GRID_SIZE, GRID_SIZE)),
                    ("Actual Grid", targets[idx].cpu().numpy().reshape(GRID_SIZE, GRID_SIZE)),
                )

                fig, axs = plt.subplots(1, 3, figsize=(15, 5))
                for ax, (title, grid) in zip(axs, panels):
                    ax.imshow(grid, cmap='viridis', interpolation='nearest')
                    ax.set_title(title)
                    ax.axis('off')

                plt.tight_layout()
                plt.show()

                shown += 1
                if shown >= num_samples:
                    return

# -----------------------------
# 12. TensorBoard Launch Function
# -----------------------------

def launch_tensorboard(log_dir='runs/deep_model'):
    """
    Starts the ``tensorboard`` command on port 6006 from a daemon thread.

    Args:
        log_dir (str): Directory where TensorBoard logs are stored.
    """
    command = ["tensorboard", "--logdir", log_dir, "--port", "6006"]
    # subprocess.run blocks until the process exits, so it gets its own thread.
    worker = threading.Thread(target=subprocess.run, args=(command,), daemon=True)
    worker.start()
    time.sleep(5)  # Give TensorBoard a moment to start
    logger.info("TensorBoard launched at http://localhost:6006")

# -----------------------------
# 13. Main Workflow
# -----------------------------

def main():
    """Run the full workflow: load data, train, evaluate and visualise."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"Using device: {device}")

    arc_data = load_arc_data()

    # Turn the raw task dictionaries into fixed-size grid pairs
    training_tasks = arc_data.get("arc-agi_training-challenges", {})
    evaluation_tasks = arc_data.get("arc-agi_evaluation-challenges", {})
    train_grid_pairs = flatten_and_reshape(training_tasks, grid_size=GRID_SIZE)
    eval_grid_pairs = flatten_and_reshape(evaluation_tasks, grid_size=GRID_SIZE)

    logger.info(f"Number of training grid pairs: {len(train_grid_pairs)}")
    logger.info(f"Number of evaluation grid pairs: {len(eval_grid_pairs)}")

    # 80/20 split of the training pairs into training and validation sets
    train_pairs, val_pairs = train_test_split(train_grid_pairs, test_size=0.2, random_state=42)

    # Options shared by both loaders; num_workers=0 avoids multiprocessing
    # issues on Windows.
    loader_options = dict(batch_size=32, collate_fn=collate_fn, num_workers=0)
    train_loader = DataLoader(
        AugmentedARCDataset(train_pairs, augment=True),
        shuffle=True,
        **loader_options,
    )
    val_loader = DataLoader(
        AugmentedARCDataset(val_pairs, augment=False),
        shuffle=False,
        **loader_options,
    )

    logger.info(f"Training DataLoader size: {len(train_loader)} batches")
    logger.info(f"Validation DataLoader size: {len(val_loader)} batches")

    model = CNNGridMapper(num_classes=NUM_CLASSES, grid_size=GRID_SIZE).to(device)
    logger.info("Model initialized successfully.")

    # TensorBoard is started in the background before training begins
    launch_tensorboard(log_dir='runs/deep_model')

    train_deep_model(model, train_loader, val_loader, epochs=20, lr=1e-3, device=device, patience=5)

    # Evaluation and visualisation both run on the validation loader
    evaluate_model(model, val_loader, device=device)
    visualize_predictions(model, val_loader, device=device, num_samples=5)

    logger.info("All processes completed successfully.")

# -----------------------------
# 14. Execute the Main Function
# -----------------------------

# Run the workflow only when this module is executed as the main program.
if __name__ == "__main__":
    main()


In [None]:
import math
import json
import logging
import numpy as np
import torch
import random
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from dataclasses import dataclass
from torchvision import models
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# -----------------------------
# 1. Configure Logging and Set Random Seeds
# -----------------------------
# Configure root logging at INFO level and create the logger used in this cell.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def set_seed(seed=42):
    """Seed Python's, NumPy's and PyTorch's random generators with ``seed``."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # CUDA generators are seeded only when a GPU is available.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


# Seed once at definition time with the default seed.
set_seed()

# -----------------------------
# 2. Define Constants and Variables
# -----------------------------
GRID_SIZE = 30        # side length of the square grids (see reshape_to_square_grid)
NUM_CLASSES = 11      # width of the class dimension of logits in the loss computation
EPOCHS = 20           # number of epochs main() passes to train_model
LEARNING_RATE = 1e-3  # learning rate of the Adam optimizer in train_model

# -----------------------------
# 3. Define Data Classes and Functions
# -----------------------------
@dataclass
class GridPair:
    """One input/output grid example together with the id of its task."""
    task_id: str  # identifier of the task the pair belongs to
    input_grid: list  # input grid; converted to a float32 tensor by the dataset
    output_grid: list  # output grid; converted to a long tensor by the dataset

def load_arc_data():
    """Load the ARC challenge JSON files from the current working directory.

    Loading is best-effort: a file that is missing, unreadable or not valid
    JSON is logged as an error and represented by an empty dict.

    Returns:
        dict: Maps ``"arc-agi_training-challenges"`` and
        ``"arc-agi_evaluation-challenges"`` to the parsed JSON content of the
        corresponding file, or to ``{}`` when that file could not be loaded.
    """
    file_paths = {
        "arc-agi_training-challenges": "arc-agi_training_challenges.json",
        "arc-agi_evaluation-challenges": "arc-agi_evaluation_challenges.json",
    }
    arc_data = {}
    for key, path in file_paths.items():
        try:
            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding.
            with open(path, 'r', encoding='utf-8') as f:
                arc_data[key] = json.load(f)
        except (OSError, ValueError) as e:
            # OSError covers missing and unreadable files; ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError.
            logger.error(f"Error loading {path}: {e}")
            arc_data[key] = {}
        else:
            logger.info(f"Loaded {key} from {path}.")
    return arc_data

def reshape_to_square_grid(flat_list):
    """Fit a flat sequence of values into a ``GRID_SIZE`` x ``GRID_SIZE`` grid.

    Input longer than ``GRID_SIZE * GRID_SIZE`` is truncated; shorter input is
    right-padded with -1. Values are laid out row by row.

    Args:
        flat_list: One-dimensional sequence of cell values.

    Returns:
        A list of ``GRID_SIZE`` rows, each a list of ``GRID_SIZE`` values.
    """
    size = GRID_SIZE
    capacity = size * size
    # Truncate first: a negative pad width would make np.pad raise.
    cells = np.asarray(flat_list)[:capacity]
    padded_list = np.pad(cells, (0, capacity - len(cells)), constant_values=-1)
    return padded_list.reshape(size, size).tolist()

class AugmentedARCDataset(Dataset):
    """Dataset that serves grid pairs as ``(input, output)`` tensors.

    The ``augment`` flag is stored but not consulted when items are built.
    """

    def __init__(self, grid_pairs, augment=True):
        self.augment = augment
        self.grid_pairs = grid_pairs

    def __len__(self):
        return len(self.grid_pairs)

    def __getitem__(self, idx):
        sample = self.grid_pairs[idx]
        features = torch.tensor(sample.input_grid, dtype=torch.float32)
        labels = torch.tensor(sample.output_grid, dtype=torch.long)
        # The input gets a leading channel axis; the labels stay 2-D.
        return features.unsqueeze(0), labels

def collate_fn(batch):
    """Stack the inputs and the outputs of a batch into two tensors."""
    input_items, output_items = zip(*batch)
    stacked_inputs = torch.stack(input_items)
    stacked_outputs = torch.stack(output_items)
    return stacked_inputs, stacked_outputs

# -----------------------------
# 4. Define Model
# -----------------------------
class CNNGridMapper(nn.Module):
    """ResNet18 backbone with a linear head producing ``num_classes`` logits.

    The backbone takes single-channel input. Its built-in 1000-way classifier
    is replaced by an identity so the 512-dimensional pooled features reach
    ``self.fc``; without that, ``self.fc`` would receive 1000 features and fail
    with a shape mismatch.

    NOTE(review): the output has shape ``(batch_size, num_classes)``, i.e. one
    prediction per grid, while the training code in this cell flattens targets
    per cell - confirm the intended output layout.
    """

    def __init__(self, num_classes=NUM_CLASSES):
        super(CNNGridMapper, self).__init__()
        self.cnn = models.resnet18(weights=None)
        # Single-channel stem instead of the RGB one
        self.cnn.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Drop the ImageNet classification head; keep the 512-d features
        self.cnn.fc = nn.Identity()
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.cnn(x)
        x = x.view(x.size(0), -1)  # Flatten to (batch_size, 512)
        return self.fc(x)

# -----------------------------
# 5. Training and Evaluation Functions
# -----------------------------
def train_model(model, train_loader, val_loader, epochs, device):
    """Train ``model`` with Adam and plot loss/accuracy curves after each epoch.

    Loss and accuracy are both computed per grid cell on the same flattened
    views: the model output is viewed as ``(-1, NUM_CLASSES)`` logits (class
    scores on the last dimension) and the targets as a flat vector of class
    indices.

    Args:
        model: Network to train; its output must be viewable as
            ``(-1, NUM_CLASSES)`` with one row per target cell.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Number of epochs to run.
        device: Device the model and the batches are moved to.

    Returns:
        None. The model is trained in place.
    """
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()
    model.to(device)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    plt.ion()  # Enable interactive mode for live plotting

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 10))

    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()

            outputs = model(inputs)
            # One row of logits and one class index per grid cell
            flat_logits = outputs.view(-1, NUM_CLASSES)
            flat_targets = targets.view(-1)
            loss = criterion(flat_logits, flat_targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Accuracy uses the same per-cell views as the loss and counts
            # cells, so the ratio stays within [0, 1].
            predicted = flat_logits.argmax(dim=1)
            correct += (predicted == flat_targets).sum().item()
            total += flat_targets.size(0)

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total
        val_loss, val_acc = evaluate_model(model, val_loader, device)

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        logger.info(f"Epoch {epoch + 1}/{epochs} - "
                    f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} - "
                    f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        # Redraw both curves from scratch with the history so far
        ax1.clear()
        ax2.clear()
        ax1.plot(train_losses, label='Train Loss')
        ax1.plot(val_losses, label='Val Loss')
        ax1.legend()
        ax2.plot(train_accuracies, label='Train Acc')
        ax2.plot(val_accuracies, label='Val Acc')
        ax2.legend()
        plt.pause(0.1)

    plt.ioff()  # Disable interactive mode
    plt.show()

def evaluate_model(model, val_loader, device):
    """Compute the mean batch loss and the per-cell accuracy of ``model``.

    The model output is viewed as ``(-1, NUM_CLASSES)`` logits (class scores
    on the last dimension) and the targets as a flat vector of class indices;
    loss and accuracy are both computed on these views.

    Args:
        model: Network to evaluate; it is expected to already be on ``device``.
        val_loader: Iterable of ``(inputs, targets)`` batches.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(mean loss over batches, fraction of correctly predicted cells)``.
    """
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            flat_logits = outputs.view(-1, NUM_CLASSES)
            flat_targets = targets.view(-1)
            loss = criterion(flat_logits, flat_targets)
            total_loss += loss.item()
            # Same per-cell views as the loss; count cells, not samples.
            predicted = flat_logits.argmax(dim=1)
            correct += (predicted == flat_targets).sum().item()
            total += flat_targets.size(0)

    return total_loss / len(val_loader), correct / total

# -----------------------------
# 6. Main Workflow
# -----------------------------
def main():
    """Train the model on 100 randomly generated 30x30 grid pairs.

    The same pairs back both the training and the validation loader.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Synthetic data: random values 0-9 for both input and output grids
    train_pairs = []
    for _ in range(100):
        random_input = np.random.randint(0, 10, (30, 30)).tolist()
        random_output = np.random.randint(0, 10, (30, 30)).tolist()
        train_pairs.append(GridPair("task", random_input, random_output))

    training_set = AugmentedARCDataset(train_pairs)
    train_loader = DataLoader(training_set, batch_size=4, shuffle=True, collate_fn=collate_fn)
    validation_set = AugmentedARCDataset(train_pairs)
    val_loader = DataLoader(validation_set, batch_size=4, shuffle=False, collate_fn=collate_fn)

    model = CNNGridMapper().to(device)
    train_model(model, train_loader, val_loader, EPOCHS, device)

# Run the synthetic-data training only when executed as the main program.
if __name__ == "__main__":
    main()


In [None]:
# models/cnn_grid_mapper.py

import torch
import torch.nn as nn

class CNNGridMapper(nn.Module):
    """Small convolutional encoder/decoder for 10x10 single-channel grids.

    The decoder ends with ``num_classes`` channels on a 9x9 map, and the
    result is viewed as rows of 81 values. The returned tensor therefore has
    shape ``(batch_size * num_classes, 81)``.

    NOTE(review): the class channels are folded into the first dimension, so
    it is not ``(batch_size, 81)`` - confirm the intended output layout.
    """

    def __init__(self, num_classes=11):
        """
        Builds the encoder and decoder stacks.

        Args:
            num_classes (int, optional): Number of output channels of the decoder. Defaults to 11.
        """
        super().__init__()

        encoder_layers = [
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),   # 1 -> 16 channels, 10x10
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),                  # 10x10 -> 5x5
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),  # 16 -> 32 channels, 5x5
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),                  # 5x5 -> 2x2
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2),                      # 2x2 -> 4x4
            nn.ReLU(),
            nn.ConvTranspose2d(16, num_classes, kernel_size=3, stride=1, padding=1),  # stays 4x4
            nn.Upsample(size=(9, 9), mode='bilinear', align_corners=True),            # 4x4 -> 9x9
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        # Interpret the input as (batch_size, 1, 10, 10) grids
        grids = x.view(-1, 1, 10, 10)
        decoded = self.decoder(self.encoder(grids))  # (batch_size, num_classes, 9, 9)
        # Each 9x9 channel map becomes one row of 81 values
        return decoded.view(-1, 81)



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import logging

# Configure logging
# Configure root logging at INFO level and create the logger used by later cells.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


In [None]:
def extract_grid_pairs(challenges, solutions):
    """
    Pairs the input grid of each challenge with the output grid of the solution
    at the same position.

    Args:
        challenges (list of dict): Entries whose 'input' value is the input grid.
        solutions (list of dict): Entries whose 'output' value is the output grid.

    Returns:
        list of tuple: (input_grid, output_grid) pairs; entries with a missing
        grid or that cannot be read are reported with print and skipped.

    Raises:
        ValueError: If the two collections differ in length.
    """
    if len(challenges) != len(solutions):
        raise ValueError("The number of challenges and solutions must be equal.")

    grid_pairs = []

    for challenge, solution in zip(challenges, solutions):
        try:
            grid_in = challenge.get('input')
            grid_out = solution.get('output')

            # Report and skip entries where either grid is absent
            if grid_in is None or grid_out is None:
                print(f"Invalid grid pair: {challenge}, {solution}")
                continue

            grid_pairs.append((grid_in, grid_out))

        except Exception as e:
            print(f"Error extracting grid pair: {e}")

    return grid_pairs


In [None]:
def train_arc_model(model_type='mlp', num_epochs=50, learning_rate=0.001, device='cpu'):
    """
    Trains the specified model type on the ARC dataset.

    The function loads the data, builds the data loaders, trains for
    ``num_epochs`` epochs with Adam and cross-entropy loss, logs train and
    validation metrics every epoch, and saves the final weights to
    ``{model_type}_grid_mapper.pth``.

    Args:
        model_type (str, optional): Type of model ('mlp' or 'cnn'). Defaults to 'mlp'.
        num_epochs (int, optional): Number of training epochs. Defaults to 50.
        learning_rate (float, optional): Learning rate for the optimizer. Defaults to 0.001.
        device (str, optional): Device to train on ('cpu' or 'cuda'). Defaults to 'cpu'.

    Returns:
        nn.Module: Trained model, or None when ``model_type`` is not recognised.
    """
    # Load data (load_arc_data is defined outside this cell)
    arc_data = load_arc_data()
    # NOTE(review): unique_id is assigned but never used in this function.
    unique_id = "007bbfb7"  # Replace with your actual unique ID if different

    train_challenges = arc_data.get("arc-agi_training-challenges", [])
    train_solutions = arc_data.get("arc-agi_training-solutions", [])
    eval_challenges = arc_data.get("arc-agi_evaluation-challenges", [])
    eval_solutions = arc_data.get("arc-agi_evaluation-solutions", [])

    logger.info(f"Number of training challenges: {len(train_challenges)}")
    logger.info(f"Number of training solutions: {len(train_solutions)}")
    logger.info(f"Number of evaluation challenges: {len(eval_challenges)}")
    logger.info(f"Number of evaluation solutions: {len(eval_solutions)}")

    # Extract grid pairs
    train_grid_pairs = extract_grid_pairs(train_challenges, train_solutions)
    eval_grid_pairs = extract_grid_pairs(eval_challenges, eval_solutions)
    logger.info(f"Number of training grid pairs: {len(train_grid_pairs)}")
    logger.info(f"Number of evaluation grid pairs: {len(eval_grid_pairs)}")

    # Create DataLoaders (create_data_loaders is defined outside this cell;
    # presumably it pads grids to max_size with padding_value - TODO confirm)
    batch_size = 32
    train_loader, test_loader = create_data_loaders(
        train_grid_pairs,
        eval_grid_pairs,
        batch_size=batch_size,
        flatten=True,
        max_size=10,
        padding_value=-1
    )

    logger.info(f"Training DataLoader size: {len(train_loader)} batches")
    logger.info(f"Testing DataLoader size: {len(test_loader)} batches")

    # Initialize the model
    if model_type.lower() == 'mlp':
        input_size = 10 * 10  # 100
        output_size = 9 * 9  # 81
        model = MLPGridMapper(input_size=input_size, hidden_sizes=[256, 128], output_size=output_size)
    elif model_type.lower() == 'cnn':
        model = CNNGridMapper(num_classes=11)  # Adjust num_classes as per your dataset
    else:
        logger.error("Invalid model type specified. Choose 'mlp' or 'cnn'.")
        return None

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    model.to(device)
    logger.info(f"Starting training on {device}...")
    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for batch in train_loader:
            # Batches are dicts keyed by 'input_grid' and 'output_grid'
            inputs = batch['input_grid'].to(device)
            targets = batch['output_grid'].to(device)

            optimizer.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size for a per-sample average
            running_loss += loss.item() * inputs.size(0)

            # Calculate accuracy: compare the arg-max position of the outputs
            # with the arg-max position of the targets along dim 1.
            # NOTE(review): this treats each target row as a score vector -
            # confirm that matches the target encoding.
            _, predicted = torch.max(outputs, 1)
            _, actual = torch.max(targets, 1)
            total += targets.size(0)
            correct += (predicted == actual).sum().item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = correct / total

        # Validation (runs on test_loader, which is built from the evaluation pairs)
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for batch in test_loader:
                inputs = batch['input_grid'].to(device)
                targets = batch['output_grid'].to(device)

                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item() * inputs.size(0)

                _, predicted = torch.max(outputs, 1)
                _, actual = torch.max(targets, 1)
                val_total += targets.size(0)
                val_correct += (predicted == actual).sum().item()

        avg_val_loss = val_loss / len(test_loader.dataset)
        val_acc = val_correct / val_total

        logger.info(f"Epoch {epoch}/{num_epochs} - "
                    f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f} - "
                    f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}")

    logger.info("Training complete.")

    # Save the trained model (weights of the last epoch)
    model_filename = f"{model_type}_grid_mapper.pth"
    torch.save(model.state_dict(), model_filename)
    logger.info(f"Model saved to {model_filename}")

    return model


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLPGridMapper(nn.Module):
    """
    Three-layer perceptron mapping a flattened input grid to a flattened output grid.

    Two ReLU-activated hidden layers are followed by a linear output layer;
    only the first two entries of ``hidden_sizes`` are used.
    """

    def __init__(self, input_size=100, hidden_sizes=[256, 128], output_size=81):
        super().__init__()
        first_width, second_width = hidden_sizes[0], hidden_sizes[1]
        self.fc1 = nn.Linear(input_size, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, output_size)

    def forward(self, x):
        hidden = F.relu(self.fc2(F.relu(self.fc1(x))))
        # No activation on the output layer: raw scores are returned
        return self.fc3(hidden)


In [None]:
# Initialize the model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = MLPGridMapper(input_size=100, hidden_sizes=[256, 128], output_size=81).to(device)
criterion = nn.MSELoss()  # Example loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Example data (random) - replace with actual grid data
X_train = torch.randn(100, 100).to(device)  # 100 samples, each with 100 features
y_train = torch.randn(100, 81).to(device)   # 100 samples, each with 81 outputs

# Training loop: each epoch is a single full-batch gradient step on X_train
epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()

    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")


In [None]:
# Save only the model's state dictionary (parameters and buffers) to disk
torch.save(model.state_dict(), 'mlp_grid_mapper.pth')


In [None]:
# Initialize a fresh model with the same architecture as the saved one
model = MLPGridMapper(input_size=100, hidden_sizes=[256, 128], output_size=81).to(device)

# Load the saved state dict; weights_only=True restricts unpickling to tensors
# and other plain data, and map_location places the tensors on `device`
model.load_state_dict(
    torch.load('mlp_grid_mapper.pth', map_location=device, weights_only=True)
)

print("Model loaded successfully.")


In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Example test data (random data)
X_test = torch.randn(20, 100)  # 20 samples, each with 100 features
# Integer tensor of shape (20, 81) with values drawn from [0, 81): every sample
# carries 81 integers, NOT a single class index.
# NOTE(review): consumers that need one label per sample have to reduce each
# row themselves (e.g. argmax over the 81 entries) - confirm this is the
# intended target format.
y_test = torch.randint(0, 81, (20, 81))  # 20 samples, each with 81 possible outputs

# Create a TensorDataset and DataLoader for the test data.
# shuffle=False keeps the sample order fixed; 20 samples / batch_size 4 = 5 batches.
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)


In [None]:
def evaluate_model(model, test_loader, device='cpu'):
    """
    Compute the mean cross-entropy loss and top-1 accuracy of ``model``.

    Args:
        model (nn.Module): Model whose output is treated as per-class logits
            of shape (batch, num_classes).
        test_loader (iterable): Yields ``(inputs, targets)`` batches. Targets
            may be class indices of shape (batch,) or per-class rows of shape
            (batch, num_classes) (one-hot or scores, any dtype); rows are
            reduced to a class index with ``argmax``.
        device (str or torch.device): Device on which to run the evaluation.

    Returns:
        tuple: ``(avg_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    model.eval()

    total_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Decide by rank, not dtype: per-class rows stored as integers
            # would otherwise reach CrossEntropyLoss unconverted and raise.
            if targets.dim() > 1:
                targets = torch.argmax(targets, dim=1)
            # CrossEntropyLoss requires integer (long) class indices.
            targets = targets.long()

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            batch_size = targets.size(0)
            # loss is a batch mean, so weight it by the batch size
            total_loss += loss.item() * batch_size

            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == targets).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples to evaluate.")

    # Averaging over the samples actually seen stays correct even when the
    # loader covers only part of its dataset.
    avg_loss = total_loss / total
    accuracy = correct / total

    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy


In [None]:
import matplotlib.pyplot as plt

def visualize_predictions(model, test_loader, device='cpu', num_samples=5):
    """
    Show input grids next to the model's predicted and actual class.

    One figure is displayed per sample: the input reshaped to 10x10 on the
    left and a text panel with the predicted/actual class on the right.

    Args:
        model (nn.Module): Model whose output is treated as per-class logits.
        test_loader (iterable): Yields ``(inputs, targets)`` batches. Each
            input sample must hold exactly 100 values (it is reshaped to
            10x10). A target may be a scalar class index or a per-class row,
            which is reduced with ``argmax``.
        device (str or torch.device): Device on which to run the model.
        num_samples (int): Maximum number of samples to display; values <= 0
            display nothing.

    Returns:
        None
    """
    model.to(device)
    model.eval()

    # Without this guard the counter check below would only fire after the
    # first figure had already been drawn.
    if num_samples <= 0:
        return

    samples_visualized = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Predicted class indices

            for i in range(inputs.size(0)):
                input_grid = inputs[i].cpu().numpy().reshape(10, 10)  # Input grid

                predicted_class = predicted[i].item()
                target = targets[i]
                # A 0-dim target already is the class index; argmax over it
                # would always report class 0.
                if target.dim() == 0:
                    actual_class = int(target.item())
                else:
                    actual_class = torch.argmax(target).item()

                fig, axs = plt.subplots(1, 2, figsize=(10, 5))

                # Input Grid
                axs[0].imshow(input_grid, cmap='viridis', interpolation='nearest')
                axs[0].set_title("Input Grid (10x10)")
                axs[0].axis('off')

                # Predicted and Actual Class Labels
                axs[1].text(0.5, 0.5, f"Predicted: {predicted_class}\nActual: {actual_class}",
                            fontsize=18, ha='center', va='center')
                axs[1].set_title("Class Prediction")
                axs[1].axis('off')

                plt.tight_layout()
                plt.show()
                # Release the figure so repeated calls do not accumulate
                # open figures in memory.
                plt.close(fig)

                samples_visualized += 1
                if samples_visualized >= num_samples:
                    return

# Visualize predictions: show at most 5 samples from test_loader, running the
# model on the device chosen earlier in the notebook.
visualize_predictions(model, test_loader, device=device, num_samples=5)


In [None]:
# data/generate_and_visualize.py

import matplotlib.pyplot as plt
import torch

def visualize_batch(batch):
    """
    Visualizes a batch of images.

    Draws one row per sample with three panels: the color image (titled with
    the sample's grid values), the grayscale image and the numeric image.

    Args:
        batch (dict): Batch containing color_image, grayscale_image, numeric_image, and grid.
            The color and numeric images are permuted with (1, 2, 0) before
            display, i.e. they are expected channel-first.

    Returns:
        None. An empty batch is ignored without creating a figure.
    """
    color_images = batch['color_image']
    grayscale_images = batch['grayscale_image']
    numeric_images = batch['numeric_image']
    grids = batch['grid']

    batch_size = color_images.size(0)
    if batch_size == 0:
        # plt.subplots rejects a zero-row layout; there is nothing to draw.
        return

    # squeeze=False keeps axs two-dimensional even for a single-sample batch,
    # where the default would collapse it to 1-D and break axs[i, j].
    fig, axs = plt.subplots(batch_size, 3, figsize=(12, 4 * batch_size), squeeze=False)

    for i in range(batch_size):
        # Color Image
        axs[i, 0].imshow(color_images[i].permute(1, 2, 0))
        axs[i, 0].set_title(f"Color Image\nGrid:\n{grids[i].numpy()}")
        axs[i, 0].axis('off')

        # Grayscale Image
        axs[i, 1].imshow(grayscale_images[i].squeeze(), cmap='gray')
        axs[i, 1].set_title("Grayscale Image")
        axs[i, 1].axis('off')

        # Numeric Image
        axs[i, 2].imshow(numeric_images[i].permute(1, 2, 0))
        axs[i, 2].set_title("Numeric Image")
        axs[i, 2].axis('off')

    plt.tight_layout()
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def main():
    """Build the data loaders, fetch one training batch and display it."""
    loaders = get_data_loaders(batch_size=4, grid_size=10, num_classes=11, augment=True)
    # The evaluation loader is created but not needed for this preview
    train_loader, _eval_loader = loaders

    # Only the first batch produced by the training loader is shown
    first_batch = next(iter(train_loader))
    visualize_batch(first_batch)


if __name__ == "__main__":
    main()


In [None]:
# data/data_conversion.py (continued)

import pickle

def save_datasets(train_grids, eval_grids, filepath='data/grids.pkl'):
    """
    Saves the training and evaluation grids to a pickle file.

    The grids are stored as a dict with the keys ``'train_grids'`` and
    ``'eval_grids'``. The parent directory of ``filepath`` is created when it
    does not exist yet.

    Args:
        train_grids (list of np.ndarray): Training grids.
        eval_grids (list of np.ndarray): Evaluation grids.
        filepath (str, optional): Path to save the pickle file. Defaults to 'data/grids.pkl'.
    """
    # open() does not create missing directories, so make sure the target
    # directory exists; a bare file name has no directory part to create.
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filepath, 'wb') as f:
        pickle.dump({'train_grids': train_grids, 'eval_grids': eval_grids}, f)
    logger.info(f"Saved grids to {filepath}.")


def load_datasets(filepath='data/grids.pkl'):
    """
    Loads the training and evaluation grids from a pickle file.

    Args:
        filepath (str, optional): Path to the pickle file. Defaults to 'data/grids.pkl'.

    Returns:
        tuple: (train_grids, eval_grids), or (None, None) when the file does
        not exist.

    Raises:
        KeyError: If the pickled object lacks the 'train_grids' or
            'eval_grids' key.
    """
    # Open directly and handle the failure instead of checking existence
    # first: the file could disappear between the check and the open.
    try:
        with open(filepath, 'rb') as f:
            # NOTE(security): unpickling can execute arbitrary code. Only load
            # files produced by a trusted source.
            data = pickle.load(f)
    except FileNotFoundError:
        logger.error(f"File {filepath} does not exist.")
        return None, None

    logger.info(f"Loaded grids from {filepath}.")
    return data['train_grids'], data['eval_grids']


In [None]:
# training/trainer.py

# ==========================
# 1. Standard Library Imports
# ==========================
import logging

# ==========================
# 2. Third-Party Library Imports
# ==========================
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, log_loss

# ==========================
# 3. Local Application/Module Imports
# ==========================

def train_regular_model(
    model, train_loader, eval_loader, num_epochs, initial_lr, gui, model_num, 
    total_models, device, tuner, shared_layer=None, redis_manager=None,
    accumulation_steps=4
):
    """
    Trains a single model over a specified number of epochs.

    Uses AdamW, a ReduceLROnPlateau scheduler driven by the validation loss,
    label-smoothed cross-entropy, gradient accumulation and (when CUDA is
    available) automatic mixed precision.

    Args:
        model (nn.Module): Model to train; its outputs are treated as logits.
        train_loader (DataLoader): Yields ``(inputs, labels)`` training batches.
        eval_loader (DataLoader): Evaluation data passed to ``evaluate_model``.
        num_epochs (int): Number of epochs to run.
        initial_lr (float): Initial learning rate for AdamW.
        gui: Object exposing a ``queue`` with a ``put`` method; receives one
            metrics dict per epoch.
        model_num (int): Index of this model, reported in the metrics.
        total_models (int): Total number of models, reported in the metrics.
        device (torch.device): Device the batches are moved to.
        tuner: Optional object providing ``adjust_learning_rate(optimizer,
            val_loss)`` and ``early_stopping()``; skipped when falsy.
        shared_layer (callable, optional): Applied to the training inputs
            before the model when given.
        redis_manager (optional): Object providing ``set_value(key, value)``;
            receives the per-epoch metrics when truthy.
        accumulation_steps (int, optional): Number of batches whose gradients
            are accumulated before each optimizer step. Defaults to 4.

    Returns:
        tuple: ``(model, None)``.

    Raises:
        ValueError: If ``accumulation_steps`` is smaller than 1.
    """
    if accumulation_steps < 1:
        raise ValueError("accumulation_steps must be a positive integer.")

    optimizer = optim.AdamW(model.parameters(), lr=initial_lr, weight_decay=1e-4)
    # The scheduler's `verbose` flag is deprecated in recent PyTorch releases,
    # so learning-rate reductions are logged explicitly after each step below.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    use_amp = torch.cuda.is_available()
    scaler = GradScaler() if use_amp else None

    def _optimizer_step():
        # Apply the accumulated gradients, then clear them for the next window.
        if scaler is not None:
            scaler.step(optimizer)
            scaler.update()
        else:
            optimizer.step()
        optimizer.zero_grad()

    best_val_loss, best_val_accuracy = float('inf'), 0.0

    for epoch in range(1, num_epochs + 1):
        model.train()
        # Start from clean gradients so nothing from an earlier epoch (e.g. a
        # batch that failed part-way through backward) leaks into this one.
        optimizer.zero_grad()

        total_loss, correct_predictions, total_samples = 0.0, 0, 0
        has_pending_grads = False

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Explicit None test: module truthiness can depend on __len__
            # (an empty nn.Sequential is falsy).
            if shared_layer is not None:
                inputs = shared_layer(inputs)

            try:
                with autocast(enabled=use_amp):
                    outputs = model(inputs)
                    # Scale so the summed gradients of one accumulation
                    # window match a single averaged batch.
                    loss = criterion(outputs, labels) / accumulation_steps

                if scaler is not None:
                    scaler.scale(loss).backward()
                else:
                    loss.backward()
                has_pending_grads = True

                if (batch_idx + 1) % accumulation_steps == 0:
                    _optimizer_step()
                    has_pending_grads = False

                # Undo the accumulation scaling for reporting
                total_loss += loss.item() * accumulation_steps
                _, predicted = torch.max(outputs, 1)
                correct_predictions += (predicted == labels).sum().item()
                total_samples += labels.size(0)

            except Exception as e:
                # Best effort: a failing batch is logged and skipped
                logging.error(f"Error in batch {batch_idx}: {e}")
                continue

        # Apply gradients from a trailing, incomplete accumulation window;
        # otherwise those batches would never contribute to an update.
        if has_pending_grads:
            _optimizer_step()

        num_batches = len(train_loader)
        avg_train_loss = total_loss / num_batches if num_batches else 0.0
        train_accuracy = correct_predictions / total_samples if total_samples else 0.0

        # NOTE(review): shared_layer is applied to training inputs only; the
        # evaluation call below receives the raw eval_loader batches - confirm
        # that this asymmetry is intended.
        val_loss, val_accuracy = evaluate_model(model, eval_loader, criterion, device)

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            logging.info(f"Epoch {epoch}: learning rate changed from {lr_before} to {lr_after}.")

        # Use Hyperparameter Tuner to adjust learning rate
        if tuner:
            tuner.adjust_learning_rate(optimizer, val_loss)
            if tuner.early_stopping():
                logging.info("Early stopping triggered by HyperparameterTuner.")
                break

        # Update Redis with current metrics
        if redis_manager:
            current_metrics = {
                'model_num': model_num,
                'total_models': total_models,
                'epoch': epoch,
                'total_epochs': num_epochs,
                'loss': avg_train_loss,
                'accuracy': train_accuracy,
                'val_loss': val_loss,
                'val_accuracy': val_accuracy,
                'lr': optimizer.param_groups[0]['lr']
            }
            redis_manager.set_value('current_metrics', current_metrics)

        gui.queue.put({
            'type': 'epoch',
            'model_num': model_num,
            'total_models': total_models,
            'epoch': epoch,
            'total_epochs': num_epochs,
            'loss': avg_train_loss,
            'accuracy': train_accuracy,
            'val_loss': val_loss,
            'val_accuracy': val_accuracy,
            'lr': optimizer.param_groups[0]['lr']
        })

    return model, None

def evaluate_model(model, eval_loader, criterion, device):
    """
    Score the model on the evaluation data without tracking gradients.

    Args:
        model (nn.Module): Model to evaluate; switched to eval mode here.
        eval_loader (DataLoader): Yields ``(inputs, labels)`` batches.
        criterion (nn.Module): Loss applied to ``(outputs, labels)``.
        device (torch.device): Device every batch is moved to.

    Returns:
        tuple: (average_loss, accuracy) - the per-batch loss values averaged
        over the number of batches, and the fraction of samples whose
        highest-scoring output matches the label.
    """
    model.eval()

    loss_sum = 0
    hits = 0
    seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in eval_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_labels).item()

            # Index of the largest output along dimension 1 is the prediction
            predicted = torch.max(logits, 1)[1]
            hits += (predicted == batch_labels).sum().item()
            seen += batch_labels.size(0)

    # Mean over batches (not over samples), as reported to the caller
    return loss_sum / len(eval_loader), hits / seen


In [None]:
# training/trainer.py (excerpt)

from data.data_conversion import get_data_loaders_variable_sizes
from models import MLPMagician, CNNMagician  # Import necessary models
from torch import nn, optim

def main_training():
    """
    Train and validate several MLPMagician models on variable-size grid data.

    Builds the data loaders, creates ``num_models`` models with one AdamW
    optimizer each, then for every epoch trains each model on the training
    loader and afterwards evaluates each model on the evaluation loader,
    logging loss and accuracy.
    """
    # Parameters
    batch_size = 32
    grid_sizes = [3, 5, 10]
    num_classes = 11
    augment = True
    num_models = 6
    num_epochs = 50
    initial_lr = 0.001
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create data loaders
    train_loader, eval_loader = get_data_loaders_variable_sizes(
        batch_size=batch_size,
        grid_sizes=grid_sizes,
        num_classes=num_classes,
        augment=augment
    )

    # Initialize models, optimizers, loss functions, etc.
    models = []
    for _ in range(num_models):
        # num_classes must be passed explicitly: the model's default head
        # size is 10, which cannot represent the label with index 10 that an
        # 11-class problem contains.
        model = MLPMagician(
            input_size=grid_sizes[-1]**2,
            hidden_sizes=[256, 128, 64],
            dropout_rate=0.5,
            num_classes=num_classes
        )
        model.to(device)
        models.append(model)

    criterion = nn.CrossEntropyLoss()
    optimizers = [optim.AdamW(model.parameters(), lr=initial_lr, weight_decay=1e-4) for model in models]

    # NOTE(review): the loops below feed batch['color_image'] to the model and
    # flatten batch['grid'] into one target per grid cell. Whether the image
    # matches the model's input_size and whether the number of outputs
    # matches the number of targets depends on the dataset, which is not
    # visible here - confirm before relying on this loop.

    # Training loop
    for epoch in range(1, num_epochs + 1):
        for model_idx, (model, optimizer) in enumerate(zip(models, optimizers), start=1):
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for batch in train_loader:
                inputs = batch['color_image'].to(device)
                targets = batch['grid'].to(device).view(-1)  # Flatten targets

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # loss is a batch mean; weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs.data, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader.dataset)
            epoch_acc = correct / total
            # All models share one class name, so the index tells them apart
            logger.info(f"Model {model_idx}/{num_models} ({model.__class__.__name__}) - Epoch {epoch}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")

        # Evaluation at the end of each epoch
        for model_idx, model in enumerate(models, start=1):
            model.eval()
            val_loss = 0.0
            correct = 0
            total = 0

            with torch.no_grad():
                for batch in eval_loader:
                    inputs = batch['color_image'].to(device)
                    targets = batch['grid'].to(device).view(-1)

                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    val_loss += loss.item() * inputs.size(0)

                    _, predicted = torch.max(outputs.data, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

            avg_val_loss = val_loss / len(eval_loader.dataset)
            val_accuracy = correct / total
            logger.info(f"Model {model_idx}/{num_models} ({model.__class__.__name__}) - Validation Loss: {avg_val_loss:.4f} - Validation Accuracy: {val_accuracy:.4f}")

    logger.info("Training completed.")

if __name__ == "__main__":
    main_training()


In [None]:
# models/mlp_magician.py

import torch
import torch.nn as nn
import logging

class MLPMagician(nn.Module):
    def __init__(self, input_size, hidden_sizes, dropout_rate=0.5, num_classes=10):
        """
        Build a fully connected classifier.

        Every hidden width contributes a Linear -> ReLU -> Dropout group; a
        final Linear layer maps to ``num_classes`` outputs.

        Args:
            input_size (int): Number of input features.
            hidden_sizes (list): Width of each hidden layer, in order.
            dropout_rate (float): Dropout probability used after every hidden layer.
            num_classes (int): Number of output classes.
        """
        super().__init__()

        modules = []
        in_features = input_size
        for out_features in hidden_sizes:
            modules += [
                nn.Linear(in_features, out_features),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ]
            in_features = out_features
        # Output head: no activation, the raw scores are returned
        modules.append(nn.Linear(in_features, num_classes))
        self.network = nn.Sequential(*modules)

        # Apply the project's weight initialization to the whole module
        initialize_weights(self)
        logging.info("Initialized MLPMagician model with layers: {}".format(self.network))

    def forward(self, x):
        """
        Pass ``x`` through the layer stack.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_size).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, num_classes).
        """
        return self.network(x)


In [None]:
# data/test_data_conversion.py

import unittest
import numpy as np
from PIL import Image

class TestDataConversion(unittest.TestCase):
    # Unit tests for the grid-to-image helpers (grid_to_image,
    # grid_to_grayscale, grid_to_numeric_image, augment_image). Those helpers
    # are not defined or imported in this snippet; they must be in scope when
    # the tests run.

    def setUp(self):
        # 3x3 fixture grid holding the values 0..8 in row-major order
        self.grid = np.array([
            [0, 1, 2],
            [3, 4, 5],
            [6, 7, 8]
        ])
        # RGB triple for every value that occurs in the fixture grid
        self.color_map = {
            0: [0, 0, 0],
            1: [255, 0, 0],
            2: [0, 255, 0],
            3: [0, 0, 255],
            4: [255, 255, 0],
            5: [255, 165, 0],
            6: [128, 0, 128],
            7: [0, 255, 255],
            8: [255, 192, 203],
        }

    def test_grid_to_image(self):
        # One pixel per grid cell, colored through color_map.
        img = grid_to_image(self.grid, self.color_map)
        self.assertIsInstance(img, Image.Image)
        self.assertEqual(img.size, (3, 3))
        pixels = img.load()
        # PIL pixel access is indexed [x, y], so these walk along the first grid row
        self.assertEqual(pixels[0, 0], (0, 0, 0))          # Black
        self.assertEqual(pixels[1, 0], (255, 0, 0))        # Red
        self.assertEqual(pixels[2, 0], (0, 255, 0))        # Green

    def test_grid_to_grayscale(self):
        img = grid_to_grayscale(self.grid)
        self.assertIsInstance(img, Image.Image)
        self.assertEqual(img.mode, 'L')                     # Grayscale
        self.assertEqual(img.size, (3, 3))
        pixels = img.load()
        self.assertEqual(pixels[0, 0], 0)                   # Minimum value normalized to 0
        # NOTE(review): 127 assumes value 4 (of 0..8) scales to 127.5 and is
        # truncated rather than rounded - confirm against grid_to_grayscale.
        self.assertEqual(pixels[1, 1], 127)                 # Mid value

    def test_grid_to_numeric_image(self):
        img = grid_to_numeric_image(self.grid)
        self.assertIsInstance(img, Image.Image)
        self.assertEqual(img.mode, 'RGB')
        self.assertEqual(img.size, (150, 150))               # 3x3 grid with cell_size=50
        pixels = img.load()
        # NOTE(review): with 50-pixel cells, (25, 25) lies in the top-left
        # cell, which holds 0 in self.grid, while the comment below refers to
        # number 1; the expected colors also depend on how the helper draws
        # cells and digits - verify against grid_to_numeric_image.
        self.assertEqual(pixels[25, 25], (255, 0, 0))        # Number 1 in red cell
        self.assertEqual(pixels[75, 75], (255, 255, 0))      # Number 4 in yellow cell

    def test_augment_image(self):
        # Perturbation and noise are disabled; dead squares are always applied.
        img = grid_to_image(self.grid, self.color_map)
        augmented_img, dead_squares = augment_image(img, self.grid, perturb_prob=0.0, dead_square_prob=1.0, noise_prob=0.0)
        # NOTE(review): presumes augment_image adds exactly one dead square
        # per call when dead_square_prob=1.0 - confirm against its implementation.
        self.assertEqual(len(dead_squares), 1)               # Only one dead square expected
        self.assertIsInstance(augmented_img, Image.Image)

# Run the test suite when this file is executed as a script
if __name__ == '__main__':
    unittest.main()


In [None]:
# data/data_conversion.py (continued)

def generate_grids_variable_sizes(num_grids_per_size, grid_sizes, num_classes):
    """
    Build one combined list of grids covering several grid sizes.

    ``generate_grids`` is called once per entry of ``grid_sizes`` and the
    results are concatenated in that order.

    Args:
        num_grids_per_size (int): How many grids to request for each size.
        grid_sizes (list of int): Grid sizes to generate (e.g., [3, 5, 10]).
        num_classes (int): Number of classes/colors.

    Returns:
        list of np.ndarray: All generated grids, grouped by size.
    """
    all_grids = []
    for side in grid_sizes:
        same_size_grids = generate_grids(
            num_grids=num_grids_per_size,
            grid_size=side,
            num_classes=num_classes,
        )
        all_grids += same_size_grids

    total = len(all_grids)
    logger.info(f"Generated {total} grids of varying sizes: {grid_sizes}.")
    return all_grids


def get_data_loaders_variable_sizes(batch_size=32, grid_sizes=None, num_classes=11, augment=True,
                                    num_workers=4, num_train_per_size=1000, num_eval_per_size=200):
    """
    Creates DataLoader instances for training and evaluation with variable grid sizes.

    Args:
        batch_size (int, optional): Number of samples per batch. Defaults to 32.
        grid_sizes (list of int, optional): List of grid sizes. Defaults to [3, 5, 10].
        num_classes (int, optional): Number of classes/colors. Defaults to 11.
        augment (bool, optional): Whether to apply data augmentation to the
            training set (the evaluation set is never augmented). Defaults to True.
        num_workers (int, optional): Worker processes per DataLoader. Defaults to 4.
        num_train_per_size (int, optional): Training grids generated for each
            grid size. Defaults to 1000.
        num_eval_per_size (int, optional): Evaluation grids generated for each
            grid size. Defaults to 200.

    Returns:
        DataLoader: Training DataLoader.
        DataLoader: Evaluation DataLoader.
    """
    # A None sentinel replaces the former mutable list default, which would
    # be shared between calls.
    if grid_sizes is None:
        grid_sizes = [3, 5, 10]

    # Generate grids for training and evaluation
    train_grids = generate_grids_variable_sizes(
        num_grids_per_size=num_train_per_size, grid_sizes=grid_sizes, num_classes=num_classes
    )
    eval_grids = generate_grids_variable_sizes(
        num_grids_per_size=num_eval_per_size, grid_sizes=grid_sizes, num_classes=num_classes
    )

    # Define transformations: every image is resized to 256x256 and converted
    # to a tensor, regardless of the grid size it was rendered from.
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ])

    # Create datasets
    train_dataset = GridDataset(train_grids, transform=transform, augmentation=augment)
    eval_dataset = GridDataset(eval_grids, transform=transform, augmentation=False)

    # Create data loaders (only the training data is shuffled)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    eval_loader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    logger.info("Created training and evaluation DataLoaders with variable grid sizes.")
    return train_loader, eval_loader


In [None]:
# models/cnn_magician.py

import torch
import torch.nn as nn
import torch.nn.functional as F
from utils.weight_initialization import initialize_weights
import logging

class CNNMagician(nn.Module):
    def __init__(self, dropout_rate=0.5, num_classes=10):
        """
        Initializes the CNNMagician model.

        Three Conv -> BatchNorm -> ReLU -> MaxPool stages (32, 64, 128
        channels) are followed by two fully connected layers with dropout in
        between.

        Args:
            dropout_rate (float): Dropout rate for regularization.
            num_classes (int): Number of output classes.
        """
        super(CNNMagician, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # Assuming grayscale input
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        # The feature map is always brought to 3x3 before flattening (see forward)
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(dropout_rate)
        
        # Initialize weights
        initialize_weights(self)
        logging.info("Initialized CNNMagician model with layers.")

    def forward(self, x):
        """
        Forward pass of the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 1, H, W).
                A 28x28 input yields a 3x3 map after the three pooling stages
                and behaves exactly as before; other sizes are adaptively
                average-pooled to 3x3.

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, num_classes).
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # [32, H/2, W/2]
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # [64, H/4, W/4]
        x = self.pool(F.relu(self.bn3(self.conv3(x))))  # [128, H/8, W/8]
        # Fix the spatial size expected by fc1. For a map that is already 3x3
        # every pooling window covers one element, so this is an identity.
        x = F.adaptive_avg_pool2d(x, (3, 3))            # [128, 3, 3]
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)


In [None]:
# models/resnet_magician.py

import torch
import torch.nn as nn
import torch.nn.functional as F
import logging

class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        """
        Build a two-convolution residual block.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            stride (int): Stride of the first convolution (and of the
                shortcut projection, when one is needed).
        """
        super().__init__()

        # Main path: 3x3 conv -> BN -> ReLU -> 3x3 conv -> BN
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # The shortcut needs a 1x1 projection whenever the main path changes
        # the spatial size or the channel count; otherwise it stays None.
        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            self.downsample = None
        else:
            projection = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
            self.downsample = nn.Sequential(projection, nn.BatchNorm2d(out_channels))

    def forward(self, x):
        """
        Apply the block: main path plus shortcut, followed by ReLU.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor after residual connection.
        """
        hidden = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))

        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)

class ResNetMagician(nn.Module):
    def __init__(self, num_classes=10):
        """
        Build a small ResNet-style classifier for single-channel images.

        Args:
            num_classes (int): Number of output classes.
        """
        super().__init__()

        # Stem: 7x7 strided convolution followed by a strided max-pool
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3)  # Assuming grayscale input
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Three residual stages, two blocks each
        self.layer1 = self._make_layer(64, 64, blocks=2, stride=1)
        self.layer2 = self._make_layer(64, 128, blocks=2, stride=2)
        self.layer3 = self._make_layer(128, 256, blocks=2, stride=2)

        # Head: global average pooling and a linear classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

        # Apply the project's weight initialization to the whole module
        initialize_weights(self)
        logging.info("Initialized ResNetMagician model with layers.")

    def _make_layer(self, in_channels, out_channels, blocks, stride):
        """
        Assemble one residual stage.

        The first block changes the channel count and applies ``stride``; the
        remaining blocks keep ``out_channels`` and use the default stride.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            blocks (int): Number of ResNet blocks.
            stride (int): Stride for the first block.

        Returns:
            nn.Sequential: Sequential container of ResNet blocks.
        """
        first_block = ResNetBlock(in_channels, out_channels, stride)
        following_blocks = [ResNetBlock(out_channels, out_channels) for _ in range(blocks - 1)]
        return nn.Sequential(first_block, *following_blocks)

    def forward(self, x):
        """
        Run stem, residual stages and classifier head.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 1, 224, 224).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, num_classes).
        """
        # Stem: [64, H/2, W/2] after the convolution, [64, H/4, W/4] after pooling
        stem = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        # Residual stages: [64, H/4, W/4] -> [128, H/8, W/8] -> [256, H/16, W/16]
        features = self.layer3(self.layer2(self.layer1(stem)))

        # Head: [256, 1, 1] -> [256] -> [num_classes]
        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(pooled)


In [None]:
# models/vision_transformer_magician.py

import torch
import torch.nn as nn
from torchvision.models import vit_b_16
import logging

class VisionTransformerMagician(nn.Module):
    def __init__(self, num_classes=10, pretrained=True):
        """
        Initializes the VisionTransformerMagician model.

        Wraps torchvision's ViT-B/16 and replaces its classification head with
        a freshly initialized linear layer that has ``num_classes`` outputs.

        Args:
            num_classes (int): Number of output classes.
            pretrained (bool): Whether to use a pre-trained ViT model.
        """
        # Function-scope import keeps this block self-contained; the weights
        # enum is only needed while constructing the backbone.
        from torchvision.models import ViT_B_16_Weights

        super(VisionTransformerMagician, self).__init__()
        # `pretrained=` is deprecated in torchvision in favor of `weights=`;
        # IMAGENET1K_V1 is the checkpoint that pretrained=True used to select.
        weights = ViT_B_16_Weights.IMAGENET1K_V1 if pretrained else None
        self.vit = vit_b_16(weights=weights)
        # Replace the classification head. `heads` is an nn.Sequential, so the
        # feature width has to be read from the linear layer inside it.
        in_features = self.vit.heads.head.in_features
        self.vit.heads = nn.Linear(in_features, num_classes)
        
        # Initialize weights of the new head
        initialize_weights(self.vit.heads)
        logging.info("Initialized VisionTransformerMagician model with ViT backbone.")

    def forward(self, x):
        """
        Forward pass of the VisionTransformerMagician model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, 224, 224).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, num_classes).
        """
        return self.vit(x)


In [None]:
# models/dnn_magician.py

import torch
import torch.nn as nn
import logging

class DNNMagician(nn.Module):
    def __init__(self, input_size, hidden_sizes, dropout_rate=0.5, num_classes=10):
        """
        Construct the DNNMagician layer stack.

        Each consecutive pair of widths in ``[input_size, *hidden_sizes]``
        becomes a Linear -> ReLU -> Dropout group; a last Linear layer
        produces ``num_classes`` outputs.

        Args:
            input_size (int): Number of input features.
            hidden_sizes (list): Width of each hidden layer, in order.
            dropout_rate (float): Dropout probability used after every hidden layer.
            num_classes (int): Number of output classes.
        """
        super().__init__()

        widths = [input_size, *hidden_sizes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
            stack.append(nn.Dropout(dropout_rate))
        # Classifier layer fed by the last hidden width (or by the input
        # directly when there are no hidden layers)
        stack.append(nn.Linear(widths[-1], num_classes))
        self.network = nn.Sequential(*stack)

        # Apply the project's weight initialization to the whole module
        initialize_weights(self)
        logging.info("Initialized DNNMagician model with layers: {}".format(self.network))

    def forward(self, x):
        """
        Feed ``x`` through the sequential network.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_size).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, num_classes).
        """
        return self.network(x)


In [None]:
# models/rnn_magician.py

import torch
import torch.nn as nn
import logging

class RNNMagician(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_rate=0.3):
        """
        Initializes the RNNMagician model.

        Args:
            input_size (int): Number of input features per time step.
            hidden_size (int): Number of features in the hidden state.
            num_layers (int): Number of recurrent layers.
            output_size (int): Number of output classes.
            dropout_rate (float): Dropout rate applied between stacked LSTM
                layers; it has no effect when ``num_layers`` is 1.
        """
        super(RNNMagician, self).__init__()
        # nn.LSTM applies dropout only between stacked layers and warns when a
        # non-zero rate is given for a single layer, so pass it only when it
        # can actually be used.
        lstm_dropout = dropout_rate if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=lstm_dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        
        # Initialize weights
        initialize_weights(self)
        logging.info("Initialized RNNMagician model with LSTM layers.")

    def forward(self, x):
        """
        Forward pass of the RNNMagician model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, sequence_length, input_size).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, output_size).
        """
        out, _ = self.lstm(x)  # out: (batch_size, sequence_length, hidden_size)
        out = out[:, -1, :]    # Take the last time step
        out = self.fc(out)     # (batch_size, output_size)
        return out


In [None]:
# utils/logging_setup.py

import logging
import os

def initialize_logging(log_file='logs/training.log', log_level=logging.INFO):
    """
    Initializes the logging configuration.

    Messages are sent both to ``log_file`` and to the default stream handler.

    Args:
        log_file (str): Path to the log file.
        log_level (int): Logging level (e.g., logging.INFO, logging.DEBUG).
    """
    # Create the log directory if it doesn't exist. A bare file name has an
    # empty directory part, and os.makedirs('') would raise.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    
    # Define logging format
    log_format = '%(asctime)s [%(levelname)s] %(message)s'
    
    # Configure logging.
    # NOTE(review): logging.basicConfig does nothing when the root logger
    # already has handlers, so an earlier configuration takes precedence -
    # confirm whether this call is expected to override it.
    logging.basicConfig(
        level=log_level,
        format=log_format,
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler()
        ]
    )
    
    logging.info("Logging is initialized.")


In [None]:
# utils/redis_manager.py

import redis
import json
import logging

class RedisManager:
    """
    Thin JSON-aware wrapper around a Redis client, used as shared memory.
    """

    def __init__(self, host='localhost', port=6379, db=0):
        """
        Connects to the Redis server and verifies the connection with a ping.

        Args:
            host (str): Redis server hostname.
            port (int): Redis server port.
            db (int): Redis database index.

        Raises:
            redis.exceptions.ConnectionError: If the server cannot be reached
                (the error is logged before being re-raised).
        """
        try:
            self.redis = redis.Redis(host=host, port=port, db=db)
            # Creating the client does not prove the server is up; ping does.
            self.redis.ping()
        except redis.exceptions.ConnectionError as err:
            logging.error(f"Redis connection error: {err}")
            raise
        else:
            logging.info(f"Connected to Redis at {host}:{port}, DB: {db}")

    def set_value(self, key, value):
        """
        Stores ``value`` under ``key`` as a JSON string.

        Failures (serialization or Redis errors) are logged, not raised.

        Args:
            key (str): The key under which the value is stored.
            value (any): The value to store (must be JSON serializable).
        """
        try:
            payload = json.dumps(value)
            self.redis.set(key, payload)
            logging.debug(f"Set key '{key}' in Redis.")
        except Exception as err:
            logging.error(f"Error setting key '{key}' in Redis: {err}")

    def get_value(self, key):
        """
        Fetches the JSON string stored under ``key`` and decodes it.

        Args:
            key (str): The key to retrieve.

        Returns:
            any: The deserialized value, or None if the key does not exist,
            holds an empty value, or an error occurred (errors are logged).
        """
        try:
            raw = self.redis.get(key)
            if not raw:
                logging.debug(f"Key '{key}' not found in Redis.")
                return None
            logging.debug(f"Retrieved key '{key}' from Redis.")
            return json.loads(raw)
        except Exception as err:
            logging.error(f"Error retrieving key '{key}' from Redis: {err}")
            return None

    def delete_key(self, key):
        """
        Removes ``key`` from Redis. Failures are logged, not raised.

        Args:
            key (str): The key to delete.
        """
        try:
            self.redis.delete(key)
            logging.debug(f"Deleted key '{key}' from Redis.")
        except Exception as err:
            logging.error(f"Error deleting key '{key}' in Redis: {err}".replace(" in Redis:", " from Redis:"))


In [None]:
# utils/plot_utils.py

import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import logging

def plot_training_metrics(gui, loss_data, val_loss_data, acc_data, val_acc_data):
    """
    Redraws the GUI's metrics plot from scratch with the given histories.

    Each series is plotted against 1-based epoch numbers. Any error raised
    while plotting is logged and swallowed.

    Args:
        gui (TrainingGUI): Instance of the TrainingGUI (its ``ax`` and ``canvas`` are used).
        loss_data (list): List of training loss values.
        val_loss_data (list): List of validation loss values.
        acc_data (list): List of training accuracy values.
        val_acc_data (list): List of validation accuracy values.
    """
    # (values, legend label, line color) for every curve, in drawing order.
    curves = (
        (loss_data, 'Training Loss', 'blue'),
        (val_loss_data, 'Validation Loss', 'orange'),
        (acc_data, 'Training Accuracy', 'green'),
        (val_acc_data, 'Validation Accuracy', 'red'),
    )
    try:
        axes = gui.ax
        axes.clear()
        for values, label, color in curves:
            epochs = range(1, len(values) + 1)
            axes.plot(epochs, values, label=label, color=color)
        axes.set_xlabel('Epochs')
        axes.set_ylabel('Metrics')
        axes.legend()
        axes.grid(True)
        gui.canvas.draw()
        logging.info("Updated training metrics plot.")
    except Exception as err:
        logging.error(f"Error plotting training metrics: {err}")


In [None]:
# utils/weight_initialization.py

import torch.nn as nn
import logging

def initialize_weights(model):
    """
    Initializes, in place, the parameters of every supported sub-module of ``model``.

    - Conv2d / ConvTranspose2d: Kaiming-normal weights (fan_out, ReLU), zero bias.
    - Linear: Xavier-normal weights, zero bias.
    - BatchNorm1d / BatchNorm2d: weight set to 1 and bias to 0. Layers created
      with ``affine=False`` have no learnable weight/bias and are left as is.
    - LSTM: Xavier-normal for every parameter whose name contains 'weight',
      zero for every parameter whose name contains 'bias'.

    Modules of any other type are not touched.

    Args:
        model (nn.Module): The model to initialize.
    """
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            # Lazy %-formatting: the module repr is only built when DEBUG is enabled.
            logging.debug("Initialized Conv layer: %s", m)
        elif isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            logging.debug("Initialized Linear layer: %s", m)
        elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
            # With affine=False both parameters are None; initializing None would raise.
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            logging.debug("Initialized BatchNorm layer: %s", m)
        elif isinstance(m, nn.LSTM):
            for name, param in m.named_parameters():
                if 'weight' in name:
                    nn.init.xavier_normal_(param.data)
                elif 'bias' in name:
                    nn.init.constant_(param.data, 0)
            logging.debug("Initialized LSTM layer: %s", m)


In [None]:
# utils/hyperparameter_tuner.py

import logging

class HyperparameterTuner:
    """
    Reduce-on-plateau learning-rate controller.

    Tracks the best loss seen so far and multiplies the optimizer's learning
    rate by ``factor`` once the loss has failed to improve for ``patience``
    consecutive calls. Intended as a simple strategy that can be expanded
    (e.g. with reinforcement learning).
    """

    def __init__(self, initial_lr, factor=0.5, patience=3, min_lr=1e-6):
        """
        Sets up the tuner state.

        Args:
            initial_lr (float): Initial learning rate.
            factor (float): Factor by which to reduce the learning rate.
            patience (int): Number of epochs to wait before reducing LR.
            min_lr (float): Minimum learning rate.
        """
        # Configuration
        self.initial_lr = initial_lr
        self.factor = factor
        self.patience = patience
        self.min_lr = min_lr
        # Running state
        self.best_loss = float('inf')
        self.counter = 0
        logging.info("Initialized HyperparameterTuner.")

    def adjust_learning_rate(self, optimizer, current_loss):
        """
        Updates the plateau tracking and lowers the learning rate when due.

        Args:
            optimizer (torch.optim.Optimizer): Optimizer whose LR is to be adjusted.
            current_loss (float): Current validation loss.
        """
        # Strict improvement resets the patience counter and ends the call.
        if current_loss < self.best_loss:
            self.best_loss = current_loss
            self.counter = 0
            logging.debug("Validation loss improved; resetting patience counter.")
            return

        self.counter += 1
        logging.debug(f"No improvement in validation loss. Counter: {self.counter}/{self.patience}")
        if self.counter < self.patience:
            return

        for group in optimizer.param_groups:
            reduced_lr = max(group['lr'] * self.factor, self.min_lr)
            # Groups already at (or below) the floor are left untouched.
            if group['lr'] > self.min_lr:
                group['lr'] = reduced_lr
                logging.info(f"Reducing learning rate to {reduced_lr}.")
        self.counter = 0  # Start a new patience window after adjusting

    def early_stopping(self):
        """
        Reports whether training should stop early.

        Returns:
            bool: Always False for now; the early stopping logic is a placeholder.
        """
        return False


In [None]:
# utils/hyperparameter_helper.py

import random
import logging

def randomize_params():
    """
    Draws a random set of training hyperparameters.

    The number of models is drawn from [5, 15], the number of epochs from
    [50, 200] and the learning rate from a fixed list of candidates. If the
    draw fails, the error is logged and fixed defaults are returned.

    Returns:
        tuple: (num_models, num_epochs, initial_lr, other_params)
    """
    lr_candidates = [0.1, 0.01, 0.001, 0.0001]
    try:
        model_count = random.randint(5, 15)  # Example range
        epoch_count = random.randint(50, 200)
        learning_rate = random.choice(lr_candidates)
        extras = {}  # Add other hyperparameters as needed
        logging.info(f"Randomized hyperparameters: num_models={model_count}, num_epochs={epoch_count}, initial_lr={learning_rate}")
    except Exception as err:
        logging.error(f"Error randomizing parameters: {err}")
        return 10, 100, 0.001, {}
    return model_count, epoch_count, learning_rate, extras


In [None]:
# gui/training_gui.py

import tkinter as tk
from tkinter import ttk, messagebox
import queue
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import matplotlib.pyplot as plt
import logging


class TrainingGUI:
    """
    A Tkinter-based GUI that displays real-time training progress, including model metrics,
    learning rate adjustments, ensemble accuracy, and allows querying the LLM for explanations.

    Other threads must not touch the widgets directly. They put dict messages on
    ``self.queue`` and the GUI polls that queue every 100 ms from the Tk event loop.
    Recognised message types are 'epoch', 'ensemble_accuracy' and 'training_completed'.
    """

    def __init__(self, root, total_models, total_epochs, redis_manager, llm):
        """
        Initializes the TrainingGUI.

        Args:
            root (tk.Tk): The root Tkinter window.
            total_models (int): Total number of models to train.
            total_epochs (int): Total number of epochs per model.
            redis_manager (RedisManager): Instance for interacting with Redis.
            llm (StrategyLLM): Instance of the language model for generating explanations.
        """
        self.root = root
        self.queue = queue.Queue()
        self.redis_manager = redis_manager
        self.llm = llm
        self.root.title("Model Training Progress Tracker")

        # Initialize GUI Components
        self._init_labels(total_models, total_epochs)
        self._init_progress_bar()
        self._init_plots()
        self._init_ensemble_accuracy()
        self._init_query_section()

        # Start real-time queue processing
        self.root.after(100, self.process_queue)

    def _init_labels(self, total_models, total_epochs):
        """Initialize the labels to display real-time metrics."""
        self.model_label = tk.Label(self.root, text=f"Training Model: 0/{total_models}", font=("Helvetica", 14))
        self.model_label.pack(pady=5)

        self.epoch_label = tk.Label(self.root, text=f"Epoch: 0/{total_epochs}", font=("Helvetica", 14))
        self.epoch_label.pack(pady=5)

        self.loss_label = tk.Label(self.root, text="Loss: 0.0000", font=("Helvetica", 12))
        self.loss_label.pack(pady=2)

        self.accuracy_label = tk.Label(self.root, text="Accuracy: 0.0000", font=("Helvetica", 12))
        self.accuracy_label.pack(pady=2)

        self.val_loss_label = tk.Label(self.root, text="Validation Loss: 0.0000", font=("Helvetica", 12))
        self.val_loss_label.pack(pady=2)

        self.val_accuracy_label = tk.Label(self.root, text="Validation Accuracy: 0.0000", font=("Helvetica", 12))
        self.val_accuracy_label.pack(pady=2)

        self.lr_label = tk.Label(self.root, text="Learning Rate: 0.000000", font=("Helvetica", 12))
        self.lr_label.pack(pady=2)

    def _init_progress_bar(self):
        """Initialize the progress bar."""
        self.progress_bar = ttk.Progressbar(self.root, orient="horizontal", length=400, mode="determinate")
        self.progress_bar.pack(pady=10)

    def _init_plots(self):
        """Initialize the real-time plots."""
        self.fig, self.ax = plt.subplots(figsize=(6, 4))
        # These empty lines only populate the initial legend; _update_plots clears
        # the axes and re-plots, so the handles are not reused afterwards.
        self.line_loss, = self.ax.plot([], [], label='Training Loss', color='blue')
        self.line_val_loss, = self.ax.plot([], [], label='Validation Loss', color='orange')
        self.line_acc, = self.ax.plot([], [], label='Training Accuracy', color='green')
        self.line_val_acc, = self.ax.plot([], [], label='Validation Accuracy', color='red')

        self.ax.set_xlabel('Epochs')
        self.ax.set_ylabel('Metrics')
        self.ax.legend()
        self.ax.grid(True)

        self.canvas = FigureCanvasTkAgg(self.fig, master=self.root)
        self.canvas.draw()
        self.canvas.get_tk_widget().pack()

        # Data storage for plotting (one entry appended per 'epoch' message)
        self.loss_data = []
        self.val_loss_data = []
        self.acc_data = []
        self.val_acc_data = []

    def _init_ensemble_accuracy(self):
        """Initialize the ensemble accuracy display."""
        self.ensemble_label = tk.Label(self.root, text="Ensemble Accuracy: N/A", font=("Helvetica", 14))
        self.ensemble_label.pack(pady=5)

    def _init_query_section(self):
        """Initialize the section for querying the LLM for explanations."""
        self.query_frame = tk.Frame(self.root)
        self.query_frame.pack(pady=10)

        self.query_label = tk.Label(self.query_frame, text="LLM Query:", font=("Helvetica", 12))
        self.query_label.pack(side=tk.LEFT, padx=5)

        self.query_entry = tk.Entry(self.query_frame, width=50)
        self.query_entry.pack(side=tk.LEFT, padx=5)

        self.query_button = tk.Button(self.query_frame, text="Ask", command=self.handle_query)
        self.query_button.pack(side=tk.LEFT, padx=5)

        self.response_text = tk.Text(self.root, height=10, width=80, state='disabled')
        self.response_text.pack(pady=5)

    def process_queue(self):
        """
        Drain the message queue and apply each update on the GUI thread.

        Non-dict messages are ignored. A message whose handling raises is logged
        and skipped. The next poll is always scheduled, so a single malformed
        message cannot freeze the progress display.
        """
        try:
            while True:
                try:
                    message = self.queue.get_nowait()
                except queue.Empty:
                    break
                if not isinstance(message, dict):
                    continue
                try:
                    self._handle_message(message)
                except Exception:
                    logging.exception("Error handling GUI message: %r", message)
        finally:
            self.root.after(100, self.process_queue)  # Schedule next update

    def _handle_message(self, message):
        """Dispatch a queue message to the matching update method based on its 'type'."""
        msg_type = message.get('type')
        if msg_type == 'epoch':
            self.update_epoch(message)
        elif msg_type == 'ensemble_accuracy':
            self.update_ensemble_accuracy(message.get('accuracy'))
        elif msg_type == 'training_completed':
            self.model_label.config(text="Training Completed")
            messagebox.showinfo("Training Completed", "All models have been trained successfully!")

    def update_epoch(self, data):
        """
        Update the GUI with epoch metrics.

        Args:
            data (dict): Must contain 'model_num', 'total_models', 'epoch', 'total_epochs',
                'loss', 'accuracy', 'val_loss', 'val_accuracy' and 'lr'.
        """
        self._update_labels(data)
        self._update_progress_bar(data['epoch'], data['total_epochs'])
        self._update_plots(data)

    def update_ensemble_accuracy(self, accuracy):
        """
        Update the ensemble accuracy label.

        Args:
            accuracy (float | None): Ensemble accuracy; None (e.g. a message without an
                'accuracy' key) is displayed as "N/A".
        """
        if accuracy is None:
            self.ensemble_label.config(text="Ensemble Accuracy: N/A")
            return
        self.ensemble_label.config(text=f"Ensemble Accuracy: {accuracy:.4f}")

    def _update_labels(self, data):
        """Update the labels with new data."""
        self.model_label.config(text=f"Training Model: {data['model_num']}/{data['total_models']}")
        self.epoch_label.config(text=f"Epoch: {data['epoch']}/{data['total_epochs']}")
        self.loss_label.config(text=f"Loss: {data['loss']:.4f}")
        self.accuracy_label.config(text=f"Accuracy: {data['accuracy']:.4f}")
        self.val_loss_label.config(text=f"Validation Loss: {data['val_loss']:.4f}")
        self.val_accuracy_label.config(text=f"Validation Accuracy: {data['val_accuracy']:.4f}")
        self.lr_label.config(text=f"Learning Rate: {data['lr']:.6f}")

    def _update_progress_bar(self, epoch, total_epochs):
        """Set the progress bar to the percentage of epochs completed (0 if there are no epochs)."""
        # Guard against division by zero when no epochs are configured.
        percent = (epoch / total_epochs) * 100 if total_epochs > 0 else 0
        self.progress_bar["value"] = percent
        self.root.update_idletasks()

    def _update_plots(self, data):
        """Append the new metrics to the stored histories and redraw all four curves."""
        self.loss_data.append(data['loss'])
        self.val_loss_data.append(data['val_loss'])
        self.acc_data.append(data['accuracy'])
        self.val_acc_data.append(data['val_accuracy'])

        series = (
            (self.loss_data, 'Training Loss', 'blue'),
            (self.val_loss_data, 'Validation Loss', 'orange'),
            (self.acc_data, 'Training Accuracy', 'green'),
            (self.val_acc_data, 'Validation Accuracy', 'red'),
        )
        self.ax.clear()
        for values, label, color in series:
            self.ax.plot(range(1, len(values) + 1), values, label=label, color=color)
        self.ax.set_xlabel('Epochs')
        self.ax.set_ylabel('Metrics')
        self.ax.legend()
        self.ax.grid(True)
        self.canvas.draw()

    def handle_query(self):
        """
        Handle user queries to the LLM.

        Reads the query text, fetches the latest metrics from Redis under the key
        'current_metrics' and asks the LLM for an explanation of the current
        model/epoch. The query text itself is only echoed in the response box; it is
        not passed to the LLM call.
        """
        # Whitespace-only input counts as an empty query.
        query = self.query_entry.get().strip()
        if not query:
            messagebox.showwarning("Input Needed", "Please enter a query.")
            return

        # Retrieve current metrics from Redis
        current_metrics = self.redis_manager.get_value('current_metrics')
        if not current_metrics:
            messagebox.showwarning("No Data", "No current metrics available for explanation.")
            return

        # Generate explanation using LLM
        explanation = self.llm.explain_prediction(
            model_num=current_metrics.get('model_num', 0),
            epoch=current_metrics.get('epoch', 0),
            input_data=None,       # Optionally, pass actual input data if available
            prediction=None,      # Optionally, pass actual prediction if available
            actual_label=None     # Optionally, pass actual label if available
        )

        # Display the explanation; the text box is read-only except while inserting.
        response = f"Query: {query}\nExplanation: {explanation}\n"
        self.response_text.config(state='normal')
        self.response_text.insert(tk.END, response + "\n")
        self.response_text.config(state='disabled')
        self.query_entry.delete(0, tk.END)


In [None]:
# training/trainer.py

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
import logging
from queue import Queue
import threading
import time
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import StandardScaler




# Ensure all models are imported in models/__init__.py for easy access

class Trainer:
    """
    Manages the training process for multiple models, interacts with the GUI,
    handles hyperparameter tuning, evaluates models, and creates an ensemble.

    Training runs on a background thread (see ``train``). Progress is published
    to Redis under the key 'current_metrics' and to the GUI through ``gui.queue``.
    """

    # The project classes below are annotated as strings so this class can be
    # defined even where those names are not (yet) in scope.
    def __init__(self,
                 train_loader: DataLoader,
                 eval_loader: DataLoader,
                 total_models: int,
                 total_epochs: int,
                 initial_lr: float,
                 device: torch.device,
                 redis_manager: 'RedisManager',
                 llm: 'StrategyLLM',
                 gui: 'TrainingGUI'):
        """
        Initializes the Trainer with necessary components.

        Args:
            train_loader (DataLoader): DataLoader for training data.
            eval_loader (DataLoader): DataLoader for evaluation data.
            total_models (int): Total number of models to train.
            total_epochs (int): Number of epochs per model.
            initial_lr (float): Initial learning rate.
            device (torch.device): Device to perform training on.
            redis_manager (RedisManager): Instance for interacting with Redis.
            llm (StrategyLLM): Instance of the language model for explanations.
            gui (TrainingGUI): Instance of the GUI for updates.
        """
        self.train_loader = train_loader
        self.eval_loader = eval_loader
        self.total_models = total_models
        self.total_epochs = total_epochs
        self.initial_lr = initial_lr
        self.device = device
        self.redis_manager = redis_manager
        self.llm = llm
        self.gui = gui
        self.models = []            # Successfully trained models, in training order
        self.ensemble_model = None  # Fitted RandomForestClassifier (set by create_ensemble)
        self.scaler = None          # Fitted StandardScaler matching ensemble_model
        self.tuner = HyperparameterTuner(initial_lr=initial_lr)
        self.lock = threading.Lock()  # To manage access to shared resources

    def get_model(self, model_num: int):
        """
        Dynamically initializes a model based on the model number.

        Architectures are cycled through by ``model_num % 6`` and each is built
        with a fixed set of constructor arguments.

        Args:
            model_num (int): The current model number.

        Returns:
            nn.Module: An instance of the selected model, moved to ``self.device``.
        """
        # (architecture class, constructor arguments), cycled through by model number.
        architectures = [
            (MLPMagician, dict(input_size=784, hidden_sizes=[256, 128, 64], dropout_rate=0.5)),
            (CNNMagician, dict(dropout_rate=0.5, num_classes=10)),
            (ResNetMagician, dict(num_classes=10)),
            (VisionTransformerMagician, dict(num_classes=10, pretrained=True)),
            (DNNMagician, dict(input_size=784, hidden_sizes=[512, 256, 128], dropout_rate=0.5)),
            (RNNMagician, dict(input_size=10, hidden_size=50, num_layers=2, output_size=10, dropout_rate=0.3)),
        ]
        architecture, kwargs = architectures[model_num % len(architectures)]
        model = architecture(**kwargs)

        model.to(self.device)
        logging.info(f"Initialized {architecture.__name__} for Model {model_num}.")
        return model

    def train(self):
        """
        Starts the training process in a separate thread to keep the GUI responsive.
        """
        training_thread = threading.Thread(target=self._training_loop, daemon=True)
        training_thread.start()

    def _train_one_epoch(self, model, optimizer, criterion, grad_scaler):
        """
        Runs one optimisation pass over ``self.train_loader``.

        Args:
            model (nn.Module): Model being trained.
            optimizer (torch.optim.Optimizer): Optimizer stepping the model parameters.
            criterion (nn.Module): Loss function.
            grad_scaler: ``torch.cuda.amp.GradScaler`` for mixed precision, or None.

        Returns:
            tuple: (mean loss per sample, accuracy) over the samples actually seen.

        Raises:
            ValueError: If the training loader yields no samples.
        """
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        use_amp = self.device.type == 'cuda'

        for inputs, labels in self.train_loader:
            inputs, labels = inputs.to(self.device), labels.to(self.device)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            if grad_scaler is not None:
                grad_scaler.scale(loss).backward()
                grad_scaler.step(optimizer)
                grad_scaler.update()
            else:
                loss.backward()
                optimizer.step()

            # The criterion returns a batch mean; weight it by batch size so that the
            # final average is per sample even when the last batch is smaller.
            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        if total == 0:
            raise ValueError("Training loader produced no samples.")
        return running_loss / total, correct / total

    def _training_loop(self):
        """
        The main training loop that iterates over the number of models and epochs.

        For every model: train for ``total_epochs`` epochs, validate after each epoch,
        let the tuner lower the learning rate on plateaus, publish the metrics, and
        finally save a checkpoint under ``models/``. A failure while training one
        model is logged and the loop moves on to the next model. Only models that
        finished training are kept for the ensemble.
        """
        import os  # Local import: only needed for creating the checkpoint directory

        for model_num in range(1, self.total_models + 1):
            try:
                model = self.get_model(model_num)
                optimizer = optim.AdamW(model.parameters(), lr=self.initial_lr, weight_decay=1e-4)
                criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
                grad_scaler = torch.cuda.amp.GradScaler() if self.device.type == 'cuda' else None

                # Fresh plateau state per model: the best loss and patience counter of
                # a previous model must not influence this one. The tuner is the only
                # learning-rate controller, so the rate is reduced once per plateau.
                self.tuner = HyperparameterTuner(initial_lr=self.initial_lr)

                for epoch in range(1, self.total_epochs + 1):
                    epoch_loss, epoch_acc = self._train_one_epoch(model, optimizer, criterion, grad_scaler)
                    val_loss, val_acc = self.evaluate(model, criterion)

                    self.tuner.adjust_learning_rate(optimizer, val_loss)
                    current_lr = optimizer.param_groups[0]['lr']

                    # Update Redis with current metrics
                    current_metrics = {
                        'model_num': model_num,
                        'epoch': epoch,
                        'total_models': self.total_models,
                        'total_epochs': self.total_epochs,
                        'loss': epoch_loss,
                        'accuracy': epoch_acc,
                        'val_loss': val_loss,
                        'val_accuracy': val_acc,
                        'lr': current_lr
                    }
                    self.redis_manager.set_value('current_metrics', current_metrics)

                    # Send the same metrics to the GUI, tagged as an epoch update
                    self.gui.queue.put({'type': 'epoch', **current_metrics})

                    logging.info(f"Model {model_num}, Epoch {epoch}/{self.total_epochs} - "
                                 f"Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}, "
                                 f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}, "
                                 f"LR: {current_lr:.6f}")

                # Register the model only once it has trained without errors, so a
                # model that failed cannot break or pollute the ensemble.
                self.models.append(model)

                # Save model checkpoint (torch.save does not create missing directories)
                os.makedirs('models', exist_ok=True)
                checkpoint_path = f'models/model_{model_num}.pth'
                torch.save(model.state_dict(), checkpoint_path)
                logging.info(f"Saved Model {model_num} at '{checkpoint_path}'.")

            except Exception as e:
                logging.exception(f"Error training Model {model_num}: {e}")
                continue

        # After training all models, create an ensemble
        self.create_ensemble()

        # Notify GUI that training is completed
        self.gui.queue.put({'type': 'training_completed'})

    def evaluate(self, model: nn.Module, criterion: nn.Module):
        """
        Evaluates the model on the validation dataset.

        Args:
            model (nn.Module): The trained model.
            criterion (nn.Module): The loss function.

        Returns:
            tuple: (validation_loss, validation_accuracy), averaged over the samples
            yielded by ``self.eval_loader``. If the loader yields no samples,
            ``(float('inf'), 0.0)`` is returned and a warning is logged.
        """
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in self.eval_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total == 0:
            logging.warning("Validation loader produced no samples.")
            return float('inf'), 0.0

        avg_val_loss = val_loss / total
        val_accuracy = correct / total

        logging.info(f"Validation - Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

        return avg_val_loss, val_accuracy

    def _collect_ensemble_features(self, loader):
        """
        Builds the ensemble's input matrix from a single pass over ``loader``.

        Every batch is fed to all trained models and their outputs are concatenated
        side by side. Using one pass keeps each row (and its label) tied to the same
        sample for all models even when the loader shuffles or augments its data.

        Args:
            loader (DataLoader): Loader yielding ``(inputs, labels)`` batches.

        Returns:
            tuple: (features, labels) as NumPy arrays; ``features`` has one row per
            sample and the outputs of all models as columns.

        Raises:
            ValueError: If there are no trained models or the loader yields no batches.
        """
        if not self.models:
            raise ValueError("No trained models available for the ensemble.")

        for model in self.models:
            model.eval()

        feature_batches = []
        label_batches = []
        with torch.no_grad():
            for inputs, lbls in loader:
                inputs = inputs.to(self.device)
                per_model_outputs = [model(inputs).cpu().numpy() for model in self.models]
                feature_batches.append(np.hstack(per_model_outputs))
                label_batches.append(lbls.cpu().numpy())

        if not feature_batches:
            raise ValueError("Data loader produced no batches for the ensemble.")

        return np.vstack(feature_batches), np.hstack(label_batches)

    def create_ensemble(self):
        """
        Creates an ensemble model using a Random Forest classifier based on the trained models' outputs.

        The forest is fitted on standardised model outputs for the training data,
        saved together with its scaler to 'ensemble_random_forest.pkl', evaluated on
        the validation data, and the resulting accuracy is sent to the GUI. Any error
        is logged and swallowed.
        """
        try:
            combined_features, combined_labels = self._collect_ensemble_features(self.train_loader)

            # Scale features
            scaler = StandardScaler()
            scaled_features = scaler.fit_transform(combined_features)

            # Train Random Forest ensemble
            rf = RandomForestClassifier(n_estimators=300, max_depth=12, random_state=42, n_jobs=-1)
            rf.fit(scaled_features, combined_labels)
            self.ensemble_model = rf
            self.scaler = scaler

            # Save ensemble model
            ensemble_path = 'ensemble_random_forest.pkl'
            joblib.dump((rf, scaler), ensemble_path)
            logging.info(f"Saved ensemble model at '{ensemble_path}'.")

            # Evaluate ensemble on validation data
            _, val_accuracy = self.evaluate_ensemble()
            self.gui.queue.put({'type': 'ensemble_accuracy', 'accuracy': val_accuracy})

        except Exception as e:
            logging.exception(f"Error creating ensemble: {e}")

    def evaluate_ensemble(self):
        """
        Evaluates the ensemble model on the validation dataset.

        Returns:
            tuple: (validation_loss, validation_accuracy). ``(float('inf'), 0.0)`` is
            returned when no ensemble has been fitted yet.
        """
        if self.ensemble_model is None or self.scaler is None:
            logging.error("Ensemble model or scaler not found.")
            return float('inf'), 0.0

        combined_features, combined_labels = self._collect_ensemble_features(self.eval_loader)

        # Scale features with the scaler fitted on the training features
        scaled_features = self.scaler.transform(combined_features)

        # Predict with Random Forest
        predictions = self.ensemble_model.predict(scaled_features)
        prediction_probs = self.ensemble_model.predict_proba(scaled_features)

        # Calculate metrics. The probability columns follow the forest's classes_, so
        # pass them explicitly; the validation labels may not contain every class.
        accuracy = accuracy_score(combined_labels, predictions)
        loss = log_loss(combined_labels, prediction_probs, labels=self.ensemble_model.classes_)

        logging.info(f"Ensemble Validation - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

        return loss, accuracy



In [None]:
# utils/__init__.py

from .logging_setup import initialize_logging
from .redis_manager import RedisManager
from .plot_utils import plot_training_metrics
from .weight_initialization import initialize_weights
from .hyperparameter_tuner import HyperparameterTuner
from .hyperparameter_helper import randomize_params

# Public API of the utils package: the names exported by `from utils import *`.
__all__ = [
    "initialize_logging",
    "RedisManager",
    "plot_training_metrics",
    "initialize_weights",
    "HyperparameterTuner",
    "randomize_params",
]


In [None]:
# models/__init__.py

from .mlp_magician import MLPMagician
from .cnn_magician import CNNMagician
from .resnet_magician import ResNetMagician
from .vision_transformer_magician import VisionTransformerMagician
from .dnn_magician import DNNMagician
from .rnn_magician import RNNMagician

# Public API of the models package: the model architectures exported by
# `from models import *`.
__all__ = [
    "MLPMagician",
    "CNNMagician",
    "ResNetMagician",
    "VisionTransformerMagician",
    "DNNMagician",
    "RNNMagician",
]


In [None]:
# main.py

import tkinter as tk
from utils.logging_setup import initialize_logging
from utils.redis_manager import RedisManager
from utils.strategy_llm import StrategyLLM
from gui.training_gui import TrainingGUI
from data.data_loader import load_arc_data, prepare_training_data, prepare_evaluation_data
from training.trainer import Trainer
from utils.hyperparameter_helper import randomize_params
import torch

def main():
    """Assemble the training application and run it.

    Steps, in order: initialise logging, create the Redis manager and the
    strategy LLM, load the ARC data, build the training and evaluation
    loaders, draw random hyperparameters, pick the compute device, create the
    Tk GUI and the trainer, run training, and finally enter the Tk main loop.

    Returns early (after printing a message) when ``load_arc_data()`` returns
    ``None``.
    """
    initialize_logging()

    # Shared services handed to both the GUI and the trainer.
    redis_manager = RedisManager(host='localhost', port=6379, db=0)
    llm = StrategyLLM(model_name='gpt2')  # Adjust model name as needed

    arc_data = load_arc_data()
    if arc_data is None:
        print("Failed to load ARC data.")
        return

    # Training batches are shuffled and augmented; evaluation batches are not.
    train_loader = prepare_training_data(
        arc_data=arc_data,
        batch_size=32,
        shuffle=True,
        augment=True,
        seed_manager=None  # Initialize SeedManager if needed
    )
    eval_loader = prepare_evaluation_data(
        arc_data=arc_data,
        batch_size=32,
        shuffle=False
    )

    # The fourth value returned by randomize_params() is not used here.
    model_count, epoch_count, learning_rate, _ = randomize_params()

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    root = tk.Tk()
    gui_options = {
        'root': root,
        'total_models': model_count,
        'total_epochs': epoch_count,
        'redis_manager': redis_manager,
        'llm': llm,
    }
    gui = TrainingGUI(**gui_options)

    trainer_options = {
        'train_loader': train_loader,
        'eval_loader': eval_loader,
        'total_models': model_count,
        'total_epochs': epoch_count,
        'initial_lr': learning_rate,
        'device': device,
        'redis_manager': redis_manager,
        'llm': llm,
        'gui': gui,
    }
    trainer = Trainer(**trainer_options)

    # NOTE(review): train() is called before the Tk event loop starts; if it
    # blocks until training finishes, the window cannot process events in the
    # meantime -- confirm against Trainer.train().
    trainer.train()

    root.mainloop()


if __name__ == "__main__":
    main()


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import random

# Module-level compute device (CUDA when available, otherwise CPU); read by
# initialize_hidden() below when it allocates the LSTM state tensors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class CNNGridExtractor(nn.Module):
    """Extract a 256-dimensional feature vector from single-channel grids.

    Two 3x3 convolutions (padding 1, so spatial size is preserved), each
    followed by a 2x2 max-pool and a ReLU, then a fully connected layer.

    Args:
        grid_size (int): Side length of the square input grid. Defaults to 30.
    """
    def __init__(self, grid_size=30):
        super(CNNGridExtractor, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # forward() pools twice, and each pool floors the side length in half
        # (30 -> 15 -> 7), so the flattened size is 64 * 7 * 7 for the default
        # grid -- not 64 * 15 * 15, which only accounts for one pooling step.
        pooled_size = grid_size // 2 // 2
        self.fc = nn.Linear(64 * pooled_size * pooled_size, 256)

    def forward(self, x):
        """Map a (batch, 1, grid_size, grid_size) tensor to (batch, 256) features."""
        x = F.relu(self.pool(self.conv1(x)))
        x = F.relu(self.pool(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten everything except the batch axis
        features = F.relu(self.fc(x))
        return features

class RNNLearner(nn.Module):
    """LSTM followed by a linear projection of the final time step.

    The LSTM is batch-first; the projection maps ``hidden_size`` to 64.
    """
    def __init__(self, input_size=256, hidden_size=128, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=64)

    def forward(self, x, hidden_state):
        """Run the LSTM and project its last time step.

        Returns:
            tuple: (projected output, updated LSTM state).
        """
        sequence_out, next_state = self.lstm(x, hidden_state)
        final_step = sequence_out[:, -1, :]  # keep only the last time step
        return self.fc(final_step), next_state

class HybridGridNetwork(nn.Module):
    """CNN feature extractor feeding an LSTM learner and a 10-way linear head."""
    def __init__(self):
        super().__init__()
        self.cnn = CNNGridExtractor()
        self.rnn = RNNLearner()
        self.fc = nn.Linear(in_features=64, out_features=10)  # Example output layer (10 classes)

    def forward(self, x, hidden_state):
        """Return (class scores, updated LSTM state) for a batch of grids."""
        # The CNN yields one feature vector per sample; insert a length-1
        # sequence axis at position 1 so the batch-first LSTM accepts it.
        step_features = torch.unsqueeze(self.cnn(x), 1)
        rnn_output, next_state = self.rnn(step_features, hidden_state)
        return self.fc(rnn_output), next_state

def initialize_hidden(batch_size, hidden_size=128, num_layers=2):
    """Return a zero-filled (h_0, c_0) pair for the LSTM on the module-level device.

    Each tensor has shape (num_layers, batch_size, hidden_size).
    """
    state_shape = (num_layers, batch_size, hidden_size)
    h0 = torch.zeros(state_shape).to(device)
    c0 = torch.zeros(state_shape).to(device)
    return (h0, c0)

def preprocess_grid(grid):
    """Turn a grid into a float32 tensor with a leading channel axis, scaled by 1/10."""
    as_float = torch.tensor(grid, dtype=torch.float32)
    with_channel = torch.unsqueeze(as_float, dim=0)  # prepend the channel axis
    return torch.div(with_channel, 10.0)  # divide every cell value by 10

def compute_reward(predicted, target):
    """Compute a scalar reward as ``1 - MSE`` between prediction and target.

    Two input layouts are supported:

    * ``predicted`` and ``target`` are tensors that ``F.mse_loss`` can compare
      directly (original behaviour).
    * ``predicted`` is a 2-D tensor of per-class scores and ``target`` is a
      1-D integer tensor of class indices (the layout used with
      ``nn.CrossEntropyLoss``). ``F.mse_loss`` cannot broadcast these shapes,
      so the scores are turned into probabilities with a softmax over the
      last axis and compared against the one-hot encoding of the target.

    Args:
        predicted (torch.Tensor): Model output.
        target (torch.Tensor): Ground truth, either matching ``predicted`` or
            holding class indices.

    Returns:
        float: ``1.0 - mse``; 1.0 means a perfect match.
    """
    # The reward is only a reporting metric, so keep it out of autograd.
    predicted = predicted.detach()

    is_class_index_target = (
        predicted.dim() == 2
        and target.dim() == 1
        and not torch.is_floating_point(target)
    )
    if is_class_index_target:
        probabilities = F.softmax(predicted, dim=-1)
        one_hot = F.one_hot(target.long(), num_classes=predicted.size(-1))
        return 1.0 - F.mse_loss(probabilities, one_hot.to(probabilities.dtype)).item()

    return 1.0 - F.mse_loss(predicted, target).item()

def train_hybrid_model(
    model, train_loader, eval_loader, num_epochs, optimizer, criterion, device
):
    """Train the hybrid model with gradient descent and report a reward per epoch.

    Each batch does a forward pass, a backward pass on ``criterion`` and an
    optimizer step. The reward from ``compute_reward`` is accumulated for
    reporting only; it does not influence the gradients. After every epoch the
    model is evaluated with ``evaluate_model`` and a summary line is printed
    (loss and reward are sums over the epoch's batches).

    The LSTM state is carried from one batch to the next, but detached first so
    that ``backward()`` never reaches into the previous batch's graph (which
    has already been freed). When the batch size changes -- e.g. on a short
    final batch -- the state is re-initialised to match.

    Args:
        model: Network called as ``model(inputs, hidden_state)`` and returning
            ``(outputs, hidden_state)``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        eval_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs (int): Number of passes over ``train_loader``.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batches and the LSTM state are moved to.

    Returns:
        The trained ``model`` (same object that was passed in).
    """
    hidden_state = None

    for epoch in range(num_epochs):
        model.train()
        total_loss, total_reward = 0, 0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            batch_size = inputs.size(0)

            if hidden_state is None or hidden_state[0].size(1) != batch_size:
                # First batch, or the batch size changed: start from zeros.
                hidden_state = tuple(
                    state.to(device) for state in initialize_hidden(batch_size)
                )
            else:
                # Keep the values but cut the link to the previous graph.
                hidden_state = tuple(state.detach() for state in hidden_state)

            optimizer.zero_grad()

            outputs, hidden_state = model(inputs, hidden_state)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            reward = compute_reward(outputs.detach(), targets)
            total_reward += reward
            total_loss += loss.item()

        _, val_accuracy = evaluate_model(model, eval_loader, criterion, device)
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}, Reward: {total_reward:.4f}, Val Acc: {val_accuracy:.4f}")

    return model

def evaluate_model(model, eval_loader, criterion, device):
    """Evaluate the model on validation data.

    A fresh zero LSTM state is created for every batch, sized from the batch
    actually received. Sizing it once from ``eval_loader.batch_size`` fails on
    a final batch that is smaller than the nominal batch size, because the
    LSTM requires the state's batch dimension to match its input.

    Args:
        model: Network called as ``model(inputs, hidden_state)``.
        eval_loader: Iterable of ``(inputs, targets)`` batches; must support ``len()``.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batches and the LSTM state are moved to.

    Returns:
        tuple: (mean loss per batch, fraction of correctly classified samples).
    """
    model.eval()
    total_loss, correct_predictions, total_samples = 0, 0, 0

    with torch.no_grad():
        for inputs, targets in eval_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            hidden_state = tuple(
                state.to(device) for state in initialize_hidden(inputs.size(0))
            )
            outputs, _ = model(inputs, hidden_state)
            loss = criterion(outputs, targets)
            total_loss += loss.item()

            # Predicted class = index of the largest score along axis 1.
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == targets).sum().item()
            total_samples += targets.size(0)

    avg_loss = total_loss / len(eval_loader)
    accuracy = correct_predictions / total_samples
    return avg_loss, accuracy

# Example usage: train HybridGridNetwork on synthetic data.
grid_data = np.random.randint(0, 10, (100, 30, 30))  # Generate random grid data
grid_labels = np.random.randint(0, 10, (100,))  # Random labels

# Prepare DataLoader
grids = torch.stack([preprocess_grid(grid) for grid in grid_data])
# nn.CrossEntropyLoss needs int64 class indices; numpy's default integer type
# is platform dependent, so request the dtype explicitly.
labels = torch.tensor(grid_labels, dtype=torch.long)
dataset = TensorDataset(grids, labels)
train_loader = DataLoader(dataset, batch_size=8, shuffle=True)
eval_loader = DataLoader(dataset, batch_size=8)  # same samples, unshuffled

model = HybridGridNetwork().to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Train the model
train_hybrid_model(model, train_loader, eval_loader, num_epochs=500, optimizer=optimizer, criterion=criterion, device=device)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
from sklearn.ensemble import RandomForestClassifier
import numpy as np

class CNNExtractor(nn.Module):
    """Extract a 512-dimensional feature vector from 3-channel images.

    Two 3x3 convolutions (padding 1, so spatial size is preserved), each
    followed by a 2x2 max-pool and a ReLU, then a fully connected layer.

    Args:
        input_size (int): Side length of the square input image. Defaults to 30.
    """
    def __init__(self, input_size=30):
        super(CNNExtractor, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # forward() pools twice, and each pool floors the side length in half
        # (30 -> 15 -> 7), so the flattened size is 128 * 7 * 7 for the default
        # input -- not 128 * 15 * 15, which only accounts for one pooling step.
        pooled_size = input_size // 2 // 2
        self.fc = nn.Linear(128 * pooled_size * pooled_size, 512)

    def forward(self, x):
        """Map a (batch, 3, input_size, input_size) tensor to (batch, 512) features."""
        x = F.relu(self.pool(self.conv1(x)))
        x = F.relu(self.pool(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten everything except the batch axis
        features = F.relu(self.fc(x))
        return features

class RNNLearner(nn.Module):
    """LSTM followed by a linear projection of the final time step.

    The LSTM is batch-first; the projection maps ``hidden_size`` to 128.
    """
    def __init__(self, input_size=512, hidden_size=256, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=128)

    def forward(self, x, hidden_state):
        """Run the LSTM and project its last time step.

        Returns:
            tuple: (projected output, updated LSTM state).
        """
        sequence_out, next_state = self.lstm(x, hidden_state)
        final_step = sequence_out[:, -1, :]  # keep only the last time step
        return self.fc(final_step), next_state

class HybridNetwork(nn.Module):
    """CNN feature extractor feeding an LSTM learner and a 10-way linear head."""
    def __init__(self):
        super().__init__()
        self.cnn = CNNExtractor()
        self.rnn = RNNLearner()
        self.fc = nn.Linear(in_features=128, out_features=10)  # Example output layer (10 classes)

    def forward(self, x, hidden_state):
        """Return (class scores, updated LSTM state) for a batch of images."""
        # The CNN yields one feature vector per sample; insert a length-1
        # sequence axis at position 1 so the batch-first LSTM accepts it.
        step_features = torch.unsqueeze(self.cnn(x), 1)
        rnn_output, next_state = self.rnn(step_features, hidden_state)
        return self.fc(rnn_output), next_state

def initialize_hidden(batch_size, hidden_size=256, num_layers=2):
    """Return a zero-filled (h_0, c_0) pair for the LSTM on the module-level device.

    Each tensor has shape (num_layers, batch_size, hidden_size).
    """
    state_shape = (num_layers, batch_size, hidden_size)
    h0 = torch.zeros(state_shape).to(device)
    c0 = torch.zeros(state_shape).to(device)
    return (h0, c0)

def compute_reward(predicted, target):
    """Compute a scalar reward as ``1 - MSE`` between prediction and target.

    Two input layouts are supported:

    * ``predicted`` and ``target`` are tensors that ``F.mse_loss`` can compare
      directly (original behaviour).
    * ``predicted`` is a 2-D tensor of per-class scores and ``target`` is a
      1-D integer tensor of class indices (the layout used with
      ``nn.CrossEntropyLoss``). ``F.mse_loss`` cannot broadcast these shapes,
      so the scores are turned into probabilities with a softmax over the
      last axis and compared against the one-hot encoding of the target.

    Args:
        predicted (torch.Tensor): Model output.
        target (torch.Tensor): Ground truth, either matching ``predicted`` or
            holding class indices.

    Returns:
        float: ``1.0 - mse``; 1.0 means a perfect match.
    """
    # The reward is only a reporting metric, so keep it out of autograd.
    predicted = predicted.detach()

    is_class_index_target = (
        predicted.dim() == 2
        and target.dim() == 1
        and not torch.is_floating_point(target)
    )
    if is_class_index_target:
        probabilities = F.softmax(predicted, dim=-1)
        one_hot = F.one_hot(target.long(), num_classes=predicted.size(-1))
        return 1.0 - F.mse_loss(probabilities, one_hot.to(probabilities.dtype)).item()

    return 1.0 - F.mse_loss(predicted, target).item()  # Reward is one minus the MSE

def train_hybrid_model(
    model, train_loader, eval_loader, num_epochs, optimizer, criterion, device
):
    """Train the hybrid model with gradient descent and report a reward per epoch.

    Each batch does a forward pass, a backward pass on ``criterion`` and an
    optimizer step. The reward from ``compute_reward`` is accumulated for
    reporting only; it does not influence the gradients. After every epoch the
    model is evaluated with ``evaluate_model`` and a summary line is printed
    (loss and reward are sums over the epoch's batches).

    The LSTM state is carried from one batch to the next, but detached first so
    that ``backward()`` never reaches into the previous batch's graph (which
    has already been freed). When the batch size changes -- e.g. on a short
    final batch -- the state is re-initialised to match.

    Args:
        model: Network called as ``model(inputs, hidden_state)`` and returning
            ``(outputs, hidden_state)``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        eval_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs (int): Number of passes over ``train_loader``.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batches and the LSTM state are moved to.

    Returns:
        The trained ``model`` (same object that was passed in).
    """
    hidden_state = None

    for epoch in range(num_epochs):
        model.train()
        total_loss, total_reward = 0, 0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            batch_size = inputs.size(0)

            if hidden_state is None or hidden_state[0].size(1) != batch_size:
                # First batch, or the batch size changed: start from zeros.
                hidden_state = tuple(
                    state.to(device) for state in initialize_hidden(batch_size)
                )
            else:
                # Keep the values but cut the link to the previous graph.
                hidden_state = tuple(state.detach() for state in hidden_state)

            optimizer.zero_grad()

            # Forward pass
            outputs, hidden_state = model(inputs, hidden_state)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # The reward is tracked for reporting only.
            reward = compute_reward(outputs.detach(), targets)
            total_reward += reward

            total_loss += loss.item()

        # Evaluate on validation set
        _, val_accuracy = evaluate_model(model, eval_loader, criterion, device)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss}, Reward: {total_reward}, Val Acc: {val_accuracy}")

    return model

def evaluate_model(model, eval_loader, criterion, device):
    """Evaluate model on validation set.

    A fresh zero LSTM state is created for every batch, sized from the batch
    actually received. Sizing it once from ``eval_loader.batch_size`` fails on
    a final batch that is smaller than the nominal batch size, because the
    LSTM requires the state's batch dimension to match its input.

    Args:
        model: Network called as ``model(inputs, hidden_state)``.
        eval_loader: Iterable of ``(inputs, targets)`` batches; must support ``len()``.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batches and the LSTM state are moved to.

    Returns:
        tuple: (mean loss per batch, fraction of correctly classified samples).
    """
    model.eval()
    total_loss, correct_predictions, total_samples = 0, 0, 0

    with torch.no_grad():
        for inputs, targets in eval_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            hidden_state = tuple(
                state.to(device) for state in initialize_hidden(inputs.size(0))
            )
            outputs, _ = model(inputs, hidden_state)
            loss = criterion(outputs, targets)
            total_loss += loss.item()

            # Predicted class = index of the largest score along axis 1.
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == targets).sum().item()
            total_samples += targets.size(0)

    accuracy = correct_predictions / total_samples
    avg_loss = total_loss / len(eval_loader)
    return avg_loss, accuracy

# Usage example:
# Build the hybrid CNN+LSTM network on the available device, with AdamW and a
# cross-entropy loss, then train it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HybridNetwork().to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Train and evaluate the model
# NOTE(review): train_loader and eval_loader are not defined in this cell. If
# they are the loaders built in the previous cell, those yield single-channel
# grids, whereas CNNExtractor's first convolution expects 3 input channels --
# confirm which loaders are intended here.
num_epochs = 500
train_hybrid_model(model, train_loader, eval_loader, num_epochs, optimizer, criterion, device)


In [None]:
# data/data_conversion.py

import os
import random
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import torch
from torchvision import transforms
import logging

# Configure logging for the data_conversion module.
# basicConfig() only takes effect when the root logger has no handlers yet, so
# this is a no-op if logging was already configured elsewhere in the process.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the augmentation and data-generation helpers below.
logger = logging.getLogger(__name__)


def grid_to_image(grid, color_map):
    """
    Render a numerical grid as an RGB image, one pixel per cell.

    Args:
        grid (np.ndarray): 2D array of cell values.
        color_map (dict): Maps a cell value to its RGB colour; values missing
            from the map are drawn white.

    Returns:
        PIL.Image: Image with the same height and width as the grid.
    """
    rows, cols = grid.shape
    white = [255, 255, 255]  # colour for values absent from color_map
    rgb = np.zeros((rows, cols, 3), dtype=np.uint8)

    # Visit the cells in row-major order and colour each one.
    for row, col in np.ndindex(rows, cols):
        rgb[row, col] = color_map.get(grid[row, col], white)

    return Image.fromarray(rgb)


def grid_to_grayscale(grid):
    """
    Converts a numerical grid into a grayscale image.

    Cell values are rescaled linearly so that the smallest value maps to 0 and
    the largest to 255. A grid whose cells are all equal has no range to
    stretch; it is rendered as an all-black image instead of dividing by zero
    (which would produce NaNs and undefined uint8 values).

    Args:
        grid (np.ndarray): 2D array representing the grid values.

    Returns:
        PIL.Image: Grayscale (mode 'L') image representation of the grid.
    """
    values = np.asarray(grid, dtype=np.float64)
    lowest = values.min()
    value_range = values.max() - lowest

    if value_range == 0:
        normalized_grid = np.zeros_like(values)
    else:
        # Normalize grid values to 0-255
        normalized_grid = (values - lowest) / value_range * 255

    grayscale_array = normalized_grid.astype(np.uint8)
    return Image.fromarray(grayscale_array, mode='L')


def _measure_text(draw, text, font):
    """Return (width, height, x_offset, y_offset) of ``text`` drawn with ``font``.

    ``ImageDraw.textsize`` was removed in Pillow 10, so ``textbbox`` is tried
    first. Older Pillow releases either lack ``textbbox`` or reject bitmap
    fonts there; those still provide ``textsize``, which is used as fallback.
    """
    try:
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    except (AttributeError, ValueError):
        width, height = draw.textsize(text, font=font)
        return width, height, 0, 0
    return right - left, bottom - top, left, top


def grid_to_numeric_image(grid, font_path=None, font_size=12):
    """
    Converts a numerical grid into an image with numbers overlaid on a blank background.

    Every cell is drawn as a 50x50 pixel white square with a black outline and
    the cell's value centred inside it.

    Args:
        grid (np.ndarray): 2D array representing the grid values.
        font_path (str, optional): Path to a .ttf font file. When missing or
            not found on disk, Pillow's default font is used. Defaults to None.
        font_size (int, optional): Font size for the numbers; only applied when
            ``font_path`` is loaded. Defaults to 12.

    Returns:
        PIL.Image: Image with numbers overlaid on the grid.
    """
    height, width = grid.shape
    cell_size = 50  # Pixels
    img_width = width * cell_size
    img_height = height * cell_size

    image = Image.new('RGB', (img_width, img_height), color='white')
    draw = ImageDraw.Draw(image)

    # Load a font
    if font_path and os.path.exists(font_path):
        font = ImageFont.truetype(font_path, font_size)
    else:
        font = ImageFont.load_default()

    for i in range(height):
        for j in range(width):
            top_left = (j * cell_size, i * cell_size)
            bottom_right = ((j + 1) * cell_size, (i + 1) * cell_size)
            draw.rectangle([top_left, bottom_right], outline='black', fill='white')

            # Overlay the number, centred in the cell. The bounding box may not
            # start at the drawing origin, so subtract its offset.
            number = str(grid[i, j])
            text_width, text_height, offset_x, offset_y = _measure_text(draw, number, font)
            text_x = top_left[0] + (cell_size - text_width) / 2 - offset_x
            text_y = top_left[1] + (cell_size - text_height) / 2 - offset_y
            draw.text((text_x, text_y), number, fill='black', font=font)

    return image


# data/data_conversion.py (continued)

def augment_image_with_transforms(image, grid, perturb_prob=0.1, dead_square_prob=0.05, noise_prob=0.05):
    """
    Builds an augmented colour image of ``grid``.

    A copy of the grid is randomly perturbed and rendered with
    ``grid_to_image``; colour noise is added to selected cells of that
    rendering, and finally a random rotation and random flips are applied.

    The returned image is always rendered from the perturbed grid. Noise is
    therefore applied to that rendering (one pixel per cell) so that it is
    visible in the result; writing it into ``image`` would modify the caller's
    object while having no effect on the returned image.

    Args:
        image (PIL.Image): Accepted for interface compatibility; it is neither
            read nor modified.
        grid (np.ndarray): The original grid data (left unmodified).
        perturb_prob (float): Probability of perturbing a cell.
        dead_square_prob (float): Probability of marking a cell as dead.
        noise_prob (float): Probability of adding noise to a cell.

    Returns:
        PIL.Image: Augmented image.
        list: List of dead squares as (i, j) tuples.
    """
    augmented_grid = grid.copy()
    height, width = grid.shape
    dead_squares = []
    pending_noise = []  # (i, j, rgb_offset) applied once the image exists

    for i in range(height):
        for j in range(width):
            # NOTE: one draw per cell is compared against all three thresholds,
            # so the three events are nested rather than independent (with the
            # defaults, dead/noisy cells are always perturbed cells too).
            rand_val = random.random()
            if rand_val < perturb_prob:
                # Introduce random perturbation
                original_value = augmented_grid[i, j]
                augmented_grid[i, j] = random.randint(0, int(augmented_grid.max()))
                logger.debug(f"Perturbed cell ({i}, {j}) from {original_value} to {augmented_grid[i, j]}")

            if rand_val < dead_square_prob:
                # Mark square as dead (recorded only; the grid value is unchanged)
                dead_squares.append((i, j))
                logger.debug(f"Marked cell ({i}, {j}) as dead.")

            if rand_val < noise_prob:
                # Draw the colour offset now; it is applied after rendering.
                pending_noise.append((i, j, np.random.randint(-30, 31, size=3)))

    augmented_image = grid_to_image(augmented_grid, color_map={
        0: [0, 0, 0],        # Black
        1: [255, 0, 0],      # Red
        2: [0, 255, 0],      # Green
        3: [0, 0, 255],      # Blue
        4: [255, 255, 0],    # Yellow
        5: [255, 165, 0],    # Orange
        6: [128, 0, 128],    # Purple
        7: [0, 255, 255],    # Cyan
        8: [255, 192, 203],  # Pink
        9: [128, 128, 128],  # Gray
        10: [255, 255, 255], # White
        # Add more mappings as needed
    })

    # Add random noise by altering the pixel colour slightly. PIL addresses
    # pixels as (x, y), i.e. (column, row).
    pixels = augmented_image.load()
    for i, j, noise in pending_noise:
        current_color = pixels[j, i]
        noisy_color = np.clip(np.array(current_color) + noise, 0, 255)
        pixels[j, i] = tuple(int(channel) for channel in noisy_color)
        logger.debug(f"Added noise to cell ({i}, {j}): {current_color} -> {pixels[j, i]}")

    # Apply additional transformations
    transform = transforms.Compose([
        transforms.RandomRotation(15),          # Rotate image by ±15 degrees
        transforms.RandomHorizontalFlip(p=0.5), # Flip image horizontally with 50% probability
        transforms.RandomVerticalFlip(p=0.5),   # Flip image vertically with 50% probability
    ])
    augmented_image = transform(augmented_image)

    return augmented_image, dead_squares


class GridDataset(Dataset):
    """
    Dataset that renders each grid as colour, grayscale and numeric images.
    """

    # Grid value -> RGB colour used for the colour rendering.
    _COLOR_MAP = {
        0: [0, 0, 0],        # Black
        1: [255, 0, 0],      # Red
        2: [0, 255, 0],      # Green
        3: [0, 0, 255],      # Blue
        4: [255, 255, 0],    # Yellow
        5: [255, 165, 0],    # Orange
        6: [128, 0, 128],    # Purple
        7: [0, 255, 255],    # Cyan
        8: [255, 192, 203],  # Pink
        9: [128, 128, 128],  # Gray
        10: [255, 255, 255], # White
        # Add more mappings as needed
    }

    def __init__(self, grids, transform=None, augmentation=True):
        """
        Store the grids and the per-sample processing options.

        Args:
            grids (list of np.ndarray): List of 2D grid arrays.
            transform (callable, optional): Applied to every rendered image. Defaults to None.
            augmentation (bool, optional): Whether to augment the renderings. Defaults to True.
        """
        self.grids, self.transform, self.augmentation = grids, transform, augmentation

    def __len__(self):
        """Number of grids in the dataset."""
        return len(self.grids)

    def __getitem__(self, idx):
        """Return the three renderings of grid ``idx`` plus the grid as a long tensor."""
        grid = self.grids[idx]

        # Render the grid three ways; the order is kept through every step below.
        renderings = {
            'color_image': grid_to_image(grid, color_map=dict(self._COLOR_MAP)),
            'grayscale_image': grid_to_grayscale(grid),
            'numeric_image': grid_to_numeric_image(grid),
        }

        if self.augmentation:
            # The grayscale rendering is converted to RGB before augmentation.
            renderings['grayscale_image'] = renderings['grayscale_image'].convert('RGB')
            # The dead-square list returned by the augmentation is discarded.
            renderings = {
                key: augment_image_with_transforms(rendered, grid)[0]
                for key, rendered in renderings.items()
            }

        if self.transform:
            renderings = {
                key: self.transform(rendered)
                for key, rendered in renderings.items()
            }

        return {**renderings, 'grid': torch.tensor(grid, dtype=torch.long)}


def generate_grids(num_grids, grid_size, num_classes):
    """
    Create random square grids for training.

    Args:
        num_grids (int): How many grids to create.
        grid_size (int): Side length of each grid.
        num_classes (int): Cell values are drawn from 0 .. num_classes - 1.

    Returns:
        list of np.ndarray: The generated grids.
    """
    shape = (grid_size, grid_size)
    grids = [np.random.randint(0, num_classes, size=shape) for _ in range(num_grids)]
    logger.info(f"Generated {num_grids} grids of size {grid_size}x{grid_size}.")
    return grids


def get_data_loaders(batch_size=32, grid_size=10, num_classes=11, augment=True):
    """
    Build training and evaluation DataLoaders over freshly generated random grids.

    1000 grids are generated for training and 200 for evaluation. Every image
    is resized to 256x256 and converted to a tensor. Augmentation is only ever
    applied to the training set.

    Args:
        batch_size (int, optional): Samples per batch. Defaults to 32.
        grid_size (int, optional): Side length of each grid. Defaults to 10.
        num_classes (int, optional): Number of classes/colors. Defaults to 11.
        augment (bool, optional): Whether to augment training samples. Defaults to True.

    Returns:
        DataLoader: Training DataLoader (shuffled).
        DataLoader: Evaluation DataLoader (not shuffled).
    """
    # Training grids are generated first, then evaluation grids.
    train_grids = generate_grids(1000, grid_size, num_classes)
    eval_grids = generate_grids(200, grid_size, num_classes)

    to_tensor_256 = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ])

    train_dataset = GridDataset(train_grids, transform=to_tensor_256, augmentation=augment)
    eval_dataset = GridDataset(eval_grids, transform=to_tensor_256, augmentation=False)

    # Options common to both loaders.
    loader_options = {'batch_size': batch_size, 'num_workers': 4}
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
    eval_loader = DataLoader(eval_dataset, shuffle=False, **loader_options)

    logger.info("Created training and evaluation DataLoaders.")
    return train_loader, eval_loader
