<a href="https://colab.research.google.com/github/OneFineStarstuff/State-of-the-Art/blob/main/Neural_Architecture_Search_(NAS).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from copy import deepcopy  # Import deepcopy for safely copying models

class RandomSearchNAS:
    """
    Random-search neural architecture search over small convolutional classifiers.

    Each candidate is a stack of Conv2d -> BatchNorm2d -> ReLU blocks (16 channels
    each) followed by global average pooling and a linear classification layer.
    Candidates are ranked by their average training loss after a single pass over
    the training data.
    """

    def __init__(self, input_dim, output_dim, max_layers=5):
        """
        Initialize the NAS framework.
        :param input_dim: Number of input channels (e.g., 1 for grayscale images).
        :param output_dim: Number of output classes (e.g., 10 for MNIST).
        :param max_layers: Maximum number of layers in a randomly generated architecture.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.max_layers = max_layers

    def generate_random_architecture(self, input_dim=None):
        """
        Generate a random architecture with convolutional layers followed by classification layers.
        :param input_dim: Number of input channels. When omitted (None), the value
            given to the constructor is used.
        :return: A PyTorch nn.Sequential model with the generated architecture.
        """
        if input_dim is None:
            input_dim = self.input_dim

        # Depth is drawn uniformly from 1..max_layers (randint's upper bound is exclusive).
        n_conv_blocks = torch.randint(1, self.max_layers + 1, (1,)).item()

        layers = []
        for _ in range(n_conv_blocks):
            layers.append(nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(16))  # Add BatchNorm for stability
            layers.append(nn.ReLU())
            input_dim = 16  # Every block after the first consumes the previous block's 16 channels
        layers.append(nn.AdaptiveAvgPool2d((1, 1)))  # Global pooling to handle variable input sizes
        layers.append(nn.Flatten())  # Flatten the spatial dimensions
        layers.append(nn.Linear(16, self.output_dim))  # Final classification layer
        return nn.Sequential(*layers)

    def evaluate_architecture(self, model, train_loader, device):
        """
        Train the given model for one epoch and return the training loss.

        The model is moved to ``device`` and its parameters are updated in place.
        :param model: PyTorch model to evaluate.
        :param train_loader: Iterable yielding (inputs, targets) batches, e.g. a DataLoader.
        :param device: Device (CPU or GPU) to run the training on.
        :return: Average training loss for the model (mean of the per-batch losses).
        :raises ValueError: If train_loader yields no batches.
        """
        model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()
        model.train()

        total_loss = 0.0
        # Count batches as they are consumed instead of calling len(train_loader),
        # so loaders without a defined length work and an empty loader is detected.
        num_batches = 0
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot evaluate the architecture")
        return total_loss / num_batches

    def search(self, train_loader, device, n_architectures=10):
        """
        Perform random search to find the best architecture based on training loss.
        :param train_loader: DataLoader for training data.
        :param device: Device (CPU or GPU) to run the training on.
        :param n_architectures: Number of random architectures to try.
        :return: The best-performing model, or None when no candidate achieved a
            loss below infinity (e.g. n_architectures is 0 or every loss was NaN).
        """
        best_model = None
        best_loss = float('inf')
        for _ in range(n_architectures):
            # Build candidates for the channel count this searcher was configured with.
            model = self.generate_random_architecture(input_dim=self.input_dim)
            loss = self.evaluate_architecture(model, train_loader, device)
            if loss < best_loss:
                best_loss = loss
                # Snapshot the winner so the returned model is independent of the candidate object.
                best_model = deepcopy(model)
        return best_model

# Example usage with MNIST dataset
if __name__ == "__main__":
    # Choose the compute device up front: GPU when CUDA is available, otherwise CPU.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # MNIST training split with images converted to tensors; download=True asks
    # torchvision to fetch the dataset under 'data'.
    transform = transforms.Compose([transforms.ToTensor()])
    train_data = datasets.MNIST(
        root='data',
        train=True,
        download=True,
        transform=transform,
    )
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=64,
        shuffle=True,
    )

    # Search with single-channel inputs and ten output classes, using the
    # default number of candidate architectures.
    nas = RandomSearchNAS(input_dim=1, output_dim=10)
    best_model = nas.search(train_loader, device)

    # Emit the header and the model repr on separate lines.
    print("Best model structure:", best_model, sep="\n")