In [86]:
import argparse
import json
import tensorboard
import tensorboardX
import os
import argparse
import json
import os
import numpy as np
import torch
import torch.nn as nn
import nni
from nni.nas.nn.pytorch import ModelSpace, LayerChoice, MutableConv2d, MutableBatchNorm2d, MutableReLU

from nni.nas.evaluator.pytorch import Lightning, ClassificationModule, Trainer
from nni.nas.experiment import NasExperiment
from nni.nas.space import model_context
from nni.nas.hub.pytorch import DARTS
from nni.nas.strategy import DARTS as DartsStrategy
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
from torchvision.datasets import CIFAR10
from nni.nas.experiment import NasExperiment
from nni.nas.evaluator import FunctionalEvaluator
from nni.nas.evaluator import FunctionalEvaluator
import nni.nas.strategy as strategy
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
#from ops import AvgPool,DilConv,SepConv
import genotypes
from pytorch_lightning.callbacks import ModelCheckpoint
torch.set_float32_matmul_precision('medium')
from tqdm import tqdm
from nni.nas.nn.pytorch import LayerChoice, ModelSpace,ValueChoice
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from pytorch_lightning import LightningModule, Trainer
from torchvision import datasets, transforms
from nni.nas.evaluator.pytorch import Classification

## Auxiliary Module

In [60]:
@nni.trace
class AuxLossClassificationModule(ClassificationModule):
    """Several customization for the training of DARTS, based on default Classification."""
    model: DARTS
    def __init__(self,
                 learning_rate: float = 1e-5,
                 weight_decay: float = 0.,
                 auxiliary_loss_weight: float = 0.4,
                 max_epochs: int = 600):
        print(f"lr : {learning_rate}")
        print(f"weight decay: {weight_decay}")
        print(f"aux loss weight: {auxiliary_loss_weight}")
        print(f"max epochs: {max_epochs}")
        super().__init__(learning_rate=learning_rate, weight_decay=weight_decay, num_classes=10)
        self.auxiliary_loss_weight = auxiliary_loss_weight
        self.max_epochs = max_epochs
        self.criterion=  nn.CrossEntropyLoss()

    def configure_optimizers(self):
        """Customized optimizer with momentum, as well as a scheduler."""
        """
        Classification Module params from nni.nas.evaluators
             learning_rate: float
             weight_decay: float
             optimizer: Type[optim.Optimizer]
             export_onnx: bool 
             num_classes: Optional[int] 

        """
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr = 1e-6,
            betas=(0.9, 0.999),  # type: ignore
            eps=1e-07,
            weight_decay= self.auxiliary_loss_weight  # type: ignore
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, self.max_epochs, eta_min=1e-3)
        }

    def training_step(self, batch, batch_idx):
        """Training step, customized with auxiliary loss."""
        x, y = batch

        # Check for NaNs or infinite values in input
        if torch.isnan(x).any() or torch.isnan(y).any() or torch.isinf(x).any() or torch.isinf(y).any():
            raise ValueError("Input data contains NaNs or Infinities.")

        if self.auxiliary_loss_weight:
            y_hat, y_aux = self(x)
            loss_main = self.criterion(y_hat, y)
            loss_aux = self.criterion(y_aux, y)
            # Check for NaNs in loss values
            if torch.isnan(loss_main).any() or torch.isnan(loss_aux).any():
                raise ValueError("Loss contains NaNs.")
            self.log('train_loss_main', loss_main)
            self.log('train_loss_aux', loss_aux)
            loss = loss_main + self.auxiliary_loss_weight * loss_aux

        else:
            y_hat = self(x)
            loss = self.criterion(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        for name, metric in self.metrics.items():
            self.log('train_' + name, metric(y_hat, y), prog_bar=True)
        return loss

    def on_train_epoch_start(self):
        """Set drop path probability before every epoch. This has no effect if drop path is not enabled in model."""
        self.model.set_drop_path_prob(self.model.drop_path_prob * self.current_epoch / self.max_epochs)

        # Logging learning rate at the beginning of every epoch
        self.log('lr', self.trainer.optimizers[0].param_groups[0]['lr'])




## Cifar-10 Dataset

### Random cutout transform

In [61]:
def cutout_transform(img, length: int = 16):
    h, w = img.size(1), img.size(2)
    mask = np.ones((h, w), np.float32)
    y = np.random.randint(h)
    x = np.random.randint(w)

    y1 = np.clip(y - length // 2, 0, h)
    y2 = np.clip(y + length // 2, 0, h)
    x1 = np.clip(x - length // 2, 0, w)
    x2 = np.clip(x + length // 2, 0, w)

    mask[y1: y2, x1: x2] = 0.
    mask = torch.from_numpy(mask)
    mask = mask.expand_as(img)
    img *= mask
    return img




### Load Dataset

In [62]:

def get_cifar10_dataset(train: bool = True, cutout: bool = False):
    CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
    CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

    if train:
        transform_list = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ToTensor(),
            transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
        ]
        if cutout:
            transform_list.append(cutout_transform)
        transform = transforms.Compose(transform_list)
    else:
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
        ])

    dataset = nni.trace(CIFAR10)(root='./data', train=train, download=True, transform=transform)
    
    # Check for NaNs in the dataset
    # for img, label in tqdm(dataset, desc="Checking dataset for NaNs or Infinities"):
    #     if torch.isnan(img).any() or torch.isinf(img).any():
    #         raise ValueError("Dataset contains NaNs or Infinities.")
    
    return dataset

## Darts Model Search

### Model Space

In [92]:

class SepConv(nn.Module):
    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super(SepConv, self).__init__()
        self.op = nn.Sequential(
            MutableConv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding),
            MutableConv2d(C_in, C_in, kernel_size=1, padding=0),
            MutableConv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding),
            MutableConv2d(C_in, C_out, kernel_size=1, padding=0),
            MutableBatchNorm2d(C_out,affine=affine),
            MutableReLU()
        )
    
    def forward(self, x):
        return self.op(x)
        
                
        
class PhotonicSigmoid(nn.Module):
    def forward(self, x):
        tmp = torch.exp((x - 0.145) / 0.073)
        tmp = 1.005 + (0.06 - 1.005) / (1 + tmp)
        return tmp.float()


### Model Kernel 2x2

In [74]:

class CustomDARTSSpaceK2(ModelSpace):
    def __init__(self, input_channels, channels, num_classes, layers):
        super(CustomDARTSSpaceK2, self).__init__()
        #self.first_iter = True
        self.layers = nn.ModuleList()
        self.drop_path_prob = 0.0  
        self.preliminary_layer = nn.Conv2d(3, 8, kernel_size=2, padding=0, bias=False)
        
        layer1 = LayerChoice([
            SepConv(8, ValueChoice([12, 14, 16]), kernel_size=3, stride=1, padding=0),   
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0),
                nn.Conv2d(8, ValueChoice([12, 14, 16]), kernel_size=1)
            ),   
            nn.Sequential(
                nn.Conv2d(8, ValueChoice([12, 14, 16]), kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0)
            ) 
        ], label='layer_1')
        self.layers.append(layer1)
        
        layer2 = LayerChoice([
            SepConv(ValueChoice([12, 14, 16]), ValueChoice([16, 18, 20]), kernel_size=3, stride=1, padding=0),   
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0),
                nn.Conv2d(ValueChoice([12, 14, 16]), ValueChoice([16, 18, 20]), kernel_size=1)
            ),   
            nn.Sequential(
                nn.Conv2d(ValueChoice([12, 14, 16]), ValueChoice([16, 18, 20]), kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0)
            )         
        ], label='layer_2')
        self.layers.append(layer2)
        
        layer3 = LayerChoice([
            SepConv(ValueChoice([16, 18, 20]), ValueChoice([20, 22, 24]), kernel_size=3, stride=1, padding=0),   
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0),
                nn.Conv2d(ValueChoice([16, 18, 20]), ValueChoice([20, 22, 24]), kernel_size=1)
            ),   
            nn.Sequential(
                nn.Conv2d(ValueChoice([16, 18, 20]), ValueChoice([20, 22, 24]), kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0)
            )              
        ], label='layer_3')
        self.layers.append(layer3)
        
        layer4 = LayerChoice([
            SepConv(ValueChoice([20, 22, 24]), ValueChoice([28, 30, 32]), kernel_size=3, stride=1, padding=0),   
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0),
                nn.Conv2d(ValueChoice([20, 22, 24]), ValueChoice([28, 30, 32]), kernel_size=1)
            ),   
            nn.Sequential(
                nn.Conv2d(ValueChoice([20, 22, 24]), ValueChoice([28, 30, 32]), kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0)
            )             
        ], label='layer_4')
        self.layers.append(layer4)
        
        layer5 = LayerChoice([
            SepConv(ValueChoice([28, 30, 32]), 40, kernel_size=3, stride=1, padding=0),   
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0),
                nn.Conv2d(ValueChoice([28, 30, 32]), 40, kernel_size=1)
            ),   
            nn.Sequential(
                nn.Conv2d(ValueChoice([28, 30, 32]), 40, kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=0)
            )               
        ], label='layer_5')
        self.layers.append(layer5)
        
        # Ensure the number of inputs to fc1 is close to but not exceeding 200
        self.pool = nn.AdaptiveAvgPool2d((2, 2))
        self.fc1 = nn.Linear(160, 96)  
        self.fc2 = nn.Linear(96, 64)  
        self.fc3 = nn.Linear(64, 32) 
        self.relu = nn.ReLU()
        self.classifier = nn.Linear(32, num_classes)

    def forward(self, x):
        # Kernel: 2
        # Initial shape: 32x32
        #if first_iter:print(f'Input shape: {x.shape}')
        x = self.preliminary_layer(x)
        # After preliminary layer: 31x31
        #if first_iter:print(f'After preliminary layer: {x.shape}')
        
        for i, layer in enumerate(self.layers):
            x = layer(x)
            #if first_iter:print(f'After layer {i+1}: {x.shape}')
            
            # Add a AvgPool2d layer after the third layer to reduce spatial dimensions
            if i == 2 or i == 4:
                x = nn.AvgPool2d(kernel_size=2, stride=2)(x)
                #if first_iter:print(f'After avg pooling: {x.shape}')
        
        # Add an adaptive pooling layer before flattening
        x = self.pool(x)
        #if first_iter:print(f'After adaptive pooling: {x.shape}')
        
        x = torch.flatten(x, 1)
        #if first_iter:print(f'After flattening: {x.shape}')
        
        x = self.fc1(x)
        x= self.relu(x)
        #if first_iter:print(f'After fc1: {x.shape}')
        x = self.fc2(x)
        x= self.relu(x)
        #if first_iter:print(f'After fc2: {x.shape}')
        x = self.fc3(x)
        x= self.relu(x)
        #if first_iter:print(f'After fc3: {x.shape}')
        
        x = self.classifier(x)
        #if first_iter:print(f'After classifier: {x.shape}')
        #self.first_iter = False
        return x

    def set_drop_path_prob(self, drop_path_prob):
        self.drop_path_prob = drop_path_prob
        for layer in self.layers:
            if hasattr(layer, 'set_drop_path_prob'):
                layer.set_drop_path_prob(drop_path_prob)



### Model kernel 3x3
avg pool dopo conv2d come scelta
aggiungere sepconv con sigmoid in scelte
aumentare numero conv dentro la sepconv
add model checkpoint

In [None]:
  SepConv(8, nni.choice([12, 14, 16], label='layer1_out'), kernel_size=3, stride=1, padding=1),

In [98]:

class CustomDARTSSpaceK3(ModelSpace):
    def __init__(self, input_channels, channels, num_classes, layers, verbose):
        super(CustomDARTSSpaceK3, self).__init__()
        #self.first_iter = True
        self.layers = nn.ModuleList()
        self.drop_path_prob = 0.0  
        self.preliminary_layer = nn.Conv2d(3, 8, kernel_size=3, padding=0, bias=False)
        self.verbose = verbose

 # Declare the choices using the correct format
        layer1_out = nni.choice('layer1_out', [12, 14, 16])
        layer2_in = nni.choice('layer2_in', [12, 14, 16])
        layer2_out = nni.choice('layer2_out', [16, 18, 20])
        layer3_in = nni.choice('layer3_in', [16, 18, 20])
        layer3_out = nni.choice('layer3_out', [20, 22, 24])
        layer4_in = nni.choice('layer4_in', [20, 22, 24])
        layer4_out = nni.choice('layer4_out', [28, 30, 32])
        layer5_in = nni.choice('layer5_in', [28, 30, 32])

        # Use the declared choices within LayerChoice
        layer1 = LayerChoice([
              SepConv(8, layer1_out, kernel_size=3, stride=1, padding=1),
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableConv2d(8, layer1_out, kernel_size=1),
                MutableBatchNorm2d(layer1_out),
                MutableReLU()
            ),   
            nn.Sequential(
                MutableConv2d(8, layer1_out, kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableBatchNorm2d(layer1_out),
                MutableReLU()
            )
        ], label='layer_1')
        self.layers.append(layer1)
        
        layer2 = LayerChoice([
            SepConv(layer2_in, layer2_out, kernel_size=3, stride=1, padding=1),
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableConv2d(layer2_in, layer2_out, kernel_size=1),
                MutableBatchNorm2d(layer2_out),
                MutableReLU()
            ),   
            nn.Sequential(
                MutableConv2d(layer2_in, layer2_out, kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableBatchNorm2d(layer2_out),
                MutableReLU()
            )
        ], label='layer_2')
        self.layers.append(layer2)
        
        layer3 = LayerChoice([
            SepConv(layer3_in, layer3_out, kernel_size=3, stride=1, padding=1),
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableConv2d(layer3_in, layer3_out, kernel_size=1),
                MutableBatchNorm2d(layer3_out),
                MutableReLU()
            ),   
            nn.Sequential(
                MutableConv2d(layer3_in, layer3_out, kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableBatchNorm2d(layer3_out),
                MutableReLU()
            )
        ], label='layer_3')
        self.layers.append(layer3)
        
        layer4 = LayerChoice([
            SepConv(layer4_in, layer4_out, kernel_size=3, stride=1, padding=1),
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableConv2d(layer4_in, layer4_out, kernel_size=1),
                MutableBatchNorm2d(layer4_out),
                MutableReLU()
            ),   
            nn.Sequential(
                MutableConv2d(layer4_in, layer4_out, kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableBatchNorm2d(layer4_out),
                MutableReLU()
            )
        ], label='layer_4')
        self.layers.append(layer4)
        
        layer5 = LayerChoice([
            SepConv(layer5_in, 48, kernel_size=3, stride=1, padding=1),
            nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableConv2d(layer5_in, 48, kernel_size=1),
                MutableBatchNorm2d(48),
                MutableReLU()
            ),   
            nn.Sequential(
                MutableConv2d(layer5_in, 48, kernel_size=1),
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                MutableBatchNorm2d(48),
                MutableReLU()
            )
        ], label='layer_5')
        self.layers.append(layer5)
        
        # Ensure the number of inputs to fc1 is close to but not exceeding 200
        self.pool = nn.AdaptiveAvgPool2d((2, 2))
        self.fc1 = nn.Linear(192, 128) 
        self.fc2 = nn.Linear(128, 64) 
        self.fc3 = nn.Linear(64, 32)  
        self.relu = nn.ReLU()
        self.classifier = nn.Linear(32, num_classes)

    def forward(self, x):
        # Kernel: 2
        # Initial shape: 32x32
        #print(f'Input shape: {x.shape}')
        x = self.preliminary_layer(x)
        # After preliminary layer: 31x31
        if self.verbose == 1 :
            print(f'After preliminary layer: {x.shape}')
        
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if self.verbose == 1 :
                print(f'After layer {i+1}: {x.shape}')
            
            # Add a AvgPool2d layer after the third layer to reduce spatial dimensions
            if i == 2 or i == 4:
                x = nn.AvgPool2d(kernel_size=2, stride=2)(x)
                if self.verbose == 1 :
                    print(f'After avg pooling: {x.shape}')
        

        x =  nn.AvgPool2d(kernel_size=2, stride =2)(x)
        if self.verbose == 1 :
            print(f'After adaptive pooling: {x.shape}')
        
        x = torch.flatten(x, 1)
        if self.verbose == 1 :
            print(f'After flattening: {x.shape}')
        
        x = self.fc1(x)
        x= self.relu(x)
        if self.verbose == 1 :
            print(f'After fc1: {x.shape}')
        x = self.fc2(x)
        x= self.relu(x)
        if self.verbose == 1 :
            print(f'After fc2: {x.shape}')
        x = self.fc3(x)
        x= self.relu(x)
        if self.verbose == 1 :
            print(f'After fc3: {x.shape}')
        
        x = self.classifier(x)
        if self.verbose == 1 :
            print(f'After classifier: {x.shape}')
        #self.first_iter = False
        return x

    def set_drop_path_prob(self, drop_path_prob):
        self.drop_path_prob = drop_path_prob
        for layer in self.layers:
            if hasattr(layer, 'set_drop_path_prob'):
                layer.set_drop_path_prob(drop_path_prob)


### Darts Search

In [99]:
def search(log_dir: str, batch_size: int = 64):
    """
    Darts search 

    Args:
        log_dir (str): The directory where logs will be saved.
        batch_size (int, optional): The size of the batches. Default is 64.  
    Returns:
        None
    """

    model_space =CustomDARTSSpaceK3(input_channels=3, channels=64, num_classes=10, layers=5)
    model_space.set_drop_path_prob(0.2)
    train_data = get_cifar10_dataset()
    num_samples = len(train_data)
    indices = np.random.permutation(num_samples)
    split = num_samples // 2

    train_loader = DataLoader(
        train_data, batch_size=batch_size,
        sampler=SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=6,persistent_workers=True
    )

    valid_loader = DataLoader(
        train_data, batch_size=batch_size,
        sampler=SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=6,persistent_workers=True
    )
    checkpoint_callback = ModelCheckpoint(
        monitor='val_accuracy',  
        dirpath='./checkpoints', 
        filename='best-checkpoint',  
        save_top_k=1,
        mode='max'  
    )


    evaluator = Lightning(
        AuxLossClassificationModule(1e-6, 3e-4, 0., 600),
        Trainer(
            accelerator="auto",
            callbacks=[checkpoint_callback],  
            max_epochs=600
        ),
        train_dataloaders=train_loader,
        val_dataloaders=valid_loader
    )

    strategy = DartsStrategy(gradient_clip_val=0.3)

    experiment = NasExperiment(model_space, evaluator, strategy)
    experiment.run()



## Model Training

In [100]:
%%script False
def train(arch: dict, log_dir: str, batch_size: int = 96, ckpt_path: str = None):
    """
    Train the model with the given architecture and parameters.

    Args:
        arch (dict): The architecture of the model.
        log_dir (str): The directory where logs will be saved.
        batch_size (int, optional): The size of the batches. Default is 96.
        ckpt_path (str, optional): The path to the checkpoint file. Default is None.
    
    Returns:
        None
    """
    with model_context(arch):
        model = DARTS(36, 20, 'cifar', auxiliary_loss=True, drop_path_prob=0.2)

    train_data = get_cifar10_dataset(cutout=True)
    valid_data = get_cifar10_dataset(train=False)

    fit_kwargs = {}
    if ckpt_path:
        fit_kwargs['ckpt_path'] = ckpt_path

    evaluator = Lightning(
        CustomClassificationModule(0.025, 3e-4, 0.4, 600),
        Trainer(
            
            accelerator="auto",
            max_epochs=20
        ),
        train_dataloaders=DataLoader(train_data, batch_size=batch_size, pin_memory=True, shuffle=True, num_workers=6),
        val_dataloaders=DataLoader(valid_data, batch_size=batch_size, pin_memory=True, num_workers=6),
        fit_kwargs=fit_kwargs
    )

    evaluator.fit(model)


Couldn't find program: 'False'


## Test

In [101]:
search("./",128)

Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


lr : 1e-06
weight decay: 0.0003
aux loss weight: 0.0
max epochs: 600
[2024-07-23 13:02:42] [32mConfig is not provided. Will try to infer.[0m
[2024-07-23 13:02:42] [32mStrategy is found to be a one-shot strategy. Setting execution engine to "sequential" and format to "raw".[0m
[2024-07-23 13:02:43] [32mCheckpoint saved to C:\Users\Giovanni\nni-experiments\wynh2dzu\checkpoint.[0m
[2024-07-23 13:02:43] [32mExperiment initialized successfully. Starting exploration strategy...[0m


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                        | Params | Mode 
------------------------------------------------------------------------
0 | training_module | AuxLossClassificationModule | 91.0 K | train
------------------------------------------------------------------------
91.0 K    Trainable params
0         Non-trainable params
91.0 K    Total params
0.364     Total estimated model params size (MB)


Epoch 0:   0%|          | 0/196 [00:00<?, ?it/s] [2024-07-23 13:03:34] [31mERROR: Model 1 fails to be executed.[0m
[2024-07-23 13:03:34] [31mERROR: Strategy failed to execute.[0m


RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x432 and 192x128)