In [36]:
import torch
import torch.nn as nn
import torch.utils.data as data
from torchvision.transforms import Normalize
from ax import optimize
from ax.utils.notebook.plotting import render, init_notebook_plotting
import torch.nn as nn
import logging



#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted == labels).sum().item()
# accuracy = correct / total  
# print(f"ACC for epoch {epoch}: ", accuracy)

class CNNTrainer:
    def __init__(self, train_data, val_data=None, batch_size=64, model=None, patience=5):
        self.patience = patience
        self.model = model
        # Load the training data
        self.train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=8)
        if val_data: 
            self.val_loader = data.DataLoader(train_data, batch_size=batch_size)
            
    def compute_input_size(self, params):
        # Add BatchNorm2d layer to standardize input data
        x = nn.BatchNorm2d(3)(torch.zeros([1,3,32,32]))
        pool1 = eval(params.get("pool1"))
        pool2 = eval(params.get("pool2"))        
        # Pass the input through the sequence of layers
        for layer in [
                nn.Conv2d(in_channels=3, 
                          out_channels=params.get('num_filters1'), 
                          kernel_size=params.get('filter_size1')),
                nn.ReLU(),
                pool1(kernel_size=params.get("kernel_pool1")),
                nn.Conv2d(in_channels=params.get('num_filters1'),
                          out_channels=params.get('num_filters2'),
                          kernel_size=params.get('filter_size2')),
                nn.ReLU(),
                pool2(kernel_size=params.get("kernel_pool2")),
                nn.Conv2d(in_channels=params.get('num_filters2'),
                          out_channels=params.get('num_filters3'),
                          kernel_size=params.get('filter_size3')),
                nn.ReLU(),
                nn.Sequential(
                    nn.Conv2d(params.get('num_filters3'),
                              params.get('num_filters3'), 
                              kernel_size=1),
                    nn.Sigmoid()),            
                nn.Flatten()]:
            x = layer(x)
        input_size = x.shape[0] * x.shape[1]
        return input_size

    def build_model(self, params):
        linear_input = self.compute_input_size(params) 
        # Define the CNN architecture based on the given parameters
        pool1 = eval(params.get("pool1"))
        pool2 = eval(params.get("pool2"))
        print("linear_input", linear_input)
        model = nn.Sequential(
            nn.BatchNorm2d(3),  # Add BatchNorm2d layer to standardize input data,
            nn.Conv2d(in_channels=3, 
                      out_channels=params.get('num_filters1'), 
                      kernel_size=params.get('filter_size1')),
            nn.ReLU(),
            pool1(kernel_size=params.get("kernel_pool1")),
            nn.Conv2d(in_channels=params.get('num_filters1'),
                      out_channels=params.get('num_filters2'),
                      kernel_size=params.get('filter_size2')),
            nn.ReLU(),
            pool2(kernel_size=params.get("kernel_pool2")),
            nn.Conv2d(in_channels=params.get('num_filters2'),
                      out_channels=params.get('num_filters3'),
                      kernel_size=params.get('filter_size3')),
            nn.ReLU(),
           # Define the attention mechanism
            nn.Sequential(
                nn.Conv2d(params.get('num_filters3'),
                          params.get('num_filters3'), 
                          kernel_size=1),
                nn.Sigmoid()),
            nn.Flatten(),
            nn.Linear(linear_input, 10)
        )
        return model
        
        
    def fit(self, model, lr=0.001, epochs=1):
        # Initialize weights with Xavier initialization
                
        for m in model.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)      
                
        # Define the loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        
        # Set the number of threads for multi-threading
        torch.set_num_threads(8)
        
        # Initialize some variables for early stopping
        best_loss = float('inf')
        counter = 0  # Counter for number of epochs without improvement
        
        # Train the model
        model.train()
        for epoch in range(epochs):
            print("Running epoch ", epoch)
            total, correct = 0,0
            for images, labels in self.train_loader:
                images, labels = images.to(device='cpu'), labels.to(device='cpu')
                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)
                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
            # Validate your model
            model.eval()
            with torch.no_grad():
                val_loss = 0
                for x, y in self.val_loader:
                    y_pred = model(x)
                    val_loss += criterion(y_pred, y).item()
                val_loss /= len(self.val_loader)
            print('Val loss: ', val_loss)
            # Check for improvement
            if val_loss < best_loss:
                best_loss = val_loss
                counter = 0
            else:
                counter += 1
                if counter >= self.patience:
                    print(f"Early stopping on epoch {epoch}")
                    break
        return model
        
class CNNPredictor:
    def __init__(self, model):
        self.model = model
        
    def predict(self, test_data):
        # Evaluate the model on the validation set
        data_loader = data.DataLoader(test_data, batch_size=64)
        
        # Calculate validation accuracy
        correct, total  = 0, 0
        self.model.eval()
        with torch.no_grad():
            for images, labels in data_loader:
                outputs = self.model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = correct / total
        return labels, accuracy


class CNNOptimizer:
    def __init__(self, search_space, train_data, val_data, steps=20, epochs=1):
        self.epochs = epochs
        self.steps = steps
        self.search_space = search_space
        self.train_data = train_data
        self.val_data = val_data

    def evaluate_model(self, parameterization):
        batch_size = parameterization.get("batch_size")
        lr = parameterization.get("lr")
        
        try:
            print("Testing config", parameterization)
            trainer = CNNTrainer(self.train_data, batch_size=batch_size)
            model = trainer.build_model(parameterization) 
        except Exception as e:
            logging.error(e)
            return {'acc': 0} 
        
        print("CONFIG Valida")
        model = trainer.fit(model, lr=lr, epochs=self.epochs)
        predictor = CNNPredictor(model)
        _, accuracy = predictor.predict(self.val_data)
        print("ACC during eval", accuracy)
        # Return the validation accuracy as the objective value to optimize
        return {'acc': accuracy}

    def optimize(self):
        
        constraints = ["num_filters1 <= num_filters2",    
                       "num_filters2 <= num_filters3",   
                       "filter_size1 >= filter_size2",   
                       "filter_size2 >= filter_size3"  
                      ]    

        best_parameters, best_values, experiment, model = optimize(
            parameters=self.search_space,
            evaluation_function=self.evaluate_model,
            parameter_constraints=constraints,
            objective_name='acc',
            minimize=False,
            total_trials=self.steps
        )

        print('Best parameters:', best_parameters)
        print('Best validation accuracy:', best_values[0])
        
        return best_parameters, best_values, experiment, model

In [38]:
import numpy as np
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Load the CIFAR10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

# Define the search space

def split_train_val(data, ptrain=0.3, pval=0.2):
    train_mask = np.random.rand(len(data)) <= ptrain
    val_mask = np.random.rand(len(data)) >= (1-pval)
    index = np.array(range(len(data)))
    train_id, test_id = index[train_mask], index[val_mask]
    train = torch.utils.data.Subset(data, train_id)
    test = torch.utils.data.Subset(data, test_id)
    return train, test

train, val = split_train_val(train_dataset)

search_space = [
    {"name": "num_filters1", "type": "range", "bounds": [12, 32], "value_type":"int"},
    {"name": "filter_size1", "type": "range", "bounds": [3, 5], "value_type":"int"},
    {"name": "num_filters2", "type": "range", "bounds": [12, 32], "value_type":"int"},
    {"name": "filter_size2", "type": "range", "bounds": [3, 5], "value_type":"int"},
    {"name": "num_filters3", "type": "range", "bounds": [12, 32], "value_type":"int"},
    {"name": "filter_size3", "type": "range", "bounds": [3, 5], "value_type":"int"},
    {"name": "pool1", "type": "choice", "is_ordered": False,
     "values": ["nn.AvgPool2d", "nn.MaxPool2d"]},
    {"name": "pool2", "type": "choice", "is_ordered": False, 
     "values": ["nn.AvgPool2d", "nn.MaxPool2d"]},
    {"name": "kernel_pool1", "type": "range", "bounds": [2, 3], "value_type":"int"},
    {"name": "kernel_pool2", "type": "range", "bounds": [2, 3], "value_type":"int"},
    {"name": "lr", "type": "range", "bounds": [1e-5,1e-2], "value_type":"float"},
    {"name": "batch_size", "type": "range", "bounds": [16, 128], "value_type":"int"},    
]

# Initialize the CNNOptimizer
optimizer = CNNOptimizer(search_space, train, val, steps=120, epochs=2)

# Run the optimization
best_parameters, best_values, experiment, model = optimizer.optimize()


In [61]:
best_parameters, best_values

({'num_filters1': 16,
  'filter_size1': 5,
  'num_filters2': 23,
  'filter_size2': 5,
  'num_filters3': 28,
  'filter_size3': 4,
  'kernel_pool1': 2,
  'kernel_pool2': 2,
  'lr': 0.0051422473539197155,
  'batch_size': 73,
  'pool1': 'nn.AvgPool2d',
  'pool2': 'nn.AvgPool2d'},
 ({'acc': 0.44478208681781845}, {'acc': {'acc': 0.00010064481711331273}}))

In [66]:
data = experiment.fetch_data()
data.df.head()

Unnamed: 0,arm_name,metric_name,mean,sem,trial_index
0,0_0,acc,0.429545,,0
1,1_0,acc,0.42253,,1
2,2_0,acc,0.371016,,2
3,3_0,acc,0.438765,,3
4,4_0,acc,0.0,,4


In [34]:
x.shape

torch.Size([64, 10])

In [35]:
x= torch.zeros([1,3,32,32])

for m in model.modules():
    print(m)
    x = m(x)
    print(x)

Sequential(
  (0): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (2): ReLU()
  (3): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (4): Conv2d(16, 23, kernel_size=(5, 5), stride=(1, 1))
  (5): ReLU()
  (6): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (7): Conv2d(23, 28, kernel_size=(4, 4), stride=(1, 1))
  (8): ReLU()
  (9): Sequential(
    (0): Conv2d(28, 28, kernel_size=(1, 1), stride=(1, 1))
    (1): Sigmoid()
  )
  (10): Flatten(start_dim=1, end_dim=-1)
  (11): Linear(in_features=112, out_features=10, bias=True)
)
tensor([[ 0.8756, -5.5353,  1.8794,  0.1591,  0.2256, -0.5847, -5.5011, -1.3562,
         -1.4498, -5.1926]], grad_fn=<AddmmBackward0>)
BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


ValueError: expected 4D input (got 2D input)

In [27]:
train, val = split_train_val(train_dataset, ptrain=0.7, pval=0.3)

best_parameters= {'num_filters1': 16,
  'filter_size1': 5,
  'num_filters2': 23,
  'filter_size2': 5,
  'num_filters3': 28,
  'filter_size3': 4,
  'kernel_pool1': 2,
  'kernel_pool2': 2,
  'lr': 0.01, # 0.0051422473539197155,
  'batch_size': 73,
  'pool1': 'nn.AvgPool2d',
  'pool2': 'nn.AvgPool2d'}

trainer = CNNTrainer(train, val)
model = trainer.build_model(best_parameters) 
model = trainer.fit(model, epochs=150)


linear_input 112
Running epoch  0
Val loss:  1.5491171064063007
Running epoch  1
Val loss:  1.370190205914011
Running epoch  2
Val loss:  1.294030192456254
Running epoch  3
Val loss:  1.217041905336014
Running epoch  4
Val loss:  1.1468653453333504
Running epoch  5
Val loss:  1.1017187319464397
Running epoch  6
Val loss:  1.089891591281298
Running epoch  7
Val loss:  1.0189920145153346
Running epoch  8
Val loss:  0.992445466941192
Running epoch  9
Val loss:  0.9562430723929536
Running epoch  10
Val loss:  0.9406902184216153
Running epoch  11


Exception ignored in: <function _releaseLock at 0x7fc3b2d89160>
Traceback (most recent call last):
  File "/home/isac/miniconda3/envs/mariner/lib/python3.8/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


RuntimeError: DataLoader worker (pid(s) 330532, 330540) exited unexpectedly

In [21]:
predictor = CNNPredictor(model)
labels, accuracy = predictor.predict(val)
accuracy

0.7790674134556987

In [240]:
# unique, counts = np.unique(labels, return_counts=True)

In [82]:
model.parameters()

<generator object Module.parameters at 0x7fcd4b953f20>

In [83]:
num_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters in the model: {num_params}")

Number of parameters in the model: 21907
