# Neural architecture Search using One shot NAS

In [1]:
import pandas as pd
data = pd.read_csv('DATASETS/wine-quality.csv')
x = data.drop('quality_label', axis=1, inplace=False).values
print(x.shape[1])

13


In [2]:
from parameters import *

def vocab_dict():
    vocab = {}
    layer_id = 1

    for node in nodes:
        for activation in activations:
            vocab[layer_id] = {'type': 'hidden', 'nodes': node, 'activation': activation}
            layer_id += 1

    vocab[layer_id] = {'type': 'dropout'}
    layer_id += 1

    output_activation = 'Sigmoid' if target_classes == 2 else 'Softmax'
    vocab[layer_id] = {'type': 'output', 'nodes': target_classes, 'activation': output_activation}

    return vocab

encoded = vocab_dict()
print(encoded)
print('Size of vocabulary:',len(encoded))

{1: {'type': 'hidden', 'nodes': 8, 'activation': 'Sigmoid'}, 2: {'type': 'hidden', 'nodes': 8, 'activation': 'Tanh'}, 3: {'type': 'hidden', 'nodes': 8, 'activation': 'ReLU'}, 4: {'type': 'hidden', 'nodes': 16, 'activation': 'Sigmoid'}, 5: {'type': 'hidden', 'nodes': 16, 'activation': 'Tanh'}, 6: {'type': 'hidden', 'nodes': 16, 'activation': 'ReLU'}, 7: {'type': 'hidden', 'nodes': 32, 'activation': 'Sigmoid'}, 8: {'type': 'hidden', 'nodes': 32, 'activation': 'Tanh'}, 9: {'type': 'hidden', 'nodes': 32, 'activation': 'ReLU'}, 10: {'type': 'hidden', 'nodes': 64, 'activation': 'Sigmoid'}, 11: {'type': 'hidden', 'nodes': 64, 'activation': 'Tanh'}, 12: {'type': 'hidden', 'nodes': 64, 'activation': 'ReLU'}, 13: {'type': 'hidden', 'nodes': 128, 'activation': 'Sigmoid'}, 14: {'type': 'hidden', 'nodes': 128, 'activation': 'Tanh'}, 15: {'type': 'hidden', 'nodes': 128, 'activation': 'ReLU'}, 16: {'type': 'hidden', 'nodes': 256, 'activation': 'Sigmoid'}, 17: {'type': 'hidden', 'nodes': 256, 'activat

In [3]:
vocab_idx = list(encoded.keys())
print('vocab ids:',vocab_idx)

vocab ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]


In [4]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self,input_size, hidden_size , output_size ):
        super(LSTM,self).__init__()
        
        self.lstm = nn.LSTM(input_size, hidden_size ,batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=2) 
    
    def forward(self, x):
        lstm_out,_ = self.lstm(x)
        x = self.fc(lstm_out)
        x = self.softmax(x)
        return x

In [5]:
from torch.nn.utils.rnn import pad_sequence
seed = torch.zeros(max_len-1)
print(seed)
inputs = seed.reshape(1,1,9)
print(inputs.shape)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.])
torch.Size([1, 1, 9])


In [6]:
seed=[]
sequence = pad_sequence([torch.tensor(seed)], batch_first=True, padding_value=0)
# Batch_size, sequence_length, feature_dimension
sequence = sequence[:, :max_len-1, :]  # Adjust the dimensions
sequence = sequence.unsqueeze(1)
print(sequence)

IndexError: too many indices for tensor of dimension 2

In [7]:
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

model = LSTM(input_size=max_len-1,hidden_size=100,output_size = len(vocab_idx))
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(),lr=0.005)

with torch.no_grad():
    output = model(inputs) 
    print(output.shape)
    print(output)

torch.Size([1, 1, 23])
tensor([[[0.0403, 0.0453, 0.0467, 0.0464, 0.0397, 0.0424, 0.0458, 0.0421,
          0.0412, 0.0469, 0.0439, 0.0469, 0.0434, 0.0417, 0.0454, 0.0452,
          0.0438, 0.0415, 0.0404, 0.0400, 0.0396, 0.0467, 0.0446]]])


In [8]:
print(output[0][0])
print(torch.sum(output[0][0]))

tensor([0.0403, 0.0453, 0.0467, 0.0464, 0.0397, 0.0424, 0.0458, 0.0421, 0.0412,
        0.0469, 0.0439, 0.0469, 0.0434, 0.0417, 0.0454, 0.0452, 0.0438, 0.0415,
        0.0404, 0.0400, 0.0396, 0.0467, 0.0446])
tensor(1.0000)


In [9]:
# next = np.random.choice(vocab_idx,p=output[0][0])


In [10]:
import numpy as np
# probab = output[0][0]
# probab = probab / torch.sum(probab)
# probab = probab.squeeze().tolist()
next = np.random.choice(vocab_idx,p=output[0][0])

print(next)

ValueError: probabilities do not sum to 1

In [11]:
print('length of vocab:',len(vocab_idx))
print('Length of vocab:',vocab_idx[-1])
print('Final elemenet : ', vocab_idx[-1])

length of vocab: 23
Length of vocab: 23
Final elemenet :  23


In [12]:
seed = []

while len(seed) < max_len-1:
    # print(seed)
    sequence = torch.zeros(max_len-1)
    sequence = sequence.reshape(1,1,max_len-1)
    # print(sequence.shape)
    
    with torch.no_grad():
        probab = model(sequence)
    
    probab = np.array(probab)
    
    # print(probab[0][0])
    # print(probab[0][0].sum())    
    
    normalized_probab = probab / probab.sum()
    next = np.random.choice(vocab_idx,p=normalized_probab[0][0])
    if next == len(vocab_idx):
        break
    seed.append(next)

print('Length of vocab before adding final layer:',len(seed))
print('Final layer id:',len(vocab_idx))
seed.append(len(vocab_idx))
print('Final architecture: ',seed)

Length of vocab before adding final layer: 9
Final layer id: 23
Final architecture:  [14, 2, 20, 14, 3, 20, 1, 15, 15, 23]


In [13]:
# print(seed)

In [14]:
# from parameters import *
# 
# import torch
# from torch.nn.utils.rnn import pad_sequence
# seed = []
# seed = torch.zeros(max_len-1)
# sequence = seed.reshape(1,1,max_len-1)
# print(sequence.shape)

In [13]:
## Next step is to convert the encoded architecture back to form where we understand what each layer mean

def decode_architecture(sequence):
    seed = []
    for _ in sequence:
        original_param = encoded[_]
        seed.append(original_param)
    
    # print(original_param)
    return seed

decoded = decode_architecture(seed)
print(decoded)

[{'type': 'hidden', 'nodes': 128, 'activation': 'Tanh'}, {'type': 'hidden', 'nodes': 8, 'activation': 'Tanh'}, {'type': 'hidden', 'nodes': 512, 'activation': 'Tanh'}, {'type': 'hidden', 'nodes': 128, 'activation': 'Tanh'}, {'type': 'hidden', 'nodes': 8, 'activation': 'ReLU'}, {'type': 'hidden', 'nodes': 512, 'activation': 'Tanh'}, {'type': 'hidden', 'nodes': 8, 'activation': 'Sigmoid'}, {'type': 'hidden', 'nodes': 128, 'activation': 'ReLU'}, {'type': 'hidden', 'nodes': 128, 'activation': 'ReLU'}, {'type': 'output', 'nodes': 3, 'activation': 'Softmax'}]


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from torchviz import make_dot

class CustomModel(nn.Module):
    def __init__(self, mlp_input_shape, mlp_dropout,sequence):
        super(CustomModel, self).__init__()
        self.mlp_input_shape = mlp_input_shape
        self.mlp_dropout = mlp_dropout

        # Initialize layers in __init__ method
        self.layers = self.initialize_layers(sequence)

    def initialize_layers(self,sequence):
        layer_configs = decode_architecture(sequence)  # Assuming you have a method to decode the sequence
        layers = []
        prev_nodes = []
        
        if len(self.mlp_input_shape) > 1:
            layers.append(nn.Flatten())

        for i, layer_conf in enumerate(layer_configs):
            # print(i,layer_conf)
            if layer_conf['type'] == 'dropout':
                layers.append(('dropout{}'.format(i), nn.Dropout(self.mlp_dropout)))
            else:
                activation = getattr(nn, layer_conf['activation'])() if layer_conf['activation'] else None
                # print(activation)
                
                if i == 0:
                    linear_layer = nn.Linear(self.mlp_input_shape[0],layer_conf['nodes'])
                    prev_nodes = layer_conf['nodes']
                else:
                    # print(self.mlp_input_shape.shape())
                    linear_layer = nn.Linear(prev_nodes, layer_conf['nodes'])
                    prev_nodes = layer_conf['nodes']
                    
                layers.append(('linear{}'.format(i), linear_layer))
                if activation:
                    layers.append(('activation{}'.format(i), activation))
        # print(layers)
        return nn.Sequential(OrderedDict(layers))

    def forward(self, x):
        return self.layers(x)

# Example usage:
mlp_input_shape = (10,)  # Example input shape
mlp_dropout = 0.5  # Example dropout rate

# Create an instance of the model

input_tensor = torch.randn((0,) + mlp_input_shape)
# print(input_tensor.shape)
custom_model = CustomModel(mlp_input_shape, mlp_dropout,seed)

# Create a random input tensor for testing

# Forward pass
output_tensor = custom_model(input_tensor)

# Print the model architecture
print(custom_model)


CustomModel(
  (layers): Sequential(
    (linear0): Linear(in_features=10, out_features=128, bias=True)
    (activation0): Tanh()
    (linear1): Linear(in_features=128, out_features=8, bias=True)
    (activation1): Tanh()
    (linear2): Linear(in_features=8, out_features=512, bias=True)
    (activation2): Tanh()
    (linear3): Linear(in_features=512, out_features=128, bias=True)
    (activation3): Tanh()
    (linear4): Linear(in_features=128, out_features=8, bias=True)
    (activation4): ReLU()
    (linear5): Linear(in_features=8, out_features=512, bias=True)
    (activation5): Tanh()
    (linear6): Linear(in_features=512, out_features=8, bias=True)
    (activation6): Sigmoid()
    (linear7): Linear(in_features=8, out_features=128, bias=True)
    (activation7): ReLU()
    (linear8): Linear(in_features=128, out_features=128, bias=True)
    (activation8): ReLU()
    (linear9): Linear(in_features=128, out_features=3, bias=True)
    (activation9): Softmax(dim=None)
  )
)


  return self._call_impl(*args, **kwargs)


In [57]:
# make_dot(output, params=dict(custom_model.named_parameters()))

In [57]:
## Train the model
def train_model (model, x,y):
    history = model.fit(x,y)
    return history
