In [None]:
import numpy as np
import torch
import torch.nn as nn


class Agent(nn.Module):
    """LSTM controller that emits one logits vector per architecture decision.

    At every step the LSTM cell consumes the previous step's logits (the
    caller-supplied ``input`` for the first step) and a linear decoder maps the
    hidden state to ``num_filter_option`` logits. Because the logits are fed
    back as the next input, running more than one step requires
    ``input_size == num_filter_option`` (3).

    Args:
        input_size: feature size of the tensor given to the LSTM cell.
        hidden_size: number of LSTM hidden units.
        num_steps: number of decisions (logit vectors) produced per forward pass.
        device: device on which the initial recurrent state is allocated. An
            empty string or ``None`` falls back to ``'cpu'``.
    """

    def __init__(self, input_size, hidden_size=64, num_steps=4, device=''):
        super(Agent, self).__init__()

        # torch rejects an empty device string, so the historical default of ''
        # made the constructor raise inside init_hidden(); fall back to CPU.
        if device is None or (isinstance(device, str) and not device):
            device = 'cpu'
        self.DEVICE = device
        self.num_filter_option = 3
        self.filter_size_option = 3

        self.lstm1 = nn.LSTMCell(input_size, hidden_size)
        # A single decoder is shared by all steps; this works because both
        # option counts above are equal.
        self.decoder = nn.Linear(hidden_size, self.num_filter_option)

        self.num_steps = num_steps
        self.nhid = hidden_size
        self.hidden = self.init_hidden()

    def forward(self, input):
        """Unroll the controller for ``num_steps`` steps.

        Args:
            input: tensor fed to the LSTM cell at the first step; it must have
                a batch dimension of 1 to match the stored initial state.

        Returns:
            The per-step logits stacked along dim 0, with the size-1 batch
            dimension squeezed out.
        """
        h_t, c_t = self.hidden
        # self.hidden is a plain attribute, so Module.to()/cuda() does not move
        # it; align it with the input to avoid a device mismatch.
        h_t = h_t.to(input.device)
        c_t = c_t.to(input.device)

        outputs = []
        step_input = input
        for _ in range(self.num_steps):
            h_t, c_t = self.lstm1(step_input, (h_t, c_t))
            logits = self.decoder(h_t)
            outputs.append(logits)
            # The logits of this step become the input of the next one.
            step_input = logits

        return torch.stack(outputs).squeeze(1)

    def init_hidden(self):
        """Return a zero-filled ``(h, c)`` pair of shape ``(1, hidden_size)`` on ``self.DEVICE``."""
        h_t = torch.zeros(1, self.nhid, dtype=torch.float, device=self.DEVICE)
        c_t = torch.zeros(1, self.nhid, dtype=torch.float, device=self.DEVICE)

        return (h_t, c_t)

In [None]:
# generic model design

import torch.nn as nn
import torch.nn.functional as F


class NASModel(nn.Module):
    """Two-convolution CNN whose kernel sizes and filter counts come from the controller.

    Layout: conv1 -> ReLU -> 2x2 max-pool -> conv2 -> ReLU -> fc1 (84) -> ReLU -> fc2 (10).
    The layer sizes assume 3-channel 32x32 inputs, stride 1 and no padding.

    Args:
        actions: four values ``(kernel_1, filters_1, kernel_2, filters_2)``,
            given either as a tensor/array exposing ``tolist()`` or as a plain
            sequence.

    Raises:
        ValueError: if the chosen kernels leave no spatial extent after conv2.
    """

    def __init__(self, actions):
        super(NASModel, self).__init__()
        # Unpack the four hyper-parameters, accepting tensors as well as lists.
        values = actions.tolist() if hasattr(actions, 'tolist') else list(actions)
        self.kernel_1, self.filters_1, self.kernel_2, self.filters_2 = (int(v) for v in values)

        self.conv1 = nn.Conv2d(3, self.filters_1, self.kernel_1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(self.filters_1, self.filters_2, self.kernel_2)

        # Spatial side after each stage for a 32x32 input:
        #   conv1: 32 - kernel_1 + 1 = 33 - kernel_1
        #   pool : floor of half of that (MaxPool2d(2, 2) floors odd sizes)
        #   conv2: pooled - kernel_2 + 1
        # Integer division mirrors the pooling floor; the former float division
        # produced a wrong feature count whenever 33 - kernel_1 was odd.
        pooled_side = (33 - self.kernel_1) // 2
        conv2_side = pooled_side - self.kernel_2 + 1
        if conv2_side <= 0:
            raise ValueError(
                'kernel sizes ({}, {}) leave no spatial extent for a 32x32 input'.format(
                    self.kernel_1, self.kernel_2))

        # Number of features seen by fc1 (kept under its original attribute name).
        self.tmp = self.filters_2 * conv2_side * conv2_side
        self.fc1 = nn.Linear(self.tmp, 84)
        self.fc2 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of 3x32x32 images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = F.relu(self.conv2(x))
        # Flatten everything except the batch dimension.
        x = x.view(-1, self.tmp)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


#
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 6, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(6, 16, 5)
#         self.fc1 = nn.Linear(16 * 5 * 5, 120)
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, 10)
#
#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(-1, 16 * 5 * 5)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from torch.nn.functional import one_hot, log_softmax, softmax, normalize
from torch.distributions import Categorical
from torch.utils.tensorboard import SummaryWriter
#from controller import  Agent
from collections import deque
#from model import  NASModel
from tensorflow.keras.utils import plot_model

class PolicyGradient:
    """Policy-gradient trainer for the architecture controller.

    Each episode asks the ``Agent`` for one logits row per decision, samples an
    action per row, builds and trains a ``NASModel`` child from those actions,
    and uses the child's test accuracy as the episode reward.

    Args:
        config: object exposing ``NUM_EPOCHS``, ``ALPHA``, ``BATCH_SIZE``,
            ``HIDDEN_SIZE``, ``BETA``, ``GAMMA``, ``INPUT_SIZE``, ``NUM_STEPS``
            and ``ACTION_SPACE``.
        train_set: iterable of ``(inputs, labels)`` batches used to train children.
        test_set: iterable of ``(images, labels)`` batches used to score children.
        use_cuda: request GPU execution; honoured only when CUDA is available.
    """

    def __init__(self, config, train_set, test_set, use_cuda=False):

        self.NUM_EPOCHS = config.NUM_EPOCHS
        self.ALPHA = config.ALPHA
        self.BATCH_SIZE = config.BATCH_SIZE  # number of child models sampled per epoch
        self.HIDDEN_SIZE = config.HIDDEN_SIZE
        self.BETA = config.BETA
        self.GAMMA = config.GAMMA
        self.DEVICE = torch.device('cuda' if torch.cuda.is_available() and use_cuda else 'cpu')
        self.INPUT_SIZE = config.INPUT_SIZE
        self.NUM_STEPS = config.NUM_STEPS
        self.ACTION_SPACE = config.ACTION_SPACE

        self.train = train_set
        self.test = test_set

        # instantiate the tensorboard writer
        self.writer = SummaryWriter(comment=f'_PG_CP_Gamma={self.GAMMA},'
                                            f'LR={self.ALPHA},'
                                            f'BS={self.BATCH_SIZE},'
                                            f'NH={self.HIDDEN_SIZE},'
                                            f'BETA={self.BETA}')

        # Place the controller on the resolved device. Calling .cuda() whenever
        # use_cuda was true crashed on hosts without CUDA even though DEVICE
        # had already fallen back to CPU.
        self.agent = Agent(self.INPUT_SIZE, self.HIDDEN_SIZE, self.NUM_STEPS,
                           device=self.DEVICE).to(self.DEVICE)
        self.adam = optim.Adam(params=self.agent.parameters(), lr=self.ALPHA)
        self.total_rewards = deque([], maxlen=100)

    def solve_environment(self):
        """
            The main interface for the Policy Gradient solver.

            Runs NUM_EPOCHS controller updates; each update averages over
            BATCH_SIZE sampled child models. The TensorBoard writer is closed
            even if an epoch raises.
        """
        try:
            for epoch in range(self.NUM_EPOCHS):
                # per-epoch accumulators; the logits are kept for the entropy bonus
                logits_parts = []
                weighted_log_prob_parts = []

                for _ in range(self.BATCH_SIZE):
                    (episode_weighted_log_prob_trajectory,
                     episode_logits,
                     sum_of_episode_rewards) = self.play_episode()

                    # the deque keeps the rewards of the last 100 episodes
                    self.total_rewards.append(sum_of_episode_rewards)

                    weighted_log_prob_parts.append(episode_weighted_log_prob_trajectory)
                    logits_parts.append(episode_logits)

                epoch_weighted_log_probs = torch.cat(weighted_log_prob_parts, dim=0)
                epoch_logits = torch.cat(logits_parts, dim=0)

                loss, entropy = self.calculate_loss(epoch_logits=epoch_logits,
                                                    weighted_log_probs=epoch_weighted_log_probs)

                self.adam.zero_grad()
                loss.backward()
                self.adam.step()

                # feedback
                print("\r", f"Epoch: {epoch}, Avg Return per Epoch: {np.mean(self.total_rewards):.3f}",
                      end="",
                      flush=True)

                self.writer.add_scalar(tag='Average Return over 100 episodes',
                                       scalar_value=np.mean(self.total_rewards),
                                       global_step=epoch)

                self.writer.add_scalar(tag='Entropy',
                                       scalar_value=entropy,
                                       global_step=epoch)
        finally:
            # close the writer
            self.writer.close()

    @staticmethod
    def _action_log_probs(logits, action_index):
        """Return log pi(a_i | step i) for each row.

        Args:
            logits: ``(steps, actions)`` tensor of unnormalised scores.
            action_index: ``(steps, 1)`` integer tensor of the sampled action per row.

        Returns:
            ``(steps,)`` tensor; element ``i`` is the log-probability of
            ``action_index[i]`` under row ``i`` of ``logits``.
        """
        # gather() picks exactly one entry per row. The earlier one-hot mask had
        # shape (steps, 1, actions) and broadcast against (steps, actions), so
        # every action was scored against every step's distribution.
        return log_softmax(logits, dim=1).gather(1, action_index).squeeze(1)

    def _train_child(self, net, num_epochs=2, log_every=2000):
        """Train ``net`` on ``self.train`` with SGD, printing the mean loss every ``log_every`` batches."""
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

        for epoch in range(num_epochs):  # loop over the dataset multiple times
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(self.train, 0):
                inputs = inputs.to(self.DEVICE)
                labels = labels.to(self.DEVICE)

                optimizer.zero_grad()
                loss = criterion(net(inputs), labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def _evaluate_child(self, net):
        """Return the accuracy of ``net`` on ``self.test`` in percent (0.0 for an empty set)."""
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in self.test:
                images = images.to(self.DEVICE)
                labels = labels.to(self.DEVICE)
                predicted = net(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        return 100 * correct / total if total else 0.0

    def play_episode(self):
        """
            Samples one child architecture, trains and evaluates it.

            Returns:
                sum_weighted_log_probs: 1-element tensor holding the sum over steps of
                    log-prob(sampled action) * reward
                episode_logits: the controller logits of every step - needed for the entropy bonus
                reward: test accuracy (in percent) of the trained child model
        """
        # Init state
        init_state = [[3, 8, 16]]

        # get the action logits from the agent - (preferences)
        episode_logits = self.agent(torch.tensor(init_state).float().to(self.DEVICE))

        # sample one action per step; keep a trailing dim so it can index via gather
        action_index = Categorical(logits=episode_logits).sample().unsqueeze(1)

        episode_log_probs = self._action_log_probs(episode_logits, action_index)

        # Row i lists the candidate values for step i:
        # (kernel_1, filters_1, kernel_2, filters_2). Four rows, so this lookup
        # supports at most four controller steps.
        action_space = torch.tensor([[3, 5, 7], [8, 16, 32], [3, 5, 7], [8, 16, 32]], device=self.DEVICE)
        action = torch.gather(action_space, 1, action_index).squeeze(1)

        # generate a submodel given predicted actions, on the same device as the controller
        net = NASModel(action).to(self.DEVICE)
        print(net)

        self._train_child(net)

        acc = self._evaluate_child(net)
        print('Accuracy of the network on the 10000 test images: {}'.format(acc))
        # Save a CPU copy so the checkpoint stays loadable on machines without CUDA.
        torch.save(net.to('cpu'), 'child_model.pth')

        # compute the reward
        reward = acc

        episode_weighted_log_probs = episode_log_probs * reward
        sum_weighted_log_probs = torch.sum(episode_weighted_log_probs).unsqueeze(dim=0)

        return sum_weighted_log_probs, episode_logits, reward

    def calculate_loss(self, epoch_logits: torch.Tensor, weighted_log_probs: torch.Tensor) -> (torch.Tensor, torch.Tensor):
        """
            Calculates the policy "loss" and the entropy bonus
            Args:
                epoch_logits: logits of the policy network we have collected over the epoch
                weighted_log_probs: log-prob * reward of the actions taken
            Returns:
                policy loss + the entropy bonus
                entropy: needed for logging
        """
        policy_loss = -1 * torch.mean(weighted_log_probs)

        # Mean per-row entropy of the action distributions; subtracting
        # BETA * entropy from the loss rewards higher-entropy policies.
        p = softmax(epoch_logits, dim=1)
        log_p = log_softmax(epoch_logits, dim=1)
        entropy = -1 * torch.mean(torch.sum(p * log_p, dim=1), dim=0)
        entropy_bonus = -1 * self.BETA * entropy

        return policy_loss + entropy_bonus, entropy

In [None]:
import argparse

import torch
import torchvision
import torchvision.transforms as transforms
import sys
#from policy_gradient import PolicyGradient

# Command-line definition: a single optional --use_cuda argument that takes a value.
# No visible cell calls parser.parse_args().
parser = argparse.ArgumentParser()
parser.add_argument(
    '--use_cuda',
    help='use GPU',
)


class Params:
    """Hyper-parameters handed to PolicyGradient as its ``config`` object."""

    # --- controller network ---
    INPUT_SIZE = 3       # feature size of the controller's LSTM input
    HIDDEN_SIZE = 64     # number of hidden nodes we have in our dnn
    NUM_STEPS = 4        # logit rows the controller emits per episode
    ACTION_SPACE = 3     # number of choices per controller step

    # --- policy-gradient optimisation ---
    NUM_EPOCHS = 2       # controller updates to run
    BATCH_SIZE = 3       # how many episodes we want to pack into an epoch
    ALPHA = 0.005        # learning rate
    BETA = 0.1           # the entropy bonus multiplier
    GAMMA = 0.99         # stored by PolicyGradient and written into the run comment


def main(use_cuda=None, data_root='./data', batch_size=4, num_workers=2):
    """Build the CIFAR-10 loaders and run the policy-gradient search.

    Args:
        use_cuda: whether to request the GPU. ``None`` (the default) uses the
            GPU exactly when CUDA is available; the former hard-coded ``True``
            failed on CPU-only machines.
        data_root: directory where CIFAR-10 is stored or downloaded to.
        batch_size: batch size of both data loaders.
        num_workers: worker processes per data loader.
    """
    if use_cuda is None:
        use_cuda = torch.cuda.is_available()

    # Convert to tensors and rescale each channel with mean 0.5 and std 0.5.
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    def make_loader(train):
        # The training split is shuffled; the test split keeps its order.
        dataset = torchvision.datasets.CIFAR10(root=data_root, train=train,
                                               download=True, transform=transform)
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           shuffle=train, num_workers=num_workers)

    trainloader = make_loader(train=True)
    testloader = make_loader(train=False)

    policy_gradient = PolicyGradient(config=Params, train_set=trainloader, test_set=testloader, use_cuda=use_cuda)
    policy_gradient.solve_environment()


if __name__ == "__main__":
    main()

Files already downloaded and verified
Files already downloaded and verified
NASModel(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=3200, out_features=84, bias=True)
  (fc2): Linear(in_features=84, out_features=10, bias=True)
)
[1,  2000] loss: 1.901
[1,  4000] loss: 1.612
[1,  6000] loss: 1.465
[1,  8000] loss: 1.404
[1, 10000] loss: 1.348
[1, 12000] loss: 1.314
[2,  2000] loss: 1.219
[2,  4000] loss: 1.183
[2,  6000] loss: 1.173
[2,  8000] loss: 1.135
[2, 10000] loss: 1.109
[2, 12000] loss: 1.103
Finished Training
Accuracy of the network on the 10000 test images: 58.89
NASModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in

In [None]:
# Reload the child network saved by PolicyGradient.play_episode and print its
# logits for the first test batch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(security): the checkpoint is a fully pickled nn.Module, so loading it
# executes pickle code; only load files produced by this notebook.
# weights_only=False is required for whole-module checkpoints on PyTorch
# versions whose default is weights_only=True. map_location keeps the load
# working when the file was written on a different device.
loaded_model = torch.load('child_model.pth', map_location=device, weights_only=False)
loaded_model = loaded_model.to(device)
# Switch to inference mode once, before iterating.
loaded_model.eval()

transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

testset = torchvision.datasets.CIFAR10(root='./data', train=False,download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,shuffle=False, num_workers=2)

with torch.no_grad():

    for data in testloader:
        images, labels = data

        # Move images to the same device as the model
        images = images.to(device)

        predictions = loaded_model(images)
        print(predictions)
        # Only the first batch is inspected.
        break


Files already downloaded and verified
tensor([[ 0.5383, -1.9863,  1.7260,  2.2973, -0.1701,  2.8148,  0.6227, -0.5442,
         -0.8852, -1.9654],
        [ 5.0698,  8.5521, -3.7695, -2.3170, -3.6925, -5.2821, -7.5790, -5.2462,
          9.7848,  4.4661],
        [ 2.7481,  3.5586, -1.7064, -1.4353, -1.6955, -2.3632, -3.9211, -2.2712,
          4.5197,  2.3213],
        [ 4.4624,  1.9231, -0.3616, -0.3905, -0.7897, -3.1313, -2.9989, -3.7576,
          4.6670,  0.8637]], device='cuda:0')


In [None]:
import argparse

import torch
import torchvision
import torchvision.transforms as transforms
import sys
#from policy_gradient import PolicyGradient

# Command-line definition: a single optional --use_cuda argument that takes a value.
# No visible cell calls parser.parse_args().
parser = argparse.ArgumentParser()
parser.add_argument(
    '--use_cuda',
    help='use GPU',
)


class Params:
    """Hyper-parameters handed to PolicyGradient as its ``config`` object."""

    # --- controller network ---
    INPUT_SIZE = 3       # feature size of the controller's LSTM input
    HIDDEN_SIZE = 64     # number of hidden nodes we have in our dnn
    NUM_STEPS = 4        # logit rows the controller emits per episode
    ACTION_SPACE = 3     # number of choices per controller step

    # --- policy-gradient optimisation ---
    NUM_EPOCHS = 5       # controller updates to run
    BATCH_SIZE = 3       # how many episodes we want to pack into an epoch
    ALPHA = 0.005        # learning rate
    BETA = 0.1           # the entropy bonus multiplier
    GAMMA = 0.99         # stored by PolicyGradient and written into the run comment


def main(use_cuda=None, data_root='./data', batch_size=4, num_workers=2):
    """Build the CIFAR-10 loaders and run the policy-gradient search.

    Args:
        use_cuda: whether to request the GPU. ``None`` (the default) uses the
            GPU exactly when CUDA is available; the former hard-coded ``True``
            failed on CPU-only machines.
        data_root: directory where CIFAR-10 is stored or downloaded to.
        batch_size: batch size of both data loaders.
        num_workers: worker processes per data loader.
    """
    if use_cuda is None:
        use_cuda = torch.cuda.is_available()

    # Convert to tensors and rescale each channel with mean 0.5 and std 0.5.
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    def make_loader(train):
        # The training split is shuffled; the test split keeps its order.
        dataset = torchvision.datasets.CIFAR10(root=data_root, train=train,
                                               download=True, transform=transform)
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           shuffle=train, num_workers=num_workers)

    trainloader = make_loader(train=True)
    testloader = make_loader(train=False)

    policy_gradient = PolicyGradient(config=Params, train_set=trainloader, test_set=testloader, use_cuda=use_cuda)
    policy_gradient.solve_environment()


if __name__ == "__main__":
    main()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:12<00:00, 13152239.87it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
[1,  2000] loss: 1.950
[1,  4000] loss: 1.612
[1,  6000] loss: 1.505
[1,  8000] loss: 1.405
[1, 10000] loss: 1.352
[1, 12000] loss: 1.300
[2,  2000] loss: 1.230
[2,  4000] loss: 1.179
[2,  6000] loss: 1.149
[2,  8000] loss: 1.121
[2, 10000] loss: 1.094
[2, 12000] loss: 1.093
Finished Training
Accuracy of the network on the 10000 test images: 62.7
[1,  2000] loss: 1.935
[1,  4000] loss: 1.553
[1,  6000] loss: 1.451
[1,  8000] loss: 1.356
[1, 10000] loss: 1.313
[1, 12000] loss: 1.265
[2,  2000] loss: 1.120
[2,  4000] loss: 1.127
[2,  6000] loss: 1.099
[2,  8000] loss: 1.052
[2, 10000] loss: 1.056
[2, 12000] loss: 0.998
Finished Training
Accuracy of the network on the 10000 test images: 64.79
[1,  2000] loss: 1.956
[1,  4000] loss: 1.646
[1,  6000] loss: 1.523
[1,  8000] loss: 1.494
[1, 10000] loss: 1.406
[1, 12000] loss: 1.378
[2,  2000] loss: 1.296
[2,  4000] loss: 1.287
[2,  6000] loss: 1.256
[2,  

In [None]:
# NOTE(review): no visible cell defines a notebook-level `actions`; the name only
# appears as the constructor parameter of NASModel, and the recorded output of
# this cell is a NameError. Confirm what was meant to be inspected here, or
# remove the cell so the notebook survives Restart & Run All.
print(actions)

NameError: ignored