In [1]:
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

import random
import matplotlib.pyplot as plt

from distributions import Categorical, DiagGaussian
from collections import namedtuple

import img_env 

import utils

import model

from PIL import Image

from random import randint
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


Define the pretrained CNN
======================

In [4]:
class CNN_pretrained(nn.Module):
    def __init__(self):
        super(CNN_pretrained, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

############ train the CNN_pretrained on MNIST ##################
# n_epochs = 3
# batch_size_train = 64
# batch_size_test = 1000
# learning_rate = 0.01
# momentum = 0.5
# log_interval = 10

# random_seed = 1
# torch.backends.cudnn.enabled = False
# torch.manual_seed(random_seed)


# train_loader = torch.utils.data.DataLoader(
#   torchvision.datasets.MNIST('./pretrained_CNN/data/', train=True, download=True,
#                              transform=torchvision.transforms.Compose([
#                                torchvision.transforms.ToTensor(),
#                                torchvision.transforms.Normalize(
#                                  (0.1307,), (0.3081,))
#                              ])),
#   batch_size=batch_size_train, shuffle=True)

# test_loader = torch.utils.data.DataLoader(
#   torchvision.datasets.MNIST('./pretrained_CNN/data/', train=False, download=True,
#                              transform=torchvision.transforms.Compose([
#                                torchvision.transforms.ToTensor(),
#                                torchvision.transforms.Normalize(
#                                  (0.1307,), (0.3081,))
#                              ])),
#   batch_size=batch_size_test, shuffle=True)
    
# CNN = CNN_pretrained()
# optimizer_CNN = optim.SGD(CNN.parameters(), lr=learning_rate,
#                       momentum=momentum)

# train_losses = []
# train_counter = []
# test_losses = []
# test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

# def train(epoch):
#     CNN.train()
#     for batch_idx, (data, target) in enumerate(train_loader):
#         optimizer_CNN.zero_grad()
#         output = CNN(data)
#         loss = F.nll_loss(output, target)
#         loss.backward()
#         optimizer_CNN.step()
#         if batch_idx % log_interval == 0:
#             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
#                 epoch, batch_idx * len(data), len(train_loader.dataset),
#                 100. * batch_idx / len(train_loader), loss.item()))
#             train_losses.append(loss.item())
#             train_counter.append(
#                 (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
#             torch.save(optimizer_CNN.state_dict(), './pretrained_CNN/results/model.pth')
#             torch.save(optimizer_CNN.state_dict(), './pretrained_CNN/results/optimizer.pth')
            
# def test():
#     CNN.eval()
#     test_loss = 0
#     correct = 0
#     with torch.no_grad():
#         for data, target in test_loader:
# #             print (target.shape)
# #             print (data.shape)
#             output = CNN(data)
#             test_loss += F.nll_loss(output, target, size_average=False).item()
#             pred = output.data.max(1, keepdim=True)[1]
#             correct += pred.eq(target.data.view_as(pred)).sum()
#             test_loss /= len(test_loader.dataset)
#             test_losses.append(test_loss)
#             print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
#                 test_loss, correct, len(test_loader.dataset),
#                 100. * correct / len(test_loader.dataset)))
            
# test()
# for epoch in range(1, n_epochs + 1):
#     train(epoch)
#     test()





Test the pretrained CNN
=======
Try to classify the images emitted by our env. using the pretrained CNN
Note: The pretrained CNN was trained on (28, 28) MNIST images, so I removed the resize to (32, 32) in img_env.py to keep the original (28, 28) image shape.

In [5]:
################### load the pretrained CNN
CNN_pretr = CNN_pretrained()
CNN_state_dict = torch.load('./pretrained_CNN/results/model.pth')
CNN_pretr.load_state_dict(CNN_state_dict)

for param in CNN_pretr.parameters():
    param.requires_grad = False

env = img_env.ImgEnv('mnist', train=True, max_steps=1, channels=2, window=28, num_labels=2)

test_loss = 0
correct = 0

with torch.no_grad():
    for t in range(1000):
        obs = env.reset()
        data = torch.from_numpy(obs[1])
        data = torch.from_numpy(obs[1]).float().resize_(1, 1, 28, 28).to(device)
#         plt.imshow(obs[1])
#         plt.show()
        target = env.curr_label.resize_((1))
        output = CNN_pretr(data)
        test_loss += F.nll_loss(output, target, size_average=False).item()
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).sum()
#         print ('target = {target}, pred = {pred}'.format(**locals()))
test_loss /= 1000
print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
                test_loss, correct, 1000,
                100. * correct / 1000))

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m





Test set: Avg. loss: 0.1416, Accuracy: 961/1000 (96%)



Build the model
-------------

In [53]:
def smoothing_average(x, factor=10):
    running_x = 0
    for i in range(len(x)):
        U = 1. / min(i+1, factor)
        running_x = running_x * (1 - U) + x[i] * U
        x[i] = running_x
    return x




class myNet_CNNpretrained(nn.Module):
    def __init__(self, obs_shape, action_space, recurrent_policy=False, dataset=None, resnet=False, pretrained=False):
        super(myNet_CNNpretrained, self).__init__()
        self.dataset = dataset
        if len(obs_shape) == 3: #our mnist case
            self.base = model.CNNBase(obs_shape[0], recurrent_policy, dataset=dataset)
        elif len(obs_shape) == 1:
            assert not recurrent_policy, \
                "Recurrent policy is not implemented for the MLP controller"
            self.base = MLPBase(obs_shape[0])
        else:
            raise NotImplementedError

        if action_space.__class__.__name__ == "Discrete": # our case
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError

        if dataset in ['mnist', 'cifar10']:
            self.clf = CNN_pretr
            for param in self.clf.parameters(): # freeze the pretrained CNN
                param.requires_grad = False
    
        self.state_size = self.base.state_size

    def forward(self, inputs, states, masks):
        raise NotImplementedError

    def act(self, inputs, states, masks, deterministic=False):
        actor_features, states = self.base(inputs, states, masks)
        self.actor_features = actor_features
        dist = self.dist(actor_features)
        Q_values = dist.logits
        
        if deterministic:
            action = dist.mode()
        else:
            action = dist.sample()

        action_log_probs = dist.log_probs(action)

        if self.dataset in img_env.IMG_ENVS:
            input_CNN = torch.from_numpy(states[1]).float().resize_(1, 1, 28, 28).to(device)
            with torch.no_grad():
                clf_proba = self.clf(input_CNN)
                classif = clf_proba.data.max(1, keepdim=True)[1]
#                 print (clf_proba)
#                 print (classif)
#                 print (clf_proba.gather(1, classif))
            
            action = torch.cat([action, classif], 1)
            action_log_probs += clf_proba.gather(1, classif)

        return action, Q_values, clf_proba, action_log_probs, states #dist.logits = Q values



Replay Memory
-------------

In [22]:
class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Saves a transition."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward', 'curr_label'))


Optimization: ToDo
-------------

Training ....
-------------


In [54]:
if __name__ == '__main__':
    BATCH_SIZE = 128
    NUM_STEPS = 1
    GAMMA = 1 - (1 / NUM_STEPS) # Set to horizon of max episode length
    EPS = 0.05
    NUM_LABELS = 2
    WINDOW_SIZE = 28
    NUM_EPISODES = 1000
    TARGET_UPDATE = 10

    env = img_env.ImgEnv('mnist', train=True, max_steps=NUM_STEPS, channels=2, window=WINDOW_SIZE, num_labels=NUM_LABELS)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = myNet_CNNpretrained(\
        obs_shape=env.observation_space.shape, \
        action_space=env.action_space, dataset='mnist', pretrained=True).to(device)
    memory = ReplayMemory(10000)

    total_rewards = {}
    episode_durations = {}
    loss_classification = {}


    for i_episode in range(NUM_EPISODES):
        total_reward_i = 0
        observation = env.reset()
        curr_label = env.curr_label.item()
        for t in range(NUM_STEPS): 
            actionS, Q_values, clf_proba, action_log_probs, states = net.act(inputs=torch.from_numpy(observation).float().to(device), \
                states=observation, masks=observation[1])
            actionS = actionS.cpu().numpy()[0]
            class_pred = actionS[1]
            last_observation = observation
            rand = np.random.rand()
            if rand < EPS:
                actionS = np.array(
                    [np.random.choice(range(4)), np.random.choice(range(NUM_LABELS))])
            action = actionS[0]
            observation, reward, done, info = env.step(actionS)
            total_reward_i = reward + GAMMA*total_reward_i
            memory.push(torch.from_numpy(last_observation), torch.from_numpy(actionS), \
                torch.from_numpy(observation), torch.tensor([reward]).float(), torch.tensor([curr_label]))
            print ('t = %i: action = %i, label = %i, class_pred = %i, reward = %f'%(t, action, curr_label, class_pred, reward))
#             optimize_myNet(net, curr_label, optimizer_clf, BATCH_SIZE)

            if done:
    # 				# print ('Done after %i steps'%(t+1))
                break
        
        loss_classification_i = F.nll_loss(clf_proba, env.curr_label.unsqueeze_(dim=0).to(device))

        if curr_label in total_rewards.keys():
            total_rewards[curr_label].append(total_reward_i)
            episode_durations[curr_label].append(t)
            loss_classification[curr_label].append(loss_classification_i)
        else:
            total_rewards[curr_label] = [total_reward_i]
            episode_durations[curr_label] = [t]
            loss_classification[curr_label] = [loss_classification_i]
    plt.title('Class 0')
    plt.subplot(3, 1, 1)
    plt.xlabel('Episode')
    plt.ylabel('Episode_Duration')
    durations_t = torch.tensor(episode_durations[0], dtype=torch.float)
    plt.plot(smoothing_average(durations_t.numpy()))

    plt.subplot(3, 1, 2)
    plt.xlabel('Episode')
    plt.ylabel('Rewards')
    total_rewards_t = torch.tensor(total_rewards[0], dtype=torch.float)
    plt.plot(smoothing_average(total_rewards_t.numpy()))

    plt.subplot(3, 1, 3)
    plt.ylim(top=1)
    plt.xlabel('Episode')
    plt.ylabel('Loss Classification')
    loss_classification_t = torch.tensor(loss_classification[0], dtype=torch.float)
    plt.plot(smoothing_average(loss_classification_t.numpy()))
    plt.show()

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


RuntimeError: Expected 4-dimensional input for 4-dimensional weight [10, 2, 5, 5], but got 3-dimensional input of size [2, 28, 28] instead