In [1]:
# TEMP: Import lagom
# Not useful once lagom is installed
import sys
sys.path.append('/home/zuo/Code/lagom/')

### Use functools.partial to build a make_env function that takes no arguments but has its configuration bound internally

In [18]:
import numpy as np

# NOTE(review): scratch cell. np.swapaxes is called here with no arguments, while the
# recorded output below is the repr of the numpy module, so the output does not come
# from this call. Per the NumPy docs the signature is swapaxes(a, axis1, axis2) -- confirm
# what was intended or remove the cell.
np.swapaxes()

<module 'numpy' from '/home/zuo/anaconda3/envs/RL_server/lib/python3.6/site-packages/numpy/__init__.py'>

# VecEnv

In [None]:
import numpy as np
from multiprocessing import Process  # easier code than threading
from multiprocessing import Pipe  # Much faster than Queue


class CloudpickleWrapper(object):
    """
    Thin callable holder that is serialized with cloudpickle instead of pickle.

    multiprocessing pickles the arguments handed to a Process; wrapping a callable
    in this class makes that step go through cloudpickle.dumps (see __getstate__),
    while the receiving side restores it with the standard pickle.loads.
    """
    def __init__(self, x):
        # x: the argument-free callable to carry across the process boundary
        self.x = x

    def __call__(self):
        # Invoke the wrapped callable and hand back whatever it returns
        wrapped = self.x
        return wrapped()

    def __getstate__(self):
        # Imported lazily so cloudpickle is only needed when serialization happens
        from cloudpickle import dumps
        return dumps(self.x)

    def __setstate__(self, ob):
        # ob: the bytes produced by __getstate__
        from pickle import loads
        self.x = loads(ob)


def worker(child_conn, parent_conn, make_env):
    """
    Command loop executed inside a subprocess that owns a single environment.

    Messages are received on child_conn as (cmd, data) pairs and answered on the
    same connection. Supported commands:
        'step': call env.step(data) and send [obs, reward, done, info]
        'reset': call env.reset() and send the observation
        'reset_task': call env.reset_task() and send its result
        'get_spaces': send [env.observation_space, env.action_space]
        'close': close child_conn and leave the loop

    Args:
        child_conn: this process's end of the pipe
        parent_conn: the parent's end of the pipe; not used here, closed immediately
        make_env: argument-free callable which creates the environment

    Raises:
        NotImplementedError: if an unknown command is received
    """
    # The parent's end is not needed in this process
    parent_conn.close()
    # Create an environment
    env = make_env()

    try:
        while True:
            cmd, data = child_conn.recv()
            if cmd == 'step':
                obs, reward, done, info = env.step(data)
                if done:
                    # Auto-reset: the episode is over, so the observation sent back is
                    # the first one of the next episode, while reward/done/info still
                    # describe the final transition. The caller never has to reset
                    # an individual environment itself.
                    obs = env.reset()
                child_conn.send([obs, reward, done, info])
            elif cmd == 'reset':
                obs = env.reset()
                child_conn.send(obs)
            elif cmd == 'reset_task':
                obs = env.reset_task()
                child_conn.send(obs)
            elif cmd == 'close':
                child_conn.close()
                break
            elif cmd == 'get_spaces':
                child_conn.send([env.observation_space, env.action_space])
            else:
                raise NotImplementedError(f'Unknown command: {cmd!r}')
    finally:
        # Release the environment's resources on normal shutdown and on errors.
        # Environments without a close() method are left untouched.
        close_env = getattr(env, 'close', None)
        if callable(close_env):
            close_env()

class SubprocVecEnv(VecEnv):
    """
    Run a list of environments, each one in its own subprocess.

    The parent process talks to every subprocess over a dedicated Pipe by sending
    [command, data] messages which are served by the `worker` function.
    """
    def __init__(self, list_make_env):
        """
        Start one worker process per environment and query the spaces.

        Args:
            list_make_env (list): argument-free callables, each creating one environment

        Raises:
            ValueError: if list_make_env is empty
        """
        if len(list_make_env) == 0:
            # Without this check the zip(*...) unpacking below fails with an obscure message
            raise ValueError('list_make_env must contain at least one make_env function')

        self.waiting = False  # True between step_asyn() and step_wait()
        self.closed = False
        self.num_envs = len(list_make_env)
        self.parent_conns, self.child_conns = zip(*[Pipe() for _ in range(self.num_envs)])
        self.processes = []
        for parent_conn, child_conn, make_env in zip(self.parent_conns, self.child_conns, list_make_env):
            # make_env is wrapped so that it is serialized with cloudpickle
            self.processes.append(Process(target=worker, args=[child_conn, parent_conn, CloudpickleWrapper(make_env)]))
        for process in self.processes:
            process.daemon = True  # if the main process crashes, we should not cause things to hang
            process.start()
        # The child ends are only used inside the workers
        for conn in self.child_conns:
            conn.close()

        # Obtain observation and action spaces from the first environment
        self.parent_conns[0].send(['get_spaces', None])
        observation_space, action_space = self.parent_conns[0].recv()
        super().__init__(self.num_envs, observation_space, action_space)

    def step_asyn(self, actions):
        """
        Send one action to each environment without waiting for the results.

        NOTE(review): the name is spelled 'step_asyn'; confirm whether the VecEnv
        base class expects 'step_async'.

        Args:
            actions: iterable with one action per environment
        """
        for parent_conn, action in zip(self.parent_conns, actions):
            parent_conn.send(['step', action])

        self.waiting = True

    def step_wait(self):
        """
        Collect the results of the step requested by step_asyn().

        Returns:
            stacked observations, stacked rewards, stacked dones and a tuple of infos
        """
        observations, rewards, dones, infos = zip(*[parent_conn.recv() for parent_conn in self.parent_conns])
        self.waiting = False
        return np.stack(observations), np.stack(rewards), np.stack(dones), infos

    def reset(self):
        """
        Reset all environments.

        Returns:
            stacked observations returned by each environment's reset()
        """
        for parent_conn in self.parent_conns:
            parent_conn.send(['reset', None])
        return np.stack([parent_conn.recv() for parent_conn in self.parent_conns])

    def reset_task(self):
        """
        Call reset_task() on all environments.

        Returns:
            stacked results returned by each environment's reset_task()
        """
        for parent_conn in self.parent_conns:
            parent_conn.send(['reset_task', None])
        return np.stack([parent_conn.recv() for parent_conn in self.parent_conns])

    def close(self):
        """
        Shut down all worker processes. Calling it again has no effect.
        """
        if self.closed:
            return
        if self.waiting:
            # Drain the pending step results so the workers get to the next command
            for parent_conn in self.parent_conns:
                parent_conn.recv()
            self.waiting = False
        for parent_conn in self.parent_conns:
            parent_conn.send(['close', None])
        for process in self.processes:
            process.join()
        # Release the parent-side pipe ends once every worker has terminated
        for parent_conn in self.parent_conns:
            parent_conn.close()
        self.closed = True

# LSTM network

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim


import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


# Fixed seed so that the generated dataset is reproducible
np.random.seed(2)

T = 20    # divisor applied to the integer time steps before taking the sine
L = 1000  # number of time steps per sequence
N = 100   # number of sequences

# Every row holds the consecutive integers 0..L-1 shifted by one random integer
# offset drawn from [-4*T, 4*T); the offset differs from row to row.
x = np.empty((N, L), 'int64')
x[:] = np.arange(L)[np.newaxis, :] + np.random.randint(-4 * T, 4 * T, N)[:, np.newaxis]
# Sine waves with a random phase per row
data = np.sin(x/T).astype('float32')


class Sequence(nn.Module):
    """
    Two stacked LSTM cells followed by a linear layer, predicting the next value
    of a scalar sequence one step at a time.
    """
    def __init__(self, hidden_size=51):
        """
        Args:
            hidden_size (int): number of hidden units of both LSTM cells
        """
        super().__init__()

        self.hidden_size = hidden_size

        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x, future=0):
        """
        Args:
            x (Tensor): input sequences of shape (batch_size, sequence_length)
            future (int): number of additional steps to predict after the input
                is consumed, by feeding every prediction back in as next input

        Returns:
            Tensor of shape (batch_size, sequence_length + future) with one
            prediction per processed step
        """
        outputs = []
        batch_size, x_size = x.size()

        def initial_state():
            # Match dtype and device of the input so that models converted with
            # .double() or moved to another device work as well
            return torch.zeros(batch_size, self.hidden_size, dtype=x.dtype, device=x.device)

        h_t, c_t = initial_state(), initial_state()
        h_t_2, c_t_2 = initial_state(), initial_state()

        # One chunk of shape (batch_size, 1) per time step
        chunks = x.chunk(x_size, dim=1)

        for x_t in chunks:
            h_t, c_t = self.lstm1(x_t, (h_t, c_t))
            h_t_2, c_t_2 = self.lstm2(h_t, (h_t_2, c_t_2))

            output = self.linear(h_t_2)

            outputs.append(output)

        for _ in range(future):  # if we should predict the future
            # The previous prediction is used as the input of this step
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t_2, c_t_2 = self.lstm2(h_t, (h_t_2, c_t_2))

            output = self.linear(h_t_2)

            outputs.append(output)

        # List of (batch_size, 1) tensors -> (batch_size, steps, 1) -> (batch_size, steps)
        outputs = torch.stack(outputs, dim=1).squeeze(2)

        return outputs
        
if __name__ == '__main__':
    # Train the Sequence model on the sine data and plot its predictions after every step
    import os

    np.random.seed(0)
    torch.manual_seed(0)

    # The first 3 sequences are held out for testing. Inputs drop the last time step
    # and targets drop the first one, so the target at each position is the next value.
    input = torch.from_numpy(data[3:, :-1])
    target = torch.from_numpy(data[3:, 1:])
    test_input = torch.from_numpy(data[:3, :-1])
    test_target = torch.from_numpy(data[:3, 1:])

    seq = Sequence()

    criterion = nn.MSELoss()

    # Use LBFGS since we load whole data to train
    optimizer = optim.LBFGS(seq.parameters(), lr=0.8)

    # Number of time steps predicted beyond the end of the test input
    future = 1000

    # plt.savefig does not create missing directories
    os.makedirs('logs', exist_ok=True)

    def closure():
        # LBFGS evaluates the loss several times per step, hence the closure
        optimizer.zero_grad()
        out = seq(input)
        loss = criterion(out, target)
        print(f'Loss: {loss.item()}')
        loss.backward()
        return loss

    def draw(yi, color):
        # Solid line: predictions over the given input; dotted line: predicted future
        plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth=2.0)
        plt.plot(np.arange(input.size(1), input.size(1)+future), yi[input.size(1):], color + ':', linewidth=2.0)

    for i in range(2):
        print(f'STEP: {i}')
        optimizer.step(closure)

        # Prediction, no gradients are needed for evaluation
        with torch.no_grad():
            pred = seq(test_input, future=future)
            loss = criterion(pred[:, :-future], test_target)
            print(f'Test loss: {loss.item()}')
            y = pred.numpy()

        # Drawing
        fig = plt.figure(figsize=(30, 10))
        plt.xlabel('x')
        plt.ylabel('y')
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        draw(y[0], 'r')
        draw(y[1], 'g')
        draw(y[2], 'b')

        plt.savefig(f'logs/{i}.pdf')
        # Free the figure, otherwise every iteration keeps one more figure in memory
        plt.close(fig)

# VAE

In [1]:
# TEMP: Import lagom
# Not useful once lagom is installed
import sys
sys.path.append('/home/zuo/Code/lagom/')

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from lagom.core.networks import MLP


class VAE(nn.Module):
    """
    Variational Autoencoders (VAE) with MLP
    """
    def __init__(self, 
                 input_dim, 
                 encoder_sizes, 
                 encoder_nonlinearity, 
                 latent_dim, 
                 decoder_sizes, 
                 decoder_nonlinearity):
        """
        Set up VAE with configurations
        
        Args:
            input_dim (int): input dimension
            encoder_sizes (list): a list of sizes for encoder hidden layers
            encoder_nonlinearity (nn.functional): nonlinearity for encoder hidden layers
            latent_dim (int): latent dimension
            decoder_sizes (list): a list of sizes for decoder hidden layers
            decoder_nonlinearity (nn.functional): nonlinearity for decoder hidden layers
        """
        super().__init__()
        
        self.input_dim = input_dim
        self.encoder_sizes = encoder_sizes
        self.encoder_nonlinearity = encoder_nonlinearity
        self.latent_dim = latent_dim
        self.decoder_sizes = decoder_sizes
        self.decoder_nonlinearity = decoder_nonlinearity
        
        # Create encoder network
        # NOTE(review): output_dim=None presumably means the MLP stops at its last
        # hidden layer -- confirm against lagom's MLP
        self.encoder = MLP(input_dim=self.input_dim, 
                           hidden_sizes=self.encoder_sizes, 
                           hidden_nonlinearity=self.encoder_nonlinearity, 
                           output_dim=None, 
                           output_nonlinearity=None)
        # Last layer of encoder network to output mean and log-variance for latent variable
        self.mu_head = nn.Linear(in_features=self.encoder_sizes[-1], out_features=self.latent_dim)
        self.logvar_head = nn.Linear(in_features=self.encoder_sizes[-1], out_features=self.latent_dim)
        
        # Create decoder network
        self.decoder = MLP(input_dim=self.latent_dim, 
                           hidden_sizes=self.decoder_sizes, 
                           hidden_nonlinearity=self.decoder_nonlinearity, 
                           output_dim=self.input_dim, 
                           output_nonlinearity=None)
        
        # Initialize parameters for newly defined layers
        self._init_params()
        
    def _init_params(self):
        """
        Initialize the parameters of the mu and log-variance heads
        
        Orthogonal weight initialization and zero bias initialization
        """
        # Neither head is followed by a nonlinearity, hence gain=1 (identity)
        for head in (self.mu_head, self.logvar_head):
            # Weight initialization
            nn.init.orthogonal_(head.weight, gain=1)
            # Bias initialization
            nn.init.constant_(head.bias, 0.0)
        
    def encode(self, x):
        """
        Forward pass of encoder network. 
        
        Args:
            x (Tensor): input tensor to the encoder network
            
        Returns:
            mu (Tensor): mean of the latent variable
            logvar (Tensor): log-variance of the latent variable. 
                Note that log operation allows to optimize negative values,
                though variance must be non-negative. 
        """
        features = self.encoder(x)
        mu = self.mu_head(features)
        logvar = self.logvar_head(features)
        
        return mu, logvar
    
    def decode(self, z):
        """
        Forward pass of decoder network
        
        Args:
            z (Tensor): the sampled latent variable
            
        Returns:
            x (Tensor): the reconstruction of the input
        """
        x = self.decoder(z)
        # Use sigmoid to constrain all values in (0, 1)
        # torch.sigmoid replaces the deprecated F.sigmoid
        x = torch.sigmoid(x)
        
        return x
    
    def reparameterize(self, mu, logvar):
        """
        Sampling using reparameterization trick
        
        i.e. mu + eps*std, eps sampled from N(0, 1)
        
        Args:
            mu (Tensor): mean of a Gaussian random variable
            logvar (Tensor): log-variance of a Gaussian random variable
                Note that log operation allows to optimize negative values,
                though variance must be non-negative.
        
        Returns:
            sampled tensor according to the reparameterization trick in training
            mode, mu itself in evaluation mode
        """
        if self.training:  # training: sample with reparameterization trick
            # Recover std from log-variance
            # 0.5*logvar by logarithm law is more numerically stable than taking square root
            std = torch.exp(0.5*logvar)
            # Sample standard Gaussian noise
            eps = torch.randn_like(std)
            
            return mu + eps*std
        else:  # evaluation: no sampling, simply pass mu
            return mu
        
    def forward(self, x):
        """
        Encode the input, sample the latent variable and decode it again
        
        Args:
            x (Tensor): input tensor, it is reshaped to (-1, input_dim)
            
        Returns:
            reconstructed_x (Tensor): reconstruction of the input, shape (-1, input_dim)
            mu (Tensor): mean of the latent variable
            logvar (Tensor): log-variance of the latent variable
        """
        # Enforce the shape of x to be consistent with first layer
        x = x.view(-1, self.input_dim)
        
        # Forward pass through encoder to get mu and logvar for latent variable
        mu, logvar = self.encode(x)
        # Sample latent variable by reparameterization trick
        z = self.reparameterize(mu, logvar)
        # Forward pass through decoder of sampled latent variable to reconstruct input
        reconstructed_x = self.decode(z)
        
        return reconstructed_x, mu, logvar
    
    def calculate_loss(self, reconstructed_x, x, mu, logvar):
        """
        Calculate the VAE loss function
        VAE_loss = Reconstruction_loss + KL_loss
        Note that the losses are summed over all elements and batch
        
        For details, see https://arxiv.org/abs/1312.6114
        The KL loss is derived in Appendix B
        
        Args:
            reconstructed_x (Tensor): reconstructed x output from decoder
            x (Tensor): ground-truth x
            mu (Tensor): mean of the latent variable
            logvar (Tensor): log-variance of the latent variable
        
        Returns:
            loss (Tensor): VAE loss
        """
        # Enforce the shape of x is the same as reconstructed x
        x = x.view_as(reconstructed_x)
        
        # Calculate reconstruction loss
        # reduction='sum' replaces the deprecated size_average=False
        reconstruction_loss = F.binary_cross_entropy(reconstructed_x, 
                                                     x, 
                                                     reduction='sum')  # summed up losses
        # Calculate KL loss
        # KL(N(mu, sigma^2) || N(0, 1)) = -0.5*sum(1 + log(sigma^2) - mu^2 - sigma^2)
        KL_loss = -0.5*torch.sum(1 + logvar - mu**2 - logvar.exp())
        
        loss = reconstruction_loss + KL_loss
        
        return loss

torch.Size([2, 10])


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torchvision.utils import save_image

# Settings of the experiment
config = {
    'batch_size': 128,
    'epochs': 1,
    'seed': 1,
    'log_interval': 10,  # print the training loss every log_interval batches
}

# Seed PyTorch's random number generator
torch.manual_seed(config['seed'])

# Use the GPU whenever one is available
cuda = torch.cuda.is_available()
device = torch.device('cuda' if cuda else 'cpu')

# Extra DataLoader keywords which are only set when running on GPU
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

# MNIST training split, downloaded into data/ if it is not there yet
train_loader = DataLoader(
    datasets.MNIST('data/', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=config['batch_size'],
    shuffle=True,
    **kwargs)
# MNIST test split, read from the same directory
test_loader = DataLoader(
    datasets.MNIST('data/', train=False, transform=transforms.ToTensor()),
    batch_size=config['batch_size'],
    shuffle=True,
    **kwargs)



    
# VAE over flattened inputs of size 784 with one hidden layer of 400 units in both
# encoder and decoder and a 20-dimensional latent variable, placed on the chosen device
model = VAE(
    input_dim=784,
    encoder_sizes=[400],
    encoder_nonlinearity=F.relu,
    latent_dim=20,
    decoder_sizes=[400],
    decoder_nonlinearity=F.relu,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)



def train(epoch):
    """
    Train the global model for one pass over train_loader.

    Prints the per-sample loss of every config['log_interval']-th batch and, at the
    end, the loss averaged over the whole training set.

    Args:
        epoch (int): epoch number, only used in the printed messages
    """
    model.train()
    num_samples = len(train_loader.dataset)
    num_batches = len(train_loader)
    train_loss = 0
    for batch_idx, (images, _) in enumerate(train_loader):
        images = images.to(device)

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(images)
        loss = model.calculate_loss(recon_batch, images, mu, logvar)
        loss.backward()
        batch_loss = loss.item()  # loss summed over the batch
        train_loss += batch_loss
        optimizer.step()

        if batch_idx % config['log_interval'] != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(images), num_samples,
            100. * batch_idx / num_batches,
            batch_loss / len(images)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, train_loss / num_samples))


def test(epoch):
    """
    Evaluate the global model on test_loader without computing gradients.

    Prints the loss averaged over the test set. For the first batch, up to 8 inputs
    and their reconstructions are saved to data/reconstruction_<epoch>.png.

    Args:
        epoch (int): epoch number, only used in the name of the saved image
    """
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            recon_batch, mu, logvar = model(data)
            test_loss += model.calculate_loss(recon_batch, data, mu, logvar).item()
            if i == 0:
                n = min(data.size(0), 8)
                # Reshape to the shape of the actual batch: viewing with
                # config['batch_size'] fails whenever the batch holds fewer samples
                comparison = torch.cat([data[:n],
                                      recon_batch.view_as(data)[:n]])
                save_image(comparison.cpu(),
                         'data/reconstruction_' + str(epoch) + '.png', nrow=n)

    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))


# Train and evaluate for the configured number of epochs
for epoch in range(1, config['epochs'] + 1):
    train(epoch)
    test(epoch)
    with torch.no_grad():
        # Decode 64 latent vectors drawn from a standard Gaussian. The width is taken
        # from the model so that it always matches the model's latent dimension.
        sample = torch.randn(64, model.latent_dim).to(device)
        sample = model.decode(sample).cpu()
        save_image(sample.view(64, 1, 28, 28),
                   'data/sample_' + str(epoch) + '.png')

====> Epoch: 1 Average loss: 165.3256
====> Test set loss: 119.1399


In [None]:
====> Epoch: 1 Average loss: 164.4686
====> Test set loss: 119.2985

In [None]:
165.3256