In [4]:
%%file config.py
import numpy as np
import minirl.neural_nets.nn.init
import minirl.neural_nets.nn.optim
from minirl.neural_nets.nn.init import custom
from minirl.neural_nets.nn.optim import rmsprop

class Config(object):
    """Hyper-parameter container for training and for the model.

    Every setting is first given a default value; afterwards any attribute of
    `args` whose name matches one of those settings replaces the default.
    Attributes of `args` that do not correspond to a default are ignored.

    NOTE(review): `Agent.__init__` in model.py reads `config.ctx`, but no
    `ctx` default is defined here, so an `args.ctx` value would not be copied
    by the override loop -- confirm `ctx` is attached to the config elsewhere.
    """

    def __init__(self, args):
        # Default training settings
        training_defaults = [
            ('init_func', custom),
            ('init_config', {
                'function': lambda shape: np.random.randn(shape[0], shape[1]) / np.sqrt(shape[1])
            }),
            ('learning_rate', 1e-3),
            ('update_rule', rmsprop),
            ('grad_clip', True),
            ('clip_magnitude', 40.0),
        ]

        # Default model settings
        model_defaults = [
            ('hidden_size', 200),
            ('gamma', 0.99),
            ('lambda_', 1.0),
            ('vf_wt', 0.5),        # Weight of value function term in the loss
            ('entropy_wt', 0.01),  # Weight of entropy term in the loss
        ]

        for name, value in training_defaults + model_defaults:
            setattr(self, name, value)

        # Override defaults with values from `args`.
        overrides = args.__dict__
        for name in list(self.__dict__):
            if name in overrides:
                setattr(self, name, overrides[name])

Writing config.py


In [5]:
%%file model.py
from itertools import chain
import numpy
import scipy.signal

import minirl.neural_nets.nn.init
import minirl.neural_nets.core as core
import minirl.neural_nets.numpy as np
from minirl.neural_nets.nn.model import ModelBase
import minirl.neural_nets.numpy as np
from minirl.neural_nets.nn.init import constant

class Agent(ModelBase):
    """Actor-critic agent with a shared ReLU hidden layer.

    The network has one hidden layer (`fc1`) feeding two heads: a softmax
    policy head (`policy_fc_last`) and a linear value head (`vf_fc_last` plus
    `vf_fc_last_bias`).

    Args:
        input_size: Number of input features per observation.
        act_space: Number of discrete actions (width of the policy head).
        config: Settings object. This class reads `ctx`, `hidden_size`,
            `learning_rate`, `gamma`, `lambda_`, `vf_wt`, `entropy_wt`,
            `grad_clip`, `clip_magnitude`, `update_rule`, `init_func` and
            `init_config` from it.
    """

    def __init__(self, input_size, act_space, config):
        super(Agent, self).__init__()
        self.ctx = config.ctx
        self.act_space = act_space
        self.config = config
        self.add_param('fc1', (config.hidden_size, input_size))
        self.add_param('policy_fc_last', (act_space, config.hidden_size))
        self.add_param('vf_fc_last', (1, config.hidden_size))
        self.add_param('vf_fc_last_bias', (1,))

        self._init_params()

        # One optimizer state dict per parameter; the update rule replaces it
        # after every step (see `_update_params`).
        self.optim_configs = {}
        for p in self.param_configs:
            self.optim_configs[p] = {'learning_rate': self.config.learning_rate}

    def forward(self, X):
        """Compute action probabilities and state values for a batch.

        Args:
            X: Batch of observations, one row per observation (it is
                transposed before being multiplied by `fc1`).

        Returns:
            Tuple `(ps, vs)`: row-wise softmax probabilities over actions and
            the value-head output for each row.
        """
        a = np.dot(self.params['fc1'], X.T)
        h = np.maximum(0, a)
        logits = np.dot(h.T, self.params['policy_fc_last'].T)
        # Subtract the row maximum before exponentiating for numerical stability.
        ps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        ps /= np.sum(ps, axis=1, keepdims=True)
        vs = np.dot(h.T, self.params['vf_fc_last'].T) + self.params['vf_fc_last_bias']
        return ps, vs

    def loss(self, ps, as_, vs, rs, advs):
        """Combine policy-gradient, value-function and entropy terms.

        Args:
            ps: Action probabilities from `forward`.
            as_: One-hot encoded actions that were taken.
            vs: Value predictions from `forward`.
            rs: Discounted-return targets for the value head.
            advs: Advantage estimates weighting the log-probabilities.

        Returns:
            `policy_grad_loss + vf_wt * vf_loss - entropy_wt * entropy`.
        """
        # Clamp probabilities away from 0 and 1 so the logarithm stays finite.
        ps = np.maximum(1.0e-5, np.minimum(1.0 - 1e-5, ps))
        policy_grad_loss = -np.sum(np.log(ps) * as_ * advs)
        vf_loss = 0.5*np.sum((vs - rs)**2)
        entropy = -np.sum(ps*np.log(ps))
        loss_ = policy_grad_loss + self.config.vf_wt*vf_loss - self.config.entropy_wt*entropy
        return loss_

    def act(self, ps):
        """Sample one action index per row of `ps`.

        Uses inverse-CDF sampling: the chosen action is the first index whose
        cumulative probability exceeds a uniform random draw.
        """
        us = numpy.random.uniform(size=ps.shape[0])[:, np.newaxis]
        as_ = (numpy.cumsum(ps.asnumpy(), axis=1) > us).argmax(axis=1)
        return as_

    def train_step(self, env_xs, env_as, env_rs, env_vs):
        """Run one gradient update from per-environment trajectories.

        Args:
            env_xs: Per-environment sequences of observations.
            env_as: Per-environment sequences of action indices.
            env_rs: Per-environment lists of rewards.
            env_vs: Per-environment sequences of value estimates; presumably
                one entry longer than the matching reward list, because the
                last value is used as the bootstrap -- TODO confirm against
                the caller.
        """
        # Stack all the observations and actions.
        xs = np.vstack(list(chain.from_iterable(env_xs)))
        as_ = numpy.array(list(chain.from_iterable(env_as)))[:, np.newaxis]
        # One-hot encode the actions.
        buf = numpy.zeros([xs.shape[0], self.act_space])
        as_ = np.onehot_encode(np.array(as_.ravel(), self.ctx), buf).asnumpy()

        # Compute discounted rewards and advantages.
        drs, advs = [], []
        gamma, lambda_ = self.config.gamma, self.config.lambda_
        for i in range(len(env_vs)):
            # Compute discounted rewards with a 'bootstrapped' final value.
            rs_bootstrap = [] if env_rs[i] == [] else env_rs[i] + [env_vs[i][-1]]
            drs.extend(self._discount(rs_bootstrap, gamma)[:-1])

            # Compute advantages using Generalized Advantage Estimation;
            # see eqn. (16) of [Schulman 2016].
            delta_t = env_rs[i] + gamma*numpy.array(env_vs[i][1:]) - numpy.array(env_vs[i][:-1])
            advs.extend(self._discount(delta_t, gamma * lambda_))

        drs = numpy.array(drs)[:, np.newaxis]
        advs = numpy.array(advs)[:, np.newaxis]

        def loss_func(*params):
            # `params` is accepted only so the autograd wrapper can pass the
            # parameter arrays; the forward pass reads them from `self.params`.
            ps, vs = self.forward(xs)
            loss_ = self.loss(ps, as_, vs, drs, advs)
            return loss_

        grads = self._forward_backward(loss_func)
        self._update_params(grads)

    def _discount(self, x, gamma):
        """Return the discounted cumulative sum of `x` along axis 0.

        Filtering the reversed sequence with `y[t] = x[t] + gamma * y[t-1]`
        and reversing again gives `out[t] = sum_k gamma**k * x[t + k]`.
        """
        return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]

    def _forward_backward(self, loss_func):
        """Differentiate `loss_func` w.r.t. every parameter.

        Returns:
            Dict mapping parameter name to its gradient, clipped element-wise
            to `[-clip_magnitude, clip_magnitude]` when `config.grad_clip` is
            set.
        """
        param_arrays = list(self.params.values())
        param_keys = list(self.params.keys())
        grad_and_loss_func = core.grad_and_loss(loss_func, argnum=range(len(param_arrays)))
        grad_arrays, _ = grad_and_loss_func(*param_arrays)
        grads = dict(zip(param_keys, grad_arrays))
        if self.config.grad_clip:
            limit = self.config.clip_magnitude
            # `.items()` rather than the Python 2-only `.iteritems()`.
            grads = {k: numpy.clip(v, -limit, limit) for k, v in grads.items()}

        return grads

    def _update_params(self, grads):
        """Apply `config.update_rule` to every parameter using `grads`."""
        # Snapshot the items first: entries of `self.params` are reassigned
        # inside the loop. `.items()` replaces the Python 2-only `.iteritems()`.
        for p, w in list(self.params.items()):
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = self.config.update_rule(w, dw, config)
            self.params[p] = next_w
            self.optim_configs[p] = next_config

    def _init_params(self):
        """Initialise every registered parameter.

        Parameters whose name ends in 'bias' use `constant`; all others use
        `config.init_func`. Both receive `config.init_config`.
        """
        for name, config in self.param_configs.items():
            init_func = constant if name.endswith('bias') else self.config.init_func
            self.params[name] = init_func(config['shape'], self.config.init_config)

Writing model.py


In [None]:
# NOTE(review): Agent.__init__ (model.py cell above) requires `input_size`,
# `act_space` and `config`, so this zero-argument call raises TypeError. The
# name `agent` is also rebound in the next cell -- this cell looks like a
# leftover; confirm and remove.
agent = Agent()

In [None]:
import gym
from itertools import count
from minirl import uniAgent
# Number of episodes between two "Ave reward" printouts in main().
log_interval=100
# Render every `render_interval`-th episode; -1 disables rendering (see main()).
render_interval = -1
#env = gym.make("LunarLander-v2")
env = gym.make("CartPole-v1")
# Size of the first observation dimension and number of discrete actions.
ob_n = env.observation_space.shape[0]
ac_n = env.action_space.n
# Module-level agent used by main(); configured for the "ppo" algorithm.
agent = uniAgent(ob_n,ac_n,p_alpha=0.001,v_alpha=0.001,algo="ppo",clip=0.2,capacity=10000,batch_size=1000)
def main():
    """Train the module-level `agent` on `env`, logging the mean episode reward.

    Runs episodes forever (interrupt the kernel to stop). Each episode is
    capped at 10000 steps and ends as soon as the environment reports that it
    terminated or was truncated. After every step the agent's `learn` method
    is called with the transition. Every `log_interval` episodes the mean
    reward of the episodes since the previous printout is printed.

    Reads the globals `env`, `agent`, `log_interval` and `render_interval`.
    """
    avg_reward = []
    for i_episode in count(1):
        ep_reward = 0
        obs, _ = env.reset()
        for _ in range(10000):  # Don't infinite loop while learning
            action, _ = agent.act(obs)
            # step() returns (obs, reward, terminated, truncated, info); both
            # flags end the episode. Ignoring `truncated` would keep stepping
            # an environment that already hit its time limit.
            next_obs, reward, terminated, truncated, _ = env.step(action)
            ep_reward += reward

            if render_interval != -1 and i_episode % render_interval == 0:
                env.render()

            agent.learn(obs, reward, next_obs)
            if terminated or truncated:
                break

            obs = next_obs

        # Record the reward before logging so the episode that triggers the
        # printout is part of the average (and the list is never empty).
        avg_reward.append(ep_reward)
        if i_episode % log_interval == 0:
            print("Ave reward: {}".format(sum(avg_reward)/len(avg_reward)))
            avg_reward = []
            
main()


In [3]:

from minirl.neural_nets.core import grad

def foo(x):
    """Piecewise-linear demo function: `x` for x >= 0, otherwise `2 * x`."""
    return x if x >= 0 else 2 * x

foo_grad = grad(foo)
print (foo_grad(3))  # should print 1.0
print (foo_grad(-1)) # should print 2.0

True


AttributeError: 'numpy.bool_' object has no attribute 'mark_for_bp'

In [4]:
import minirl
minirl.set_global_policy('only_numpy')

In [5]:

from minirl.neural_nets.core import grad

def foo(x):
    """Return `x` unchanged when it is non-negative, else twice `x`."""
    if not x >= 0:
        return 2 * x
    return x

foo_grad = grad(foo)
print (foo_grad(3))  # should print 1.0
print (foo_grad(-1)) # should print 2.0

True


AttributeError: 'numpy.bool_' object has no attribute 'mark_for_bp'

In [8]:
import minirl.neural_nets.numpy as np
x = np.zeros((2, 3))     # Use MXNet GPU implementation
y = np.ones((2, 3))      # Use MXNet GPU implementation
z = np.logaddexp(x, y)   # Use NumPy CPU implementation

z

array([[1.31326169, 1.31326169, 1.31326169],
       [1.31326169, 1.31326169, 1.31326169]])

In [None]:
import minirl.neural_nets.numpy as np
from minirl.neural_nets.nn import layers
from minirl.neural_nets.nn.model import ModelBase
from minirl.neural_nets.nn.solver import Solver
from minirl.neural_nets.nn.io import NDArrayIter
#from examples.utils.data_utils import adding_problem_generator as data_gen


class RNNNet(ModelBase):
    """Single-layer recurrent network with an affine read-out.

    Registers the recurrent weights (`Wx`, `Wh`, `b`) and the output weights
    (`Wa`, `ba`). The `batch_size` constructor argument is accepted but not
    used; the batch size is taken from the input in `forward`.
    """

    def __init__(self,
                 batch_size=100,
                 input_size=2,  # input dimension
                 hidden_size=64,
                 num_classes=1):
        super(RNNNet, self).__init__()
        param_shapes = (
            ('Wx', (input_size, hidden_size)),
            ('Wh', (hidden_size, hidden_size)),
            ('b', (hidden_size,)),
            ('Wa', (hidden_size, num_classes)),
            ('ba', (num_classes,)),
        )
        for param_name, param_shape in param_shapes:
            self.add_param(name=param_name, shape=param_shape)

    def forward(self, X, mode):
        """Run the RNN over axis 1 of `X` and map the last state to outputs.

        `X` is indexed as `X[:, t, :]`, i.e. (batch, time, features). `mode`
        is accepted but not used.
        """
        weights = self.params
        n_batch, n_steps = X.shape[0], X.shape[1]
        state = np.zeros((n_batch, weights['Wh'].shape[0]))
        for step in range(n_steps):
            state = layers.rnn_step(X[:, step, :], state, weights['Wx'],
                                    weights['Wh'], weights['b'])
        return layers.affine(state, weights['Wa'], weights['ba'])

    def loss(self, predict, y):
        """L2 loss between the predictions and the targets."""
        return layers.l2_loss(predict, y)


def main():
    """Train an `RNNNet` on generated adding-problem data with `Solver`.

    Uses 10000 generated training samples and 1000 test samples, mini-batches
    of 100, and trains for 10 epochs.
    """
    net = RNNNet()
    train_inputs, train_targets = data_gen(10000)
    test_inputs, test_targets = data_gen(1000)

    minibatch = 100
    train_dataiter = NDArrayIter(train_inputs, train_targets,
                                 batch_size=minibatch, shuffle=True)
    test_dataiter = NDArrayIter(test_inputs, test_targets,
                                batch_size=minibatch, shuffle=False)

    solver_options = dict(
        num_epochs=10,
        init_rule='xavier',
        update_rule='adam',
        task_type='regression',
        verbose=True,
        print_every=20,
    )
    trainer = Solver(net, train_dataiter, test_dataiter, **solver_options)
    trainer.init()
    trainer.train()


if __name__ == '__main__':
    main()

[[ 5.01194055e-02 -1.74980294e-01  7.07506112e-03 ...  3.28273683e-02
   4.34751892e-02  3.45092676e-02]
 [ 2.21709575e-01 -7.74047621e-01  3.12974342e-02 ...  1.45216046e-01
   1.92318038e-01  1.52656142e-01]
 [ 4.23388377e-03 -1.47816244e-02  5.97672422e-04 ...  2.77312272e-03
   3.67260737e-03  2.91520274e-03]
 ...
 [ 1.81787910e-01 -7.19881408e-01  6.40112994e-02 ...  2.44758262e-01
  -4.49299641e-01 -2.36971948e-01]
 [ 8.35874165e-02 -2.91826101e-01  1.17995430e-02 ...  5.47483531e-02
   7.25064217e-02  5.75533668e-02]
 [ 1.03760713e-01 -3.62256492e-01  1.46472884e-02 ...  6.79615231e-02
   9.00053896e-02  7.14435091e-02]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[ 5.01194055e-02 -1.74980294e-01  7.07506112e-03 ...  3.28273683e-02
   4.34751892e-02  3.45092676e-02]
 [ 2.21709575e-01 -7.74047621e-01  3.12974342e-02 ...  1.45216046e-01
   1.92318038e-01  1.52656142e-01

In [21]:
""" Code from cs231n course """
import six.moves.cPickle as pickle
import numpy as np
import os, sys, random
#from scipy.misc import imread
import imageio.v2  as imread

def load_CIFAR_batch(filename):
    """Load a single pickled CIFAR batch file.

    Inputs:
        - filename: Path to a pickled dict with a 'data' entry (one flattened
          3x32x32 image per row) and a 'labels' entry.

    Returns: A tuple of
        - X: (N, 32, 32, 3) float array of images, channels last.
        - Y: (N,) array of labels.

    The number of images N is taken from the data itself rather than being
    fixed at 10000, so partial or custom batches load as well.
    """
    with open(filename, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code -- only load batch
        # files from a trusted source.
        if sys.version_info > (3, 0):
            datadict = pickle.load(f, encoding='latin1')
        else:
            datadict = pickle.load(f)
    X = np.asarray(datadict['data'])
    Y = datadict['labels']
    # Rows are stored channel-first; move the channel axis to the end.
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y


def load_CIFAR10(ROOT):
    """Load the five CIFAR-10 training batches and the test batch from ROOT.

    Returns `(Xtr, Ytr, Xte, Yte)`, where the training arrays are the
    concatenation of `data_batch_1` .. `data_batch_5` and the test arrays
    come from `test_batch`.
    """
    batch_paths = [os.path.join(ROOT, 'data_batch_%d' % (b,))
                   for b in range(1, 6)]
    batches = [load_CIFAR_batch(batch_path) for batch_path in batch_paths]
    Xtr = np.concatenate([images for images, _ in batches])
    Ytr = np.concatenate([labels for _, labels in batches])
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(data_dir,
                     num_training=49000,
                     num_validation=1000,
                     num_test=1000):
    """
    Load CIFAR-10 from `data_dir` and prepare train/validation/test splits.

    The first `num_training` training images form the training set, the next
    `num_validation` the validation set, and the first `num_test` test images
    the test set. The mean training image is subtracted from every split and
    images are returned channels-first.

    Returns a dict with keys 'X_train', 'y_train', 'X_val', 'y_val',
    'X_test' and 'y_test'.
    """
    # Load the raw CIFAR-10 data
    X_all, y_all, X_test_all, y_test_all = load_CIFAR10(data_dir)

    # Subsample the data
    val_idx = range(num_training, num_training + num_validation)
    train_idx = range(num_training)
    test_idx = range(num_test)
    images = {
        'train': X_all[train_idx],
        'val': X_all[val_idx],
        'test': X_test_all[test_idx],
    }
    labels = {
        'train': y_all[train_idx],
        'val': y_all[val_idx],
        'test': y_test_all[test_idx],
    }

    # Normalize the data: subtract the mean image
    mean_image = np.mean(images['train'], axis=0)
    for split in ('train', 'val', 'test'):
        images[split] -= mean_image
        # Transpose so that channels come first
        images[split] = images[split].transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': images['train'],
        'y_train': labels['train'],
        'X_val': images['val'],
        'y_val': labels['val'],
        'X_test': images['test'],
        'y_test': labels['test'],
    }


def _read_tiny_imagenet_image(img_file):
    """Read one image file and return it channels-first as (C, 64, 64)."""
    # This notebook binds the name `imread` to the `imageio.v2` *module*, so
    # calling `imread(...)` raises TypeError; import the function itself.
    from imageio.v2 import imread as read_image
    img = read_image(img_file)
    if img.ndim == 2:
        # Grayscale file: add a channel axis; it is broadcast over the three
        # channels when stored into the (3, 64, 64) destination slot.
        img.shape = (64, 64, 1)
    return img.transpose(2, 0, 1)


def _load_tiny_imagenet_images(img_files, dtype):
    """Load the given image paths into one (N, 3, 64, 64) array of `dtype`."""
    images = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        images[i] = _read_tiny_imagenet_image(img_file)
    return images


def load_tiny_imagenet(path, dtype=np.float32):
    """Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
        - path: String giving path to the directory to load.
        - dtype: numpy datatype used to load the data.

    Returns: A tuple of
        - class_names: A list where class_names[i] is a list of strings giving the
          WordNet names for class i in the loaded dataset.
        - X_train: (N_tr, 3, 64, 64) array of training images
        - y_train: (N_tr,) array of training labels
        - X_val: (N_val, 3, 64, 64) array of validation images
        - y_val: (N_val,) array of validation labels
        - X_test: (N_test, 3, 64, 64) array of testing images.
        - y_test: (N_test,) array of test labels; if test labels are not available
          (such as in student code) then y_test will be None.
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class. Split on the first tab only
    # so that a description containing a tab does not break the parse.
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t', 1) for line in f)
    # `.items()` rather than the Python 2-only `.iteritems()`.
    wnid_to_words = {
        wnid: [w.strip() for w in words.split(',')]
        for wnid, words in wnid_to_words.items()
    }
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d' % (i + 1,
                                                                len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]

        train_paths = [os.path.join(path, 'train', wnid, 'images', name)
                       for name in filenames]
        X_train.append(_load_tiny_imagenet_images(train_paths, dtype))
        y_train.append(np.full(len(filenames), wnid_to_label[wnid],
                               dtype=np.int64))

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            # Strip so a two-column file (wnid followed by newline) works too.
            val_wnids.append(wnid.strip())
    y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
    X_val = _load_tiny_imagenet_images(
        [os.path.join(path, 'val', 'images', name) for name in img_files],
        dtype)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = _load_tiny_imagenet_images(
        [os.path.join(path, 'test', 'images', name) for name in img_files],
        dtype)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1].strip()
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    return class_names, X_train, y_train, X_val, y_val, X_test, y_test


def data_gen(N, seq_len=30, high=1):
    """ Generate data for the adding problem.

    The task follows Quoc V. Le, Navdeep Jaitly and Geoffrey E. Hinton,
    "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units".

    Each sample is a sequence of `seq_len` steps with two features per step:
    a random value and a binary mask. The mask is zero everywhere except at
    two distinct, uniformly sampled positions, where it is one. The label is
    the sum of the two values at the marked positions. For example:

     input          label
     -----          -----
    1 4 5 3  ----->   9 (4 + 5)
    0 1 1 0

    :param N: the number of the entries.
    :param seq_len: the length of a single sequence.
    :param high: the random data is sampled from a [0, high] uniform distribution.
    :return: (X, Y), X the data of shape (N, seq_len, 2), Y the labels of
        shape (N, 1).
    """
    values = np.random.uniform(low=0, high=high, size=(N, seq_len, 1))
    mask = np.zeros((N, seq_len, 1))
    labels = np.ones((N, 1))
    for row in range(N):
        # Two distinct positions, uniformly sampled.
        picked = np.random.choice(seq_len, size=2, replace=False)
        mask[row, picked] = 1
        labels[row, 0] = np.sum(values[row, picked])
    return np.concatenate((values, mask), axis=2), labels


def gaussian_cluster_generator(num_samples=10000, num_features=500, num_classes=5):
    """ Generates several clusters of Gaussian points.

    Each class gets a random mean in [0, 1) per feature and a standard
    deviation of 0.1, and contributes `num_samples // num_classes`
    consecutive rows.

    :param num_samples: total number of rows in the returned arrays.
    :param num_features: dimensionality of each point.
    :param num_classes: number of clusters.
    :return: (x, y), x of shape (num_samples, num_features) and y the one-hot
        labels of shape (num_samples, num_classes). If `num_samples` is not
        a multiple of `num_classes`, the trailing remainder rows are left
        all-zero in both arrays.
    """
    mu = np.random.rand(num_classes, num_features)
    sigma = np.ones((num_classes, num_features)) * 0.1
    # Integer division: a float count is rejected both as an array size and
    # as a slice bound on Python 3.
    num_cls_samples = num_samples // num_classes
    x = np.zeros((num_samples, num_features))
    y = np.zeros((num_samples, num_classes))
    for i in range(num_classes):
        start, stop = i * num_cls_samples, (i + 1) * num_cls_samples
        x[start:stop] = np.random.normal(mu[i, :], sigma[i, :],
                                         (num_cls_samples, num_features))
        y[start:stop, i] = 1
    return x, y


def load_models(models_dir):
    """Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt) will
    be skipped.

    Inputs:
        - models_dir: String giving the path to a directory containing model files.
          Each model file is a pickled dictionary with a 'model' field.

    Returns:
        A dictionary mapping model file names to models.
    """
    # Besides UnpicklingError, the pickle documentation lists these as
    # exceptions that unpickling may raise (e.g. EOFError for an empty file).
    unpickling_errors = (pickle.UnpicklingError, AttributeError, EOFError,
                         ImportError, IndexError)
    models = {}
    for model_file in os.listdir(models_dir):
        model_path = os.path.join(models_dir, model_file)
        if not os.path.isfile(model_path):
            # Sub-directories cannot be opened as files; skip them.
            continue
        with open(model_path, 'rb') as f:
            try:
                # NOTE: unpickling can execute arbitrary code -- only point
                # this at directories with trusted content.
                loaded = pickle.load(f, encoding='latin1')
            except unpickling_errors:
                continue
        models[model_file] = loaded['model']
    return models

In [17]:
import imageio.v2  as imageio
image = imageio.imread('engine.jpg')
image

array([[[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       ...,

       [[ 4, 15, 33],
        [ 2, 13, 31],
        [ 7, 18, 36],
        ...,
        [ 2,  1,  6],
        [ 1,  0,  5],
        [ 1,  0,  5]],

       [[ 4, 15, 33],
        [ 2, 13, 31],
        [ 6, 17, 35],
        ...,
        [ 2,  1,  6],
        [ 1,  0,  5],
        [ 1,  0,  5]],

       [[ 4, 15, 33],
        [ 1, 12, 30],
        [ 6, 17, 35],
        ...,
        [ 2,  1,  6],
        [ 1,  0,  5],
        [ 1,  0,  5]]], dtype=uint8)