In [31]:
ReloadProject('deep_learning')

notebook_init.py imported and reloaded
forwarded symbol: Activation
forwarded symbol: Dense
forwarded symbol: Sequential
reloaded: gym
forwarded symbol: gym
reloaded: interface
forwarded symbol: interface
reloaded: keras
forwarded symbol: keras
reloaded: layers
forwarded symbol: layers
reloaded: models
forwarded symbol: models


## Simple policy
First create a simple environment that emits a 4-value observation at each step, of which only the first value matters. If the first value is <= 0.5, the expected action is 0; otherwise it is 1.

In [2]:
class SimpleEnv():
    """Toy environment whose rewarded action depends only on the first observation value.

    Every observation is a fresh `np.random.rand(4)` vector. For the current
    observation the rewarded action is 0 when its first element is <= 0.5 and
    1 otherwise; the remaining three elements never influence the reward.
    """

    def __init__(self, num_of_steps: int):
        """Creates an episode that lasts `num_of_steps` steps.

        Args:
            num_of_steps: number of `step` calls after which `done` is reported.
        """
        self._total_num_of_steps = num_of_steps
        self._current_status = np.random.rand(4)
        self._current_step_idx = 0

    def reset(self):
        """Starts a new episode.

        Returns:
            The initial observation, i.e. the one the first `step` call is
            scored against.
        """
        self._current_status = np.random.rand(4)
        self._current_step_idx = 0
        return self._current_status

    def action_space_n(self):
        """Returns the number of discrete actions (always 2)."""
        return 2

    def action_space_sample(self):
        """Returns a uniformly random action, 0 or 1."""
        return np.random.randint(2)

    def step(self, action):
        """Scores `action` against the current observation and advances one step.

        Args:
            action: the chosen action; 0 or 1.

        Returns:
            A `(observation, reward, done, info)` tuple. `reward` is 1 when
            `action` matches the observation that was current *before* this
            call and -1 otherwise; `observation` is the new observation to act
            on next; `done` is True once `num_of_steps` steps have been taken;
            `info` is always None.
        """
        expected_action = 0 if self._current_status[0] <= 0.5 else 1
        reward = 1 if action == expected_action else -1
        # Draw the observation the next action will be scored against.
        self._current_status = np.random.rand(4)
        self._current_step_idx += 1
        done = self._current_step_idx >= self._total_num_of_steps
        return self._current_status, reward, done, None

## Non-RL
Let's see if we can create a simple model that learns this behavior without RL.

In [36]:
# Supervised version of the SimpleEnv task: 10000 random samples with 4
# features each.
train_data = np.random.rand(10000, 4)
# The label is True (class 1) when the first feature exceeds 0.5; the other
# three features play no part in the label.
train_labels = train_data[:,0] > 0.5
# One-hot encode the labels into two columns (class 0, class 1).
one_hot_train_labels = keras.utils.to_categorical(train_labels, num_classes=2)

In [42]:
# Small fully-connected classifier: 4 inputs -> three hidden layers of
# 6 ReLU units -> 2-way softmax.
model = Sequential()
model.add(Dense(6, activation='relu', input_dim=4))
model.add(Dense(6, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(2, activation='softmax'))

# The targets are one-hot vectors and the output layer is a softmax, so the
# matching loss is categorical cross-entropy (binary cross-entropy is meant
# for independent sigmoid outputs).
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_data, one_hot_train_labels, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe769824668>

In [43]:
test_data = np.random.rand(10, 4)
print(test_data[:,0])
model.predict(test_data)

[0.37939964 0.99905268 0.02726233 0.81033626 0.94175805 0.7477724
 0.13083237 0.32436433 0.98274507 0.6977157 ]


array([[9.9985409e-01, 1.4586173e-04],
       [2.6681549e-13, 1.0000000e+00],
       [9.9999964e-01, 3.4639271e-07],
       [1.8640400e-10, 1.0000000e+00],
       [1.3699250e-12, 1.0000000e+00],
       [2.2386972e-09, 1.0000000e+00],
       [9.9999595e-01, 4.0039445e-06],
       [9.9990284e-01, 9.7114847e-05],
       [2.1742263e-14, 1.0000000e+00],
       [1.0138200e-07, 9.9999988e-01]], dtype=float32)

OK, the binary classification is a success. Out of curiosity, let's see whether multiclass classification works too.

In [56]:
# Four-class variant: the label is the quarter of [0, 1) that the first
# feature falls into (0: [0, .25), 1: [.25, .5), 2: [.5, .75), 3: [.75, 1)).
bins = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
train_data = np.random.rand(10000, 4)
# np.digitize returns 1-based bin indices for in-range values; shift to 0-based.
train_labels = np.digitize(train_data[:,0], bins) - 1
one_hot_train_labels = keras.utils.to_categorical(train_labels, num_classes=4)

model = Sequential()
model.add(Dense(6, activation='relu', input_dim=4))
model.add(Dense(6, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(4, activation='softmax'))

# Mutually exclusive classes with a softmax output need categorical
# cross-entropy. With 'binary_crossentropy' each of the 4 outputs is scored as
# an independent yes/no problem, and Keras 2.x then reports a per-element
# binary accuracy that overstates how often the predicted class is right.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_data, one_hot_train_labels, epochs=10, batch_size=32)

# Spot-check: print the first feature of 10 fresh samples next to the
# predicted class probabilities.
test_data = np.random.rand(10, 4)
print(test_data[:,0])
model.predict(test_data)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.74922858 0.11994309 0.15804021 0.62309971 0.4978077  0.73010793
 0.91255119 0.26384172 0.17103494 0.19295214]


array([[1.2368723e-17, 1.0725839e-06, 4.2867875e-01, 5.7132024e-01],
       [9.9806923e-01, 1.9307450e-03, 7.3191377e-09, 3.8498161e-27],
       [9.8754787e-01, 1.2452028e-02, 1.0891723e-07, 5.2449232e-25],
       [7.9667989e-13, 8.2713383e-04, 9.9825686e-01, 9.1593619e-04],
       [5.2043276e-07, 3.9721370e-01, 6.0278529e-01, 5.2876754e-07],
       [4.6349812e-18, 1.8140678e-06, 7.3542351e-01, 2.6457474e-01],
       [4.6628866e-26, 4.6066701e-12, 1.8695020e-03, 9.9813050e-01],
       [2.9422596e-01, 7.0568389e-01, 9.0169458e-05, 8.4738005e-19],
       [9.8274910e-01, 1.7250579e-02, 1.8972604e-07, 1.6241024e-24],
       [9.4077790e-01, 5.9221227e-02, 9.2271250e-07, 1.9560873e-23]],
      dtype=float32)

Great, so it's working!

In [54]:
# Sanity check of the label bucketing on a sample small enough to print.
bins = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
train_data = np.random.rand(20, 4)
# Bin index of the first feature, shifted by one so the classes start at 0.
train_labels = np.digitize(train_data[:,0], bins) - 1
print(train_labels)
# Each printed row should carry a single 1 in the column given by its label.
one_hot_train_labels = keras.utils.to_categorical(train_labels, num_classes=4)
print(one_hot_train_labels)

[2 1 0 0 1 3 3 2 1 1 2 3 0 0 1 0 3 3 0 2]
[[0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]]


In [32]:
# Reference example: a 10-class softmax classifier over 100-dimensional input
# with one hidden layer of 32 ReLU units.
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=100))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Generate dummy data. The labels are drawn independently of `data`, so there
# is no input/label relationship for the model to learn.
import numpy as np
data = np.random.random((1000, 100))
labels = np.random.randint(10, size=(1000, 1))

# Convert labels to categorical one-hot encoding
one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)

# Train the model, iterating on the data in batches of 32 samples
model.fit(data, one_hot_labels, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe76a700f60>

In [33]:
model.predict(np.random.rand(1, 100))

array([[0.07188411, 0.16237906, 0.132416  , 0.131351  , 0.10634772,
        0.0702582 , 0.08143447, 0.09775764, 0.06400643, 0.08216538]],
      dtype=float32)

In [34]:
# Reference example: binary classification with a single sigmoid output.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Generate dummy data. Labels are random 0/1 values drawn independently of the
# features, so there is no input/label relationship for the model to learn.
x_train = np.random.random((1000, 20))
y_train = np.random.randint(2, size=(1000, 1))
x_test = np.random.random((100, 20))
y_test = np.random.randint(2, size=(100, 1))

# Two hidden layers of 64 ReLU units, each followed by 50% dropout.
model = Sequential()
model.add(Dense(64, input_dim=20, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# A single sigmoid output with 0/1 targets pairs with binary cross-entropy.
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(x_train, y_train,
          epochs=20,
          batch_size=128)
# `score` is computed but not displayed by this cell.
score = model.evaluate(x_test, y_test, batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [35]:
model.predict(x_test)

array([[0.50066435],
       [0.49652892],
       [0.50010145],
       [0.48597416],
       [0.47340217],
       [0.52196383],
       [0.4891368 ],
       [0.5038686 ],
       [0.4983057 ],
       [0.5235382 ],
       [0.49910942],
       [0.48698172],
       [0.4880431 ],
       [0.48587176],
       [0.48998708],
       [0.5028596 ],
       [0.4940164 ],
       [0.5047681 ],
       [0.48678192],
       [0.47945258],
       [0.51610047],
       [0.4979995 ],
       [0.5015438 ],
       [0.5231593 ],
       [0.474516  ],
       [0.48865914],
       [0.49641323],
       [0.5035954 ],
       [0.48940527],
       [0.4930465 ],
       [0.5050711 ],
       [0.49330568],
       [0.49054077],
       [0.49664098],
       [0.49356365],
       [0.49157763],
       [0.51939327],
       [0.4919153 ],
       [0.47439837],
       [0.50665563],
       [0.46942124],
       [0.5032462 ],
       [0.49407402],
       [0.5298711 ],
       [0.47959152],
       [0.48972762],
       [0.4784638 ],
       [0.501

In [25]:
# Roll out 10 random actions to eyeball the environment.
# Note: the printed reward scores the action against the observation that was
# current *before* the step, not the observation printed on the same line.
env = SimpleEnv(10000)
# Name the loop counter `t`: the episode-finished message below reads it.
for t in range(10):
    action = env.action_space_sample()
    print(action)
    observation, reward, done, info = env.step(action)
    print(observation, reward)
    if done:
        print("Episode finished after {} timesteps".format(t+1))
        break

1
[0.890261   0.30248666 0.7008052  0.39708824] 1
0
[0.34393121 0.06560637 0.12531579 0.73289267] -1
1
[0.42257391 0.68990481 0.54848968 0.2782972 ] -1
1
[0.03662617 0.9784738  0.54163047 0.96236332] -1
1
[0.94987607 0.08062103 0.72281151 0.87880736] -1
0
[0.89082242 0.87890617 0.22894185 0.79595253] -1
1
[0.17689491 0.7672027  0.00398238 0.60066968] 1
0
[0.38071283 0.45143663 0.96309125 0.1316958 ] 1
0
[0.87130597 0.8817402  0.60087639 0.53345163] 1
1
[0.1398914  0.31910011 0.78802726 0.88674186] 1


In [None]:
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

In [5]:
np.random.rand(4)

array([0.46876781, 0.12773732, 0.76752294, 0.22103882])

In [26]:
observation

array([0.1398914 , 0.31910011, 0.78802726, 0.88674186])

In [None]:
env = gym.make('CartPole-v1')
env.reset()
env.action_space

In [1]:
"""Provides an easy to use notebook environment."""
# For notebook setup.
import importlib
import os
import sys

# For actual work.
import numpy as np
import scipy
import pandas as pd
from IPython import display
from matplotlib import pyplot as plt

_EXTERNAL_ROOT = '/workspace/external/'
_EXCLUDE_PREFIXES = ('__', '_', '.')
_PYTHON_SUFFIX = '.py'


def ReloadProject(project_name: str) -> None:
  """Loads and reloads all modules for a project.

  Walks the project directory, imports every package found, and imports (or
  reloads, when already present in `sys.modules`) every Python module whose
  file name does not contain `test`. Directories and files whose names start
  with `_` or `.` are skipped. A module or package that fails to load is
  reported and the walk continues.

  Args:
    project_name: a project is a directory under `/workspace/external/`.
  """
  project_root = os.path.join(_EXTERNAL_ROOT, project_name)
  if project_root not in sys.path:
    if os.path.exists(project_root):
      sys.path.append(project_root)
      print('Path %s added to PYTHON_PATH.' % project_root)
    else:
      print('Path %s does not exist.' % project_root)
      return

  python_suffix_len = len(_PYTHON_SUFFIX)
  for current_dir, dirnames, filenames in os.walk(
      project_root, followlinks=False):
    # Prune in place so os.walk does not descend into excluded directories.
    dirnames[:] = [
        dirname for dirname in dirnames
        if not dirname.startswith(_EXCLUDE_PREFIXES)]
    module_stems = [
        filename[:-python_suffix_len] for filename in filenames
        if not filename.startswith(_EXCLUDE_PREFIXES) and
        filename.endswith(_PYTHON_SUFFIX) and
        'test' not in filename]

    # Dotted package path of this directory relative to the project root;
    # empty for the root itself. relpath copes with a trailing separator in
    # `project_name` and with non-'/' path separators.
    relative_dir = os.path.relpath(current_dir, project_root)
    if relative_dir == os.curdir:
      package_name = ''
    else:
      package_name = relative_dir.replace(os.sep, '.')

    if package_name and (module_stems or dirnames):
      try:
        Import(package_name)
      except Exception as e:
        print('error loading module %s: %s' % (package_name, e))

    for stem in module_stems:
      # This is the full module name, including packages. Modules directly
      # under the project root have no package prefix (and no leading dot).
      module_name = package_name + '.' + stem if package_name else stem
      try:
        if module_name in sys.modules:
          importlib.reload(sys.modules[module_name])
          print('reloaded: ' + module_name)
        else:
          Import(module_name)
      except Exception as e:
        print('error loading module %s: %s' % (module_name, e))
        
def Import(module_name):
  """Imports `module_name` and prints a confirmation.

  The module ends up in `sys.modules`, but no name is bound in the caller's
  namespace; use `sys.modules[module_name]` or a regular `import` statement
  to get a handle on it.

  Args:
    module_name: absolute dotted module name, e.g. `lib.policy`.

  Raises:
    ImportError: if the module cannot be found.
  """
  # importlib.import_module performs the same import as `import <name>`
  # without running exec() on a concatenated string.
  importlib.import_module(module_name)
  print('imported: ' + module_name)

In [2]:
ReloadProject('deep_learning')

Path /workspace/external/deep_learning added to PYTHON_PATH.
imported: notebook
imported: lib
imported: lib.policy
imported: lib.bb
bb!
imported: lib.bb.bb


In [7]:
importlib.import_module(name='lib', package='lib')

<module 'lib' (namespace)>

In [5]:
lib

NameError: name 'lib' is not defined

In [5]:
for nn, mm in sys.modules.items():
    if 'lib' == nn:
        print(nn, mm)

lib <module 'lib' (namespace)>


In [4]:
import lib

In [2]:
lib.policy

NameError: name 'lib' is not defined

In [3]:
__import__('lib')

<module 'lib' (namespace)>

In [3]:
import lib.bb

In [4]:
exec('import lib')

In [49]:
ReloadProject('deep_learning')

notebook_init.py imported and reloaded
reloaded: gym
reloaded: numpy
reloaded: policy


In [47]:
policy.PolicyInterface

<module 'lib.policy' from '/workspace/external/deep_learning/lib/policy.py'>

In [32]:
for a in sys.modules:
    if 'policy' in a:
        print(a)

email._policybase
lib.policy


In [30]:
dir(notebook_init)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'gym',
 'numpy',
 'policy']

In [29]:
import notebook_init

In [33]:
importlib.reload(notebook_init.policy)

<module 'lib.policy' from '/workspace/external/deep_learning/lib/policy.py'>

In [34]:
a = notebook_init.policy

In [35]:
dir(a)

['PolicyInterface',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__']

In [37]:
hasattr('notebook_init.__doc__', '__loader__')

False

In [40]:
hasattr(eval('notebook_init.policy'), '__loader__')

True

In [39]:
hasattr(notebook_init.policy, '__loader__')

True

<module 'lib.policy' from '/workspace/external/deep_learning/lib/policy.py'>

In [19]:
_EXTERNAL_ROOT = '/workspace/external/'
_NOTEBOOK_INIT = 'notebook_init'


def ReloadProject(project_name: str) -> None:
  """Loads and reloads all modules for a project.

  Imports `project_name/notebook_init.py`, then reloads it. Every module it
  exposes is reloaded, and every public symbol (name not starting with `_`)
  is forwarded to the global scope. A module that fails to reload is reported
  and the remaining symbols are still processed.

  Args:
    project_name: a project is a directory under `/workspace/external/`.
  """
  project_root = os.path.join(_EXTERNAL_ROOT, project_name)
  if project_root not in sys.path:
    if os.path.exists(project_root):
      sys.path.append(project_root)
      print('Path %s added to PYTHON_PATH.' % project_root)
    else:
      print('Path %s does not exist.' % project_root)
      return

  if not os.path.exists(os.path.join(project_root, _NOTEBOOK_INIT + '.py')):
    print(_NOTEBOOK_INIT + '.py not found from project directory.')
    return

  # NOTE(review): the module is resolved through sys.path/sys.modules, so if
  # another project's directory was added earlier its notebook_init is
  # presumably the one picked up here -- confirm before using with more than
  # one project per kernel.
  notebook_init = importlib.import_module(_NOTEBOOK_INIT)
  notebook_init = importlib.reload(notebook_init)
  print(_NOTEBOOK_INIT + '.py imported and reloaded')

  for symbol_name in dir(notebook_init):
    symbol = getattr(notebook_init, symbol_name)
    # Reload modules. `__loader__` is used as the "is a module" marker.
    if hasattr(symbol, '__loader__'):
      try:
        importlib.reload(symbol)
        print('reloaded: ' + symbol_name)
      except Exception as e:
        print('error reloading module %s: %s' % (symbol_name, e))
    # Forward all public symbols into the namespace this function was
    # defined in (the notebook's globals when defined in a cell).
    if not symbol_name.startswith('_'):
      globals()[symbol_name] = getattr(notebook_init, symbol_name)
      print('forwarded symbol: ' + symbol_name)

In [20]:
ReloadProject('deep_learning')

notebook_init.py imported and reloaded
forwarded symbol: Activation
forwarded symbol: Dense
forwarded symbol: Sequential
reloaded: gym
forwarded symbol: gym
reloaded: interface
forwarded symbol: interface
reloaded: layers
forwarded symbol: layers
reloaded: models
forwarded symbol: models


In [15]:
  import notebook_init
  importlib.reload(notebook_init)

<module 'notebook_init' from '/workspace/external/deep_learning/notebook_init.py'>

In [9]:
import notebook_init