# rl2.ipynb

This notebook contains the second step of the 5-step process outlined on November 13th, 2020, for producing an actor-critic architecture for pendulum control.

In step 1 we created a network with N units that takes the activity of 1 unit and transforms it into a bell-shaped representation.
Here we use 2 of those networks as the inputs to an (N/2)x(N/2) grid of units, and visualize the result.

To achieve bell-shaped tuning, we use the `bell_shaped_1D` unit model.

In [7]:
%cd ../..
import numpy as np
import matplotlib.pyplot as plt
import time
from draculab import *
from tools.visualization import plotter

/home


In [13]:
# Number of units in each S network.
N = 20
# Half of N rounded up (integer form of ceil(N/2)); used as the number of
# rows and columns of the L grid.
No2 = (N + 1) // 2
# Parameters handed to the network constructor.
net_params = {
    'min_delay': 0.005,
    'min_buff_size': 10,
}
# Source units (A). The function given here is only a placeholder that
# returns None; the real input signals are installed with set_function()
# after the units have been created.
A_params = {
    'type': unit_types.source,
    'init_val': 0.5,
    'function': lambda t: None,
}

# Sigmoidal units that populate the L grid.
L_params = {
    'type': unit_types.sigmoidal,
    'thresh': 1.5,
    'slope': 5.,
    'tau': 0.02,
    'init_val': 0.5,
}

# Geometry of L: a No2 x No2 grid on a sheet centered at the origin.
L_geom = {
    'shape': 'sheet',
    'arrangement': 'grid',
    'rows': No2,
    'columns': No2,
    'center': [0., 0.],
    'extent': [1., 1.],
}

# Parameters shared by both S populations. 'center' holds one value per unit,
# evenly spaced over [0, 1].
S_params = {
    'type': unit_types.bell_shaped_1D,
    'init_val': 0.1,
    'tau': 0.01,
    'center': list(np.linspace(0., 1., N)),
    'b': 30.,
}

# Positions along the line on which each S population is laid out.
line_positions = [-.5 + k/N for k in range(N)]

# S1 keeps its first coordinate fixed at -0.6 and varies the second one;
# S2 does the opposite. Everything else is shared with S_params.
S1_params = {
    **S_params,
    'coordinates': [np.array([-.6, pos]) for pos in line_positions],
}
S2_params = {
    **S_params,
    'coordinates': [np.array([pos, -.6]) for pos in line_positions],
}

# Build the network and its populations. The creation order (A, S1, S2, L)
# is kept exactly as it was.
net = network(net_params)
topo = topology()
A = net.create(2, A_params)
S1 = net.create(N, S1_params)
S2 = net.create(N, S2_params)
L = topo.create_group(net, L_geom, L_params)


def sine_input(t):
    """Return sin(t) shifted and scaled into the interval [0, 1]."""
    return 0.5*(np.sin(t)+1.)


def cosine_input(t):
    """Return cos(t) shifted and scaled into the interval [0, 1]."""
    return 0.5*(np.cos(t)+1.)


# Replace the placeholder functions of the two source units:
# A[0] follows the sine signal and A[1] the cosine signal.
for source_id, signal in zip(A, (sine_input, cosine_input)):
    net.units[source_id].set_function(signal)

# Connection and synapse specifications shared by both A -> S projections.
A__S_conn = {
    'rule': 'all_to_all',
    'delay': 0.01,
}
A__S_syn = {
    'type': synapse_types.static,
    'init_w': 1.,
}

# Specification of the S1 -> L projection (handed to topo.topo_connect).
S1__L_conn_spec = {
    'connection_type': 'divergent',
    'mask': {'circular': {'radius': 2.}},
    'kernel': 1.,
    'delays': {'linear': {'c': 0.01, 'a': 0.01}},
    'weights': {'gaussian': {'w_center': 1., 'sigma': 0.05}},
    'dist_dim': 'y',
    'edge_wrap': True,
    'boundary': {'center': [-0.05, 0.], 'extent': [1.1, 1.]},
}

# The S2 -> L projection reuses the S1 specification, overriding only the
# distance dimension and the boundary. This is a shallow copy: the nested
# 'mask', 'delays' and 'weights' dictionaries are shared with the S1 spec.
S2__L_conn_spec = {
    **S1__L_conn_spec,
    'dist_dim': 'x',
    'boundary': {'center': [0., -0.05], 'extent': [1., 1.1]},
}

# Both S -> L projections use static synapses (separate dict objects).
S1__L_syn_spec = {'type': synapse_types.static}
S2__L_syn_spec = {'type': synapse_types.static}

# Each source unit drives its own S population: A[0] -> S1 and A[1] -> S2.
for source_id, targets in ((A[0], S1), (A[1], S2)):
    net.connect([source_id], targets, A__S_conn, A__S_syn)

# Both S populations then project onto the L grid, each with its own
# connection and synapse specification (S1 first, then S2).
for senders, conn_spec, syn_spec in (
        (S1, S1__L_conn_spec, S1__L_syn_spec),
        (S2, S2__L_conn_spec, S2__L_syn_spec)):
    topo.topo_connect(net, senders, L, conn_spec, syn_spec)

In [9]:
# Run the simulation and report how long it took.
# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments the way a
# difference of time.time() values can.
start_time = time.perf_counter()
# Alternative runner kept for reference: times, activs, _ = net.run(10.)
times, activs, _ = net.flat_run(1.)
print('Execution time is %s seconds' % (time.perf_counter() - start_time))
# Convert to an array so the plotting cells can select the rows of a
# population by indexing with its list of unit IDs.
activs = np.array(activs)

Execution time is 2.584442377090454 seconds


In [10]:
def plot_activities(times, activs, unit_ids, title, legend_labels=None,
                    figsize=(20, 6)):
    """Plot the activity traces of one group of units in a new figure.

    Args:
        times: sequence of time points used as the horizontal axis.
        activs: array of activities, indexed by unit ID along the first axis.
        unit_ids: list of unit IDs whose traces are drawn.
        title: title of the figure.
        legend_labels: optional list with one legend entry per trace; when
            None, no legend is drawn.
        figsize: figure size handed to matplotlib.

    Returns:
        A (figure, activities) tuple, where activities is the array holding
        the rows of `activs` selected by `unit_ids`.
    """
    group_activs = np.array(activs[unit_ids])
    fig, ax = plt.subplots(figsize=figsize)
    # plot() draws one line per column, hence the transpose.
    ax.plot(times, group_activs.transpose())
    if legend_labels is not None:
        ax.legend(legend_labels)
    ax.set_title(title)
    ax.set_xlabel('time')
    ax.set_ylabel('activity')
    return fig, group_activs


fs = (20,6)

# plot of sources
A_fig, A_activs = plot_activities(times, activs, A, 'A', figsize=fs)

# plots of the two S populations, one legend entry per unit center
S1_legends = [str(ctr) for ctr in S1_params['center']]
S1_fig, S1_activs = plot_activities(times, activs, S1, 'S1',
                                    legend_labels=S1_legends, figsize=fs)

S2_legends = [str(ctr) for ctr in S2_params['center']]
S2_fig, S2_activs = plot_activities(times, activs, S2, 'S2',
                                    legend_labels=S2_legends, figsize=fs)

plt.show()

In [11]:
# Wrap the network and the simulation results in a plotter object; it is
# reused by the following cell.
plotty = plotter(net, times, activs)

# Animate the activity of both S populations together with the L grid.
# NOTE(review): the positional 0.1 is presumably an activity threshold;
# confirm against plotter.act_anim.
animated_units = S1 + S2 + L
plotty.act_anim(animated_units, 0.1, interv=30, slider=False)


<matplotlib.animation.FuncAnimation at 0x7f30340b8850>

In [12]:
# Connection animation with the units of both S populations as the first
# group and the L grid as the second.
sender_units = S1 + S2
plotty.conn_anim(sender_units, L, interv=200, slider=False)

<matplotlib.animation.FuncAnimation at 0x7f30280e3460>