Replication of Florian 2007 XOR gate experiments.
* Rate based input coding
* Temporal pattern coding

https://www.florian.io/papers/2007_Florian_Modulated_STDP.pdf

Florian R (2007) Reinforcement Learning Through Modulation of Spike-Timing-Dependent Synaptic Plasticity. Neural Computation 19(6). https://doi.org/10.1162/neco.2007.19.6.1468

In [1]:
from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt

from spikey.core import *
from spikey.snn import *
from spikey.RL import *

np.random.seed(0)

In [2]:
def print_rates(experiment_output, training_params):
    """
    Print mean input and output firing rates for each of the four XOR states.

    For every boolean input pair, the recorded steps carrying that state are
    selected and the last 10 of them are averaged. One line is printed per
    state, formatted as "<state>: <mean in-rate> -> <mean out-rate(s)>".

    Parameters
    ----------
    experiment_output: tuple
        4-tuple whose last element is the info dict. The dict must provide
        'step_states', 'step_inrates', 'step_sysrates' and 'step_outrates'.
    training_params: dict
        Not used here; accepted so all report helpers share one signature.
    """
    _first, _second, _third, info = experiment_output

    step_states = np.array(info['step_states'])
    step_inrates = np.array(info['step_inrates'])
    step_sysrates = np.array(info['step_sysrates'])  # read, but not reported
    step_outrates = np.array(info['step_outrates'])

    reported_states = [[a, b] for a in (False, True) for b in (False, True)]

    for state in reported_states:
        # Boolean selector over the leading axes: True where the step's state matches.
        selected = np.all(step_states == state, axis=2)

        mean_inrates = np.mean(step_inrates[selected][-10:])

        try:
            # Per-output means, when the selected out-rates have a column axis
            # with at least two entries.
            mean_outrates = [
                np.mean(step_outrates[selected][-10:, column])
                for column in range(2)
            ]
        except IndexError:
            # Otherwise fall back to one mean over everything selected.
            mean_outrates = np.mean(step_outrates[selected][-10:])

        print(f"{state}: {mean_inrates} -> {mean_outrates}")

In [3]:
def print_w_diffs(experiment_output, training_params, layer_cutoff=None):
    """
    Print summed synaptic weights before and after training, split at a column.

    Two lines are printed, each formatted as "<original sum> -> <final sum>":
    the first covers weight-matrix columns [:layer_cutoff], the second covers
    columns [layer_cutoff:].

    Parameters
    ----------
    experiment_output: tuple
        4-tuple (network, _, _, info). The pre-training matrix is read from
        info['weights_original'], the trained one from
        network.synapses.weights.matrix.
    training_params: dict
        Not used here; accepted so all report helpers share one signature.
    layer_cutoff: int, default=None
        Column index at which the matrix is split. None falls back to
        network._n_inputs. An explicit 0 is honoured (everything lands in the
        second line) rather than being replaced by the default.
    """
    network, _, _, info = experiment_output

    # `layer_cutoff or default` would also discard a deliberate 0, so test for None.
    if layer_cutoff is None:
        layer_cutoff = network._n_inputs

    original_w = info['weights_original']
    final_w = network.synapses.weights.matrix

    print(f"{np.sum(original_w[:, :layer_cutoff])} -> {np.sum(final_w[:, :layer_cutoff])}")
    print(f"{np.sum(original_w[:, layer_cutoff:])} -> {np.sum(final_w[:, layer_cutoff:])}")

In [4]:
def print_success(experiment_output, training_params):
    """
    Print whether the run solved XOR and how accurate the final steps were.

    "Win" is True when the lowest recent mean out-rate among the states that
    should fire (exactly one input True) exceeds the highest recent mean
    out-rate among the states that should stay quiet. "Accuracy" is the
    fraction of the last `eval_steps` steps whose out-rate lies on the correct
    side of the opposite group's reference rate.

    Parameters
    ----------
    experiment_output: tuple
        4-tuple whose last element is the info dict; 'step_states' and
        'step_outrates' are read from it and flattened over all steps.
    training_params: dict
        Must provide 'eval_steps', the number of trailing steps to score.
    """
    _, _, _, info = experiment_output

    states = np.array(info['step_states']).reshape((-1, 2))
    outrates = np.array(info['step_outrates']).reshape((-1))

    HIGH = [[False, True], [True, False]]
    LOW = [[False, False], [True, True]]

    eval_steps = training_params['eval_steps']
    # Number of most recent samples per state used for the reference rates.
    # NOTE: for eval_steps < 4 this is 0 and `[-0:]` selects every sample.
    relevant_timeframe = eval_steps // 4

    def recent_mean(state):
        # Mean out-rate over the latest samples recorded for `state`.
        return np.mean(outrates[np.all(states == state, axis=1)][-relevant_timeframe:])

    high_rate = min([recent_mean(state) for state in HIGH])
    low_rate = max([recent_mean(state) for state in LOW])

    florian_win = high_rate > low_rate

    correct = 0
    # Walk backwards from the last step. Starting at 1 matters: index -0 is
    # index 0, which would score the very first step instead of a recent one.
    for i in range(1, eval_steps + 1):
        state = states[-i]
        rate = outrates[-i]

        if np.sum(state) % 2:
            correct += int(rate > low_rate)
        else:
            correct += int(rate < high_rate)

    florian_accuracy = correct / eval_steps

    print(f"Florian - Win: {florian_win}, Accuracy: {florian_accuracy}")

In [5]:
def print_runtime(callback):
    """
    Print the run time stored in callback.results['total_time'].

    The value is written with two decimals followed by an 's' suffix.
    """
    total_time = callback.results['total_time']
    print(f"{total_time:.2f}s")

## Rate Coding

In [6]:
# Callback handed to GenericLoop for the rate-coding runs below; print_runtime
# later reads callback.results['total_time'] from it.
callback = RLCallback(logging=True, reduced=False, measure_rates=True)

In [7]:
## No training - Topology
# Run-length settings passed to GenericLoop and to the report helpers.
training_params = dict(
    n_episodes=1,
    len_episode=50,
    eval_steps=50,
)

N_INPUTS, N_NEURONS, N_OUTPUTS = 60, 61, 1


# Initial weights, shape (N_INPUTS + N_NEURONS, N_NEURONS). Only two
# sub-blocks are non-zero, each drawn uniformly from [0, .2):
#   rows of the inputs          x columns of the non-output neurons
#   rows of the non-output neurons x columns of the output neurons
# (rows presumably index the sending unit, columns the receiving neuron)
w_matrix = np.zeros((N_INPUTS + N_NEURONS, N_NEURONS))
w_matrix[:N_INPUTS, :N_NEURONS - N_OUTPUTS] = np.random.uniform(
    0, .2, (N_INPUTS, N_NEURONS - N_OUTPUTS))
w_matrix[N_INPUTS:N_INPUTS + N_NEURONS - N_OUTPUTS, N_NEURONS - N_OUTPUTS:] = np.random.uniform(
    0, .2, (N_NEURONS - N_OUTPUTS, N_OUTPUTS))

# Store as float16 and mask every entry that is exactly zero.
w_matrix = np.ma.array(np.float16(w_matrix), mask=(w_matrix == 0), fill_value=0)

##
class network_template(ContinuousRLNetwork):
    # Rate-coded XOR network for the "No training" baseline: learning_rate is 0.
    # Trailing 'v/' marks are the original author's annotations, kept as written.
    config = dict(
        # --- topology / timing ---
        n_inputs=N_INPUTS,
        n_outputs=N_OUTPUTS,
        matrix=w_matrix,                 # v/
        n_neurons=N_NEURONS,             # v/
        input_pct_inhibitory=.5,         # v/
        neuron_pct_inhibitory=0,         # v/
        processing_time=500,             # v/ 500ms

        # --- neuron dynamics ---
        firing_threshold=16,             # v/
        magnitude=1,                     # v/
        potential_decay=.05,             # v/ Decay constant Tau=20ms, lambda=e^(-t/T)
        prob_rand_fire=.05,              # Seemingly 0 in paper but this is critical to learning.
        refractory_period=0,             # v/ Gutig, Aharonov, Rotter, & Sompolinsky 2003

        # --- plasticity ---
        learning_rate=.0 / 25,           # v/ gamma_0 = gamma / Tau_z (gamma = 0 in this cell)
        max_weight=5,                    # v/
        stdp_window=20,                  # v/ Tau_+ = Tau_- = 20ms
        trace_decay=.04,                 # v/ T_z = 25, lambda = e^(-1/T_z)
        action_threshold=0,              # v/ Irrelevant

        # --- reward / input coding ---
        expected_value=lambda state: np.sum(state) % 2,   # 1 when exactly one input is True
        continuous_rwd_action=lambda *a: None,
        # v/ Original note: "40hz = 40spikes/500ms".
        # NOTE(review): .08 per step over 500 steps is ~40 spikes, i.e. 80 Hz
        # if one step is 1 ms -- confirm against the 40 Hz quoted for the paper.
        state_rate_map=[0, .08],
        punish_mult=1,
    )
    _template_parts = dict(
        inputs=input.RateMap,            # Poisson
        neurons=neuron.Neuron,           # v/
        synapses=synapse.RLSTDPET,       # v/
        weights=weight.Manual,           # v/
        readout=readout.Threshold,       # v/
        rewarder=reward.MatchExpected,
        modifiers=None,
    )

class game_template(Logic):
    # Game passed to GenericLoop: the 'XOR' entry of Logic.PRESETS.
    config = Logic.PRESETS['XOR']

In [8]:
# Assemble the experiment from the templates defined in the previous cell and run it.
training_loop = GenericLoop(
    network_template,
    game_template,
    callback,
    **training_params,
)
e_output = training_loop()

# Rates, weight change and XOR success; print_w_diffs keeps its default layer_cutoff=None.
for report in (print_rates, print_w_diffs, print_success):
    report(e_output, training_params)
print_runtime(callback)

[False, False]: 0.0 -> 0.049591064453125
[False, True]: 0.03985595703125 -> 0.04681396484375
[True, False]: 0.040069580078125 -> 0.055206298828125
[True, True]: 0.08001708984375 -> 0.052001953125
362.25 -> 362.25
5.6953125 -> 5.6953125
Florian - Win: False, Accuracy: 0.4
14.23s


In [9]:
## Default - WORKING
# Run-length settings passed to GenericLoop and to the report helpers.
training_params = dict(
    n_episodes=1,
    len_episode=800,
    eval_steps=50,
)

N_INPUTS, N_NEURONS, N_OUTPUTS = 60, 61, 1


# Initial weights, shape (N_INPUTS + N_NEURONS, N_NEURONS). Only two
# sub-blocks are non-zero, each drawn uniformly from [0, .2):
#   rows of the inputs          x columns of the non-output neurons
#   rows of the non-output neurons x columns of the output neurons
# (rows presumably index the sending unit, columns the receiving neuron)
w_matrix = np.zeros((N_INPUTS + N_NEURONS, N_NEURONS))
w_matrix[:N_INPUTS, :N_NEURONS - N_OUTPUTS] = np.random.uniform(
    0, .2, (N_INPUTS, N_NEURONS - N_OUTPUTS))
w_matrix[N_INPUTS:N_INPUTS + N_NEURONS - N_OUTPUTS, N_NEURONS - N_OUTPUTS:] = np.random.uniform(
    0, .2, (N_NEURONS - N_OUTPUTS, N_OUTPUTS))

# Store as float16 and mask every entry that is exactly zero.
w_matrix = np.ma.array(np.float16(w_matrix), mask=(w_matrix == 0), fill_value=0)

# Timestep = 1ms
# Trace suggestion multipliers - A_+ = 1, A_- = -1
# Paper uses poisson input @ 40hz high
# Paper uses inh weights not inh neurons
# Paper seemingly has no random fires

class network_template(ContinuousRLNetwork):
    # Rate-coded XOR network with learning enabled.
    # Trailing 'v/' marks are the original author's annotations, kept as written.
    config = dict(
        # --- topology / timing ---
        n_inputs=N_INPUTS,
        n_outputs=N_OUTPUTS,
        matrix=w_matrix,                 # v/
        n_neurons=N_NEURONS,             # v/
        input_pct_inhibitory=.5,         # v/
        neuron_pct_inhibitory=0,         # v/
        processing_time=500,             # v/ 500ms

        # --- neuron dynamics ---
        firing_threshold=16,             # v/
        magnitude=1,                     # v/
        potential_decay=.05,             # v/ Decay constant Tau=20ms, lambda=e^(-t/T)
        prob_rand_fire=.15,              # Seemingly 0 in paper but this is critical to learning.
        refractory_period=0,             # v/ Gutig, Aharonov, Rotter, & Sompolinsky 2003

        # --- plasticity ---
        learning_rate=.625 / 25,         # v/ gamma_0 = gamma / Tau_z
        max_weight=5,                    # v/
        stdp_window=20,                  # v/ Tau_+ = Tau_- = 20ms
        trace_decay=.04,                 # v/ T_z = 25, lambda = e^(-1/T_z)

        # --- reward ---
        expected_value=lambda state: np.sum(state) % 2,   # 1 when exactly one input is True
        continuous_rwd_action=lambda *a: True,

        # --- readout / input coding ---
        action_threshold=0,              # v/ Irrelevant
        # v/ Original note: "40hz = 40spikes/500ms".
        # NOTE(review): .08 per step over 500 steps is ~40 spikes, i.e. 80 Hz
        # if one step is 1 ms -- confirm against the 40 Hz quoted for the paper.
        state_rate_map=[0, .08],
        punish_mult=1,
    )
    _template_parts = dict(
        inputs=input.RateMap,            # Poisson
        neurons=neuron.Neuron,           # v/
        synapses=synapse.RLSTDPET,       # v/
        weights=weight.Manual,           # v/
        readout=readout.Threshold,       # v/
        rewarder=reward.MatchExpected,
        modifiers=None,
    )

class game_template(Logic):
    # Game passed to GenericLoop: the 'XOR' entry of Logic.PRESETS.
    config = Logic.PRESETS['XOR']

In [10]:
# Assemble the experiment from the templates defined in the previous cell and run it.
training_loop = GenericLoop(
    network_template,
    game_template,
    callback,
    **training_params,
)
e_output = training_loop()

# Rates, weight change and XOR success; print_w_diffs keeps its default layer_cutoff=None.
for report in (print_rates, print_w_diffs, print_success):
    report(e_output, training_params)
print_runtime(callback)

[False, False]: 0.0 -> 0.291015625
[False, True]: 0.039703369140625 -> 0.98095703125
[True, False]: 0.03997802734375 -> 0.95556640625
[True, True]: 0.0794677734375 -> 0.2958984375
363.25 -> 8632.0
5.69921875 -> 39.84375
Florian - Win: True, Accuracy: 1.0
284.86s


## Temporal Coding

In [11]:
# Fresh callback handed to GenericLoop for the temporal-coding runs below;
# print_runtime later reads callback.results['total_time'] from it.
callback = RLCallback(logging=True, reduced=False, measure_rates=True)

In [12]:
# Basic
N_INPUTS = 2
N_NEURONS = 21
N_OUTPUTS = 1

PROCESSING_TIME = 500
# One fixed, randomly drawn spike train per boolean value. Both values use the
# same per-step firing probability (.1, ~50 spikes per 500-step pattern, i.e.
# 100 Hz if one step is 1 ms); only the drawn pattern differs.
simple_map = {
    False: np.int_(np.random.uniform(0, 1, (PROCESSING_TIME, N_INPUTS // 2)) <= .1),
    True: np.int_(np.random.uniform(0, 1, (PROCESSING_TIME, N_INPUTS // 2)) <= .1),
}

# Input pair (A, B) -> the two single-value spike trains placed side by side.
input_map = {
    (A, B): np.hstack((simple_map[A], simple_map[B]))
    for A in [False, True] for B in [False, True]
}
N_HIDDEN = N_NEURONS - N_OUTPUTS
training_params = {
    'n_episodes': 1,
    'len_episode': 800,
    'eval_steps': 50,
}
# Initial weights, shape (N_INPUTS + N_NEURONS, N_NEURONS); only the
# input-row x hidden-column and hidden-row x output-column blocks are non-zero.
matrix = np.vstack((
    np.hstack((np.random.uniform(0, .2, (N_INPUTS, N_HIDDEN)), np.zeros((N_INPUTS, N_OUTPUTS)))),
    np.hstack((np.zeros((N_HIDDEN, N_HIDDEN)), np.random.uniform(0, .2, (N_HIDDEN, N_OUTPUTS)))),
    # Plain 2-D zero block. Wrapping it in np.hstack would flatten its rows
    # into one 1-D vector, which only stacks correctly when N_OUTPUTS == 1.
    np.zeros((N_OUTPUTS, N_NEURONS)),
))
# Mask every entry that is exactly zero.
matrix = np.ma.array(matrix, mask=(matrix == 0), fill_value=0)

##
class network_template(ContinuousRLNetwork):
    # Temporal-pattern XOR network: input.StaticMap is given the
    # state -> spike-train table `input_map` through 'state_spike_map'.
    config = dict(
        # --- topology / timing ---
        n_inputs=N_INPUTS,
        n_outputs=N_OUTPUTS,
        n_neurons=N_NEURONS,
        input_pct_inhibitory=.5,
        neuron_pct_inhibitory=0,
        processing_time=PROCESSING_TIME,
        input_firing_steps=PROCESSING_TIME - 3,

        # --- neuron dynamics and plasticity ---
        magnitude=1,
        max_weight=1,
        firing_threshold=1,
        trace_decay=.05,
        potential_decay=.8,
        prob_rand_fire=.15,
        refractory_period=5,
        stdp_window=4,
        learning_rate=.25,
        action_threshold=.0,

        # --- weights, input coding, reward ---
        matrix=matrix,
        state_spike_map=input_map,
        expected_value=lambda state: np.sum(state) % 2,   # 1 when exactly one input is True
        continuous_rwd_action=lambda *a: True,
        punish_mult=1,
    )
    _template_parts = dict(
        inputs=input.StaticMap,
        neurons=neuron.Neuron,
        synapses=synapse.RLSTDPET,
        weights=weight.Manual,
        readout=readout.Threshold,
        rewarder=reward.MatchExpected,
        modifiers=None,
    )

class game_template(Logic):
    # Game passed to GenericLoop: the 'XOR' entry of Logic.PRESETS.
    config = Logic.PRESETS['XOR']

In [13]:
# Assemble the experiment from the templates defined in the previous cell and run it.
training_loop = GenericLoop(
    network_template,
    game_template,
    callback,
    **training_params,
)
e_output = training_loop()

# Rates, weight change and XOR success; print_w_diffs keeps its default layer_cutoff=None.
for report in (print_rates, print_w_diffs, print_success):
    report(e_output, training_params)
print_runtime(callback)

[False, False]: 0.10797119140625 -> 0.122802734375
[False, True]: 0.10699462890625 -> 0.1597900390625
[True, False]: 0.10699462890625 -> 0.159423828125
[True, True]: 0.10601806640625 -> 0.12103271484375
0.3998296810219967 -> 0.0
5.961753829297675 -> 24.0
Florian - Win: True, Accuracy: 1.0
138.22s


In [14]:
## Basic w/ poisson process
N_INPUTS = 2
N_NEURONS = 21
N_OUTPUTS = 1

PROCESSING_TIME = 500
# One fixed, randomly drawn sparse spike train per boolean value. The per-step
# firing probability is 50 * .0001 = .005, i.e. ~2.5 spikes per 500-step
# pattern (5 Hz if one step is 1 ms) -- not the 100 Hz of the denser cells.
simple_map = {
    False: np.int_(np.random.uniform(0, 1, (PROCESSING_TIME, N_INPUTS // 2)) <= 50 * .0001),
    True: np.int_(np.random.uniform(0, 1, (PROCESSING_TIME, N_INPUTS // 2)) <= 50 * .0001),
}

# Input pair (A, B) -> the two single-value spike trains placed side by side.
input_map = {
    (A, B): np.hstack((simple_map[A], simple_map[B]))
    for A in [False, True] for B in [False, True]
}
N_HIDDEN = N_NEURONS - N_OUTPUTS
training_params = {
    'n_episodes': 1,
    'len_episode': 800,
    'eval_steps': 50,
}
# Initial weights, shape (N_INPUTS + N_NEURONS, N_NEURONS); only the
# input-row x hidden-column and hidden-row x output-column blocks are non-zero.
matrix = np.vstack((
    np.hstack((np.random.uniform(0, .2, (N_INPUTS, N_HIDDEN)), np.zeros((N_INPUTS, N_OUTPUTS)))),
    np.hstack((np.zeros((N_HIDDEN, N_HIDDEN)), np.random.uniform(0, .2, (N_HIDDEN, N_OUTPUTS)))),
    # Plain 2-D zero block. Wrapping it in np.hstack would flatten its rows
    # into one 1-D vector, which only stacks correctly when N_OUTPUTS == 1.
    np.zeros((N_OUTPUTS, N_NEURONS)),
))
# Mask every entry that is exactly zero.
matrix = np.ma.array(matrix, mask=(matrix == 0), fill_value=0)

##
class network_template(ContinuousRLNetwork):
    # Temporal-pattern XOR network: input.StaticMap is given the
    # state -> spike-train table `input_map` through 'state_spike_map'.
    config = dict(
        # --- topology / timing ---
        n_inputs=N_INPUTS,
        n_outputs=N_OUTPUTS,
        n_neurons=N_NEURONS,
        input_pct_inhibitory=.5,
        neuron_pct_inhibitory=0,
        processing_time=PROCESSING_TIME,
        input_firing_steps=PROCESSING_TIME - 3,

        # --- neuron dynamics and plasticity ---
        magnitude=1,
        max_weight=1,
        firing_threshold=1,
        trace_decay=.05,
        potential_decay=.8,
        prob_rand_fire=.15,
        refractory_period=5,
        stdp_window=4,
        learning_rate=.25,
        action_threshold=.0,

        # --- weights, input coding, reward ---
        matrix=matrix,
        state_spike_map=input_map,
        expected_value=lambda state: np.sum(state) % 2,   # 1 when exactly one input is True
        continuous_rwd_action=lambda *a: True,
        punish_mult=1,
    )
    _template_parts = dict(
        inputs=input.StaticMap,
        neurons=neuron.Neuron,
        synapses=synapse.RLSTDPET,
        weights=weight.Manual,
        readout=readout.Threshold,
        rewarder=reward.MatchExpected,
        modifiers=None,
    )

class game_template(Logic):
    # Game passed to GenericLoop: the 'XOR' entry of Logic.PRESETS.
    config = Logic.PRESETS['XOR']

In [15]:
# Assemble the experiment from the templates defined in the previous cell and run it.
training_loop = GenericLoop(
    network_template,
    game_template,
    callback,
    **training_params,
)
e_output = training_loop()

# Rates, weight change and XOR success; print_w_diffs keeps its default layer_cutoff=None.
for report in (print_rates, print_w_diffs, print_success):
    report(e_output, training_params)
print_runtime(callback)

[False, False]: 0.004001617431640625 -> 0.12078857421875
[False, True]: 0.0020008087158203125 -> 0.16015625
[True, False]: 0.0020008087158203125 -> 0.1600341796875
[True, True]: 0.0 -> 0.12139892578125
0.3784368300520454 -> 1.0
5.83401465911749 -> 16.0
Florian - Win: True, Accuracy: 1.0
124.17s


In [16]:
## For realism
# Run-length settings passed to GenericLoop and to the report helpers.
training_params = dict(
    n_episodes=1,
    len_episode=800,
    eval_steps=50,
)

N_INPUTS, N_NEURONS, N_OUTPUTS = 2, 21, 1

PROCESSING_TIME = 500
# One fixed, randomly drawn spike train per boolean value (False is drawn
# first, then True). Per-step firing probability is .1: ~50 spikes per
# 500-step pattern, i.e. 100 Hz if one step is 1 ms.
simple_map = {
    value: np.int_(np.random.uniform(0, 1, (PROCESSING_TIME, N_INPUTS // 2)) <= .1)
    for value in (False, True)
}

# Input pair (first, second) -> the two single-value spike trains side by side.
input_map = {
    (first, second): np.hstack((simple_map[first], simple_map[second]))
    for first in (False, True)
    for second in (False, True)
}

# Initial weights, shape (N_INPUTS + N_NEURONS, N_NEURONS). Only two
# sub-blocks are non-zero, each drawn uniformly from [0, .4):
#   rows of the inputs          x columns of the non-output neurons
#   rows of the non-output neurons x columns of the output neurons
w_matrix = np.zeros((N_INPUTS + N_NEURONS, N_NEURONS))
w_matrix[:N_INPUTS, :N_NEURONS - N_OUTPUTS] = np.random.uniform(
    0, .4, (N_INPUTS, N_NEURONS - N_OUTPUTS))
w_matrix[N_INPUTS:N_INPUTS + N_NEURONS - N_OUTPUTS, N_NEURONS - N_OUTPUTS:] = np.random.uniform(
    0, .4, (N_NEURONS - N_OUTPUTS, N_OUTPUTS))

# Store as float16 and mask every entry that is exactly zero.
w_matrix = np.ma.array(np.float16(w_matrix), mask=(w_matrix == 0), fill_value=0)

##
class network_template(ContinuousRLNetwork):
    # Temporal-pattern XOR network using the rate-coding cell's neuron and
    # plasticity settings; input.StaticMap is given `input_map`.
    # Trailing 'v/' marks are the original author's annotations, kept as written.
    config = dict(
        # --- topology / timing / input coding ---
        n_inputs=N_INPUTS,
        n_outputs=N_OUTPUTS,
        matrix=w_matrix,                 # v/
        n_neurons=N_NEURONS,             # v/
        input_pct_inhibitory=.5,         # v/
        neuron_pct_inhibitory=0,         # v/
        processing_time=PROCESSING_TIME, # v/
        state_spike_map=input_map,

        # --- neuron dynamics ---
        firing_threshold=16,             # v/
        magnitude=1,                     # v/
        potential_decay=.05,             # v/ Decay constant Tau=20ms, lambda=e^(-t/T)
        prob_rand_fire=.15,
        refractory_period=0,             # v/ Gutig, Aharonov, Rotter, & Sompolinsky 2003

        # --- plasticity / reward ---
        learning_rate=.25 / 25,          # v/ gamma_0 = gamma / Tau_z
        max_weight=5,                    # v/
        stdp_window=20,                  # v/ Tau_+ = Tau_- = 20ms
        trace_decay=.04,                 # v/ T_z = 25, lambda = e^(-1/T_z)
        expected_value=lambda state: np.sum(state) % 2,   # 1 when exactly one input is True
        continuous_rwd_action=lambda *a: True,
        action_threshold=0,              # v/ Irrelevant
        punish_mult=1,
    )

    _template_parts = dict(
        inputs=input.StaticMap,
        neurons=neuron.Neuron,
        synapses=synapse.RLSTDPET,
        weights=weight.Manual,
        readout=readout.Threshold,
        rewarder=reward.MatchExpected,
        modifiers=None,
    )

class game_template(Logic):
    # Game passed to GenericLoop: the 'XOR' entry of Logic.PRESETS.
    config = Logic.PRESETS['XOR']

In [17]:
# Assemble the experiment from the templates defined in the previous cell and run it.
training_loop = GenericLoop(
    network_template,
    game_template,
    callback,
    **training_params,
)
e_output = training_loop()

# Rates, weight change and XOR success; print_w_diffs keeps its default layer_cutoff=None.
for report in (print_rates, print_w_diffs, print_success):
    report(e_output, training_params)
print_runtime(callback)

[False, False]: 0.114013671875 -> 0.2183837890625
[False, True]: 0.1099853515625 -> 0.572265625
[True, False]: 0.1099853515625 -> 0.572265625
[True, True]: 0.10601806640625 -> 0.2149658203125
1.19140625 -> 20.0
10.46875 -> 126.5
Florian - Win: True, Accuracy: 1.0
141.05s


In [18]:
## Realistic w/ poisson process - WORKING
## For realism
# Run-length settings passed to GenericLoop and to the report helpers.
training_params = dict(
    n_episodes=1,
    len_episode=800,
    eval_steps=50,
)

N_INPUTS, N_NEURONS, N_OUTPUTS = 2, 21, 1

PROCESSING_TIME = 500
# One fixed, randomly drawn sparse spike train per boolean value (False is
# drawn first, then True). Per-step firing probability is 50 * .0001 = .005:
# ~2.5 spikes per 500-step pattern, i.e. 5 Hz if one step is 1 ms.
simple_map = {
    value: np.int_(np.random.uniform(0, 1, (PROCESSING_TIME, N_INPUTS // 2)) <= 50 * .0001)
    for value in (False, True)
}

# Input pair (first, second) -> the two single-value spike trains side by side.
input_map = {
    (first, second): np.hstack((simple_map[first], simple_map[second]))
    for first in (False, True)
    for second in (False, True)
}

# Initial weights, shape (N_INPUTS + N_NEURONS, N_NEURONS). Only two
# sub-blocks are non-zero, each drawn uniformly from [0, .4):
#   rows of the inputs          x columns of the non-output neurons
#   rows of the non-output neurons x columns of the output neurons
w_matrix = np.zeros((N_INPUTS + N_NEURONS, N_NEURONS))
w_matrix[:N_INPUTS, :N_NEURONS - N_OUTPUTS] = np.random.uniform(
    0, .4, (N_INPUTS, N_NEURONS - N_OUTPUTS))
w_matrix[N_INPUTS:N_INPUTS + N_NEURONS - N_OUTPUTS, N_NEURONS - N_OUTPUTS:] = np.random.uniform(
    0, .4, (N_NEURONS - N_OUTPUTS, N_OUTPUTS))

# Store as float16 and mask every entry that is exactly zero.
w_matrix = np.ma.array(np.float16(w_matrix), mask=(w_matrix == 0), fill_value=0)

##
class network_template(ContinuousRLNetwork):
    # Temporal-pattern XOR network using the rate-coding cell's neuron and
    # plasticity settings; input.StaticMap is given the sparse `input_map`.
    # Trailing 'v/' marks are the original author's annotations, kept as written.
    config = dict(
        # --- topology / timing / input coding ---
        n_inputs=N_INPUTS,
        n_outputs=N_OUTPUTS,
        matrix=w_matrix,                 # v/
        n_neurons=N_NEURONS,             # v/
        input_pct_inhibitory=.5,         # v/
        neuron_pct_inhibitory=0,         # v/
        processing_time=PROCESSING_TIME, # v/
        state_spike_map=input_map,

        # --- neuron dynamics ---
        firing_threshold=16,             # v/
        magnitude=1,                     # v/
        potential_decay=.05,             # v/ Decay constant Tau=20ms, lambda=e^(-t/T)
        prob_rand_fire=.15,

        refractory_period=0,             # v/ Gutig, Aharonov, Rotter, & Sompolinsky 2003

        # --- plasticity / reward ---
        learning_rate=.25 / 25,          # v/ gamma_0 = gamma / Tau_z
        max_weight=5,                    # v/
        stdp_window=20,                  # v/ Tau_+ = Tau_- = 20ms
        trace_decay=.04,                 # v/ T_z = 25, lambda = e^(-1/T_z)
        expected_value=lambda state: np.sum(state) % 2,   # 1 when exactly one input is True
        continuous_rwd_action=lambda *a: True,
        action_threshold=0,              # v/ Irrelevant
        punish_mult=1,
    )

    _template_parts = dict(
        inputs=input.StaticMap,
        neurons=neuron.Neuron,
        synapses=synapse.RLSTDPET,
        weights=weight.Manual,
        readout=readout.Threshold,
        rewarder=reward.MatchExpected,
        modifiers=None,
    )

class game_template(Logic):
    # Game passed to GenericLoop: the 'XOR' entry of Logic.PRESETS.
    config = Logic.PRESETS['XOR']

In [19]:
# Assemble the experiment from the templates defined in the previous cell and run it.
training_loop = GenericLoop(
    network_template,
    game_template,
    callback,
    **training_params,
)
e_output = training_loop()

# Rates, weight change and XOR success; print_w_diffs keeps its default layer_cutoff=None.
for report in (print_rates, print_w_diffs, print_success):
    report(e_output, training_params)
print_runtime(callback)

[False, False]: 0.0020008087158203125 -> 0.224853515625
[False, True]: 0.005001068115234375 -> 0.5673828125
[True, False]: 0.005001068115234375 -> 0.556640625
[True, True]: 0.00800323486328125 -> 0.2052001953125
1.0439453125 -> 10.7890625
11.9296875 -> 101.3125
Florian - Win: True, Accuracy: 1.0
138.86s
