In [None]:
# Install all the parts needed
#%pip install gym pyvirtualdisplay matplotlib
#%pip install gym[atari]
#%pip install gym[accept-rom-license]
#%pip install scikit-learn -qq
#%pip install scikit-image -qq
#%pip install torchvision -qq
#%pip install tensorflow -qq

In [None]:
import cv2
import time
import imageio
import numpy as np
from skimage.transform import resize
#import torch.nn.functional as F
import random
import torchvision.transforms as transforms
import tensorflow.compat.v1 as tf
# Number of discrete actions; selectOutput uses it as the bound for random action choice.
ACTIONS = 4
# Wall-clock timestamp taken when this cell runs; read at the end of the notebook to report elapsed time.
start = time.time()

In [None]:
def frameprocess(frame, frame_height=84, frame_width=84):
    """Turn an RGB frame into a cropped, downscaled grayscale image.

    Args:
        frame: RGB image accepted by ``tf.image.rgb_to_grayscale``
            (assumed to be a raw Atari screen — TODO confirm shape).
        frame_height: Height in pixels of the returned image.
        frame_width: Width in pixels of the returned image.

    Returns:
        The result of ``tf.image.resize_images``: the grayscale crop resized
        to ``frame_height`` x ``frame_width`` with nearest-neighbour sampling.
    """
    gray = tf.image.rgb_to_grayscale(frame)
    # Keep a window of 160 rows by 140 columns starting at row 34, column 0.
    # NOTE(review): the width of 140 drops columns on the right-hand side of
    # the frame — confirm this is intended.
    cropped = tf.image.crop_to_bounding_box(gray, 34, 0, 160, 140)
    target_size = [frame_height, frame_width]
    return tf.image.resize_images(
        cropped,
        target_size,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

In [None]:
# Number of actions used to size the SpiNNaker network. Derived from ACTIONS
# (defined in the imports cell) so the two values cannot drift apart.
actions = ACTIONS

In [None]:
def selectOutput(records,previous_spikes):
    """Pick the index of the neuron that spiked most often in the first record.

    Args:
        records: Sequence of spike records. Only ``records[0]`` is inspected.
            A record is either a ``(label, time, neuron_ids)`` tuple, as
            produced by the live-spike receive callback, or a plain iterable
            of integer neuron ids.
        previous_spikes: Unused; kept for interface compatibility.

    Returns:
        A random integer in ``[0, ACTIONS - 1]`` when ``records`` is empty,
        otherwise the most frequent neuron id (lowest id wins ties).
        NOTE(review): the returned value is a neuron index, which can exceed
        the number of valid actions — confirm how ids should map to actions.
    """
    # len() instead of `== []` so empty deques/tuples/arrays are handled too.
    if records is None or len(records) == 0:
        return random.randint(0,ACTIONS-1)

    first = records[0]
    # Live-spike records carry the ids in the third slot, after a string label
    # and a timestamp; anything else is treated as the ids themselves.
    if isinstance(first, tuple) and len(first) == 3 and isinstance(first[0], str):
        neuron_ids = first[2]
    else:
        neuron_ids = first

    ids = np.asarray(list(neuron_ids), dtype=np.intp)
    # minlength keeps the original 64-bin histogram for small ids, while
    # bincount grows as needed so ids >= 64 no longer raise IndexError.
    counts = np.bincount(ids, minlength=64)
    return np.argmax(counts)

In [None]:
class Atari(object):
    """Wrapper for the environment provided by gym.

    Preprocesses every frame with ``frameprocess`` and keeps the last
    ``agent_history_length`` processed frames stacked along axis 2 in
    ``self.state``.
    """
    def __init__(self, envName, no_op_steps=10, agent_history_length=4):
        """Create the gym environment and expose the attributes callers use.

        Args:
            envName: Environment id passed to ``gym.make``.
            no_op_steps: Stored on the instance; not used inside this class.
            agent_history_length: Number of processed frames kept in ``state``.
        """
        self.env = gym.make(envName)
        self.state = None  # Frame stack; populated by reset().
        self.last_lives = 0
        self.no_op_steps = no_op_steps
        self.metadata = self.env.metadata
        self.agent_history_length = agent_history_length
        self.spec = self.env.spec
        self.action_space = self.env.action_space
        self.render = self.env.render

    def reset(self,evaluation=False):
        """Reset the environment and fill the frame stack with the first frame.

        Args:
            evaluation: Unused; kept for interface compatibility.

        Returns:
            The processed first frame, matching the observation that
            ``step`` returns. (Previously this method returned ``None``.)
        """
        frame = self.env.reset()
        processed_frame = frameprocess(frame)
        # Start the history with copies of the first frame along axis 2.
        self.state = np.repeat(processed_frame, self.agent_history_length, axis=2)
        return processed_frame

    def step(self,action):
        """Advance the environment one step and update the frame stack.

        Args:
            action: Action forwarded to the wrapped environment.

        Returns:
            ``(processed_new_frame, reward, done, new_frame)``. The fourth
            element is the raw frame, not the ``info`` value from gym.

        Raises:
            RuntimeError: If called before ``reset``.
        """
        # Fail before touching the environment instead of failing later on
        # ``None[:, :, 1:]`` after the env has already been stepped.
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")
        new_frame, reward, done, info = self.env.step(action)
        processed_new_frame = frameprocess(new_frame)
        # Drop the oldest frame and append the newest one along axis 2.
        self.state = np.append(self.state[:, :, 1:], processed_new_frame, axis=2)

        return processed_new_frame, reward, done, new_frame

    def close(self):
        """Close the wrapped gym environment."""
        self.env.close()

In [None]:
import numpy as np
def inputToSpikeRateArray(frame):
    """Return the contents of ``frame`` as a new one-dimensional NumPy array.

    Args:
        frame: Any array-like accepted by ``np.array``.

    Returns:
        A 1-D array holding every element of ``frame`` in row-major order.
    """
    return np.array(frame).flatten()
    

In [None]:
import pyNN.spiNNaker as p
# Per-action weight given to each neuromodulation (reward) projection.
DA_concentration = [0]*actions

def run_spinnaker_sim(input_label, output_label, connection_port):
    """Build the SpiNNaker network and run it until a stop is requested.

    The network consists of one spike-injector input population of 84*84
    neurons and, for each of ``actions`` actions, a 10-neuron spike-injector
    reward population and a 100-neuron ``IF_curr_exp`` output population.
    Every output population receives an all-to-all STDP projection from the
    input population and an all-to-all neuromodulation projection (receptor
    ``'reward'``) from its own reward population. Live output is enabled for
    every output population.

    Args:
        input_label: Label given to the input population.
        output_label: Label given to every output population.
        connection_port: Port passed as ``database_notify_port_num`` to the
            spike injectors and to the live-output activation.
    """
    tau_c = 1000  # Eligibility trace decay time constant.
    tau_d = 200  # Dopamine trace decay time constant.
    inputpopsize = 84*84

    # Set up the simulation itself
    p.setup(1.0)
    try:
        ##### INPUT LAYER #####
        input_pop = p.Population(
            inputpopsize,
            p.external_devices.SpikeInjector(
                database_notify_port_num=connection_port),
            label=input_label)

        ##### REWARD LAYER #####
        reward_pop = [
            p.Population(
                10,
                p.external_devices.SpikeInjector(
                    database_notify_port_num=connection_port))
            for _ in range(actions)]

        #####   STDP    #####
        timing_rule = p.SpikePairRule(tau_plus=0.1, tau_minus=0.1, A_plus=0.1, A_minus=0.1)
        weight_rule = p.AdditiveWeightDependence(w_max=10.0, w_min=0.01)
        stdp_model_excitatory = p.STDPMechanism(
            timing_dependence=timing_rule, weight_dependence=weight_rule, weight=5)

        #### OUTPUT LAYER ####
        # One output population per action, sized from `actions` so it always
        # matches the reward populations (this used to be a hard-coded 4).
        # NOTE(review): every output population shares `output_label`; confirm
        # that live output does not require a unique label per population.
        output_pop = [
            p.Population(100, p.IF_curr_exp(), label=output_label)
            for _ in range(actions)]

        ## Create dopaminergic connection
        for pop, reward_source, dopamine in zip(output_pop, reward_pop, DA_concentration):
            p.Projection(
                input_pop, pop,
                p.AllToAllConnector(),
                synapse_type=stdp_model_excitatory)

            p.Projection(
                reward_source, pop,
                p.AllToAllConnector(),
                synapse_type=p.extra_models.Neuromodulation(
                    weight=dopamine, tau_c=tau_c, tau_d=tau_d, w_max=20.0, w_min=0),
                receptor_type='reward', label='reward synapses')

        # Make the population output spikes
        for op in output_pop:
            p.external_devices.activate_live_output_for(
                op, database_notify_port_num=connection_port)

        # Run in sections of 20ms
        p.external_devices.run_forever(sync_time=20)
    finally:
        # End the simulation once complete (run_forever stops when requested
        # elsewhere); also reached when building or running raises, so the
        # simulator is always shut down.
        p.end()

In [None]:
import gym
from gym import wrappers
import matplotlib.pyplot as plt
from IPython import display
import pyNN.spiNNaker as p


# Note the odd order of parameters here is because input_label and connection
# are from the default interface, but output_label and spike_queue are from
# additional parameters
def run_openai_gym_sim(input_label, connection, output_label, spike_queue, new_spike_queue, plt, img, env):
    """Drive one gym episode in lock-step with the SpiNNaker simulation.

    Each iteration selects an action from the most recent spikes, steps the
    environment, sends the current frame stack to SpiNNaker and lets the
    simulation continue. The SpiNNaker run is always asked to stop when the
    loop exits, whether the episode finished or an error occurred.

    Args:
        input_label: Label of the input population to send spikes to.
        connection: Live spikes connection used for ``send_spikes``.
        output_label: Label of the output population (not used in the body).
        spike_queue: Queue of received spike records; emptied on every step.
        new_spike_queue: Most recent spike records, passed to ``selectOutput``.
        plt: The matplotlib pyplot module, used to fetch the current figure.
        img: Image handle for rendering (currently unused).
        env: Wrapped environment; ``env.env.state`` is read as the frame stack.
    """
    step = 0
    total_reward = 0
    previous_spikes = []
    try:
        env.reset()
        while True:
            print(total_reward)
            step += 1

            # Display the simulation at the start of the step
            #img.set_data(env.render(mode='rgb_array'))
            display.display(plt.gcf())
            display.clear_output(wait=True)

            # selectOutput returns a single action index, so it must not be
            # unpacked into two names (doing so raised TypeError).
            # NOTE(review): the value is a neuron index and may fall outside
            # the environment's action range — confirm the intended mapping.
            action = selectOutput(new_spike_queue, previous_spikes)

            # TODO: Pull from the spike queue and use them to "decide the action"
            # Until then the queued records are simply discarded.
            spike_queue.clear()

            # Move the simulation forwards
            _, reward, done, _ = env.step(action)
            total_reward += reward

            # If finished, stop
            if done:
                print(f"Steps: {step}, score: {total_reward}")
                break

            # TODO: Use the output from the environment to send spikes to SpiNNaker
            # NOTE(review): send_spikes presumably expects neuron ids, while
            # this passes the flattened pixel values of the whole frame stack
            # — confirm the intended encoding.
            connection.send_spikes(input_label, inputToSpikeRateArray(env.env.state))

            # Continue spinnaker simulation for next run of loop
            p.external_devices.continue_simulation()
    finally:
        # Stop the OpenAI Gym
        #env.env.close()

        # Stop SpiNNaker even when the loop raised, so that run_forever
        # returns instead of waiting indefinitely.
        p.external_devices.request_stop()

In [None]:
# Import the simulator
import pyNN.spiNNaker as p
from functools import partial
from collections import deque

# A queue of every spike record received so far (newest on the left)
spike_queue = deque()
# Holds only the most recent spike record. This exact list object is handed to
# the gym loop, so it must be updated in place and never rebound.
new_spike_queue = []

# A function to receive spikes and put them in a queue
def receive_spikes(label, time, neuron_ids):
    """Store a received spike record in both shared containers.

    Args:
        label: Label of the population that produced the spikes.
        time: Time value reported with the spikes.
        neuron_ids: Ids of the neurons that spiked.
    """
    record = (label, time, neuron_ids)
    spike_queue.appendleft(record)
    # Slice assignment replaces the contents in a single step while keeping
    # the same list object. The previous `new_spike_queue = []` only created
    # a local variable, so the shared list never received any records.
    new_spike_queue[:] = [record]

# Keep track of the labels; these need to match up in several places
# (the populations, the live connection and the registered callbacks).
input_label = "input"
output_label = "output"

# Create the connection.
# Note the use of local_port=None allows the automatic assignment of a port.
connection = p.external_devices.SpynnakerLiveSpikesConnection(
    local_port=None, send_labels=[input_label], receive_labels=[output_label])

# Make the OpenAI Gym simulation: the Atari wrapper, itself wrapped in a
# Monitor that writes to /tmp/Breakout-v0 (force=True is passed to Monitor).
env = Atari("Breakout-v0")
env = wrappers.Monitor(env, "/tmp/Breakout-v0", force=True)

# Display setup: creating the figure/image is currently disabled, so the
# image handle passed to the gym loop is None.
img = None#plt.imshow(env.env.render(mode='rgb_array'))

# Register the OpenAI Gym function to be called when the simulation starts
# Note: we have to use a label here so we chose input_label
# The remaining arguments of run_openai_gym_sim are bound here via partial.
connection.add_start_resume_callback(
    input_label, partial(run_openai_gym_sim, output_label=output_label, spike_queue=spike_queue, new_spike_queue=new_spike_queue, plt=plt, img=img, env=env))

# Register the receive spikes function
connection.add_receive_callback(output_label, receive_spikes)

# Run the simulation, using the port chosen for the live connection
run_spinnaker_sim(input_label, output_label, connection.local_port)


In [None]:
# Elapsed wall-clock seconds since `start` was recorded in the imports cell.
print(time.time()-start)

In [None]:
# NOTE(review): presumably total elapsed seconds divided by the number of frames — confirm.
1231/1000

# About 1 second per frame is acceptable: it allows running a few thousand cycles.