### IMPORT REQUIRED LIBRARIES





In [1]:
from __future__ import division
import numpy as np
from scipy.stats import norm
import random
import tqdm
import pandas as pd
from collections import OrderedDict
import matplotlib.pyplot as plt
import heapq
import pickle


### UTILS

In [2]:
def normalize_weight_matrix(weight_matrix):
    
    # Applied only while initializing the weight. Later Synaptic scalling applied on weight matrices
    
    """ Normalize the weights in the matrix such that incoming connections to a neuron sum up to 1
    
    Args:
        weight_matrix(array) -- Incoming Weights from W_ee or W_ei or W_ie
    
    Returns:
        weight_matrix(array) -- Normalized weight matrix"""

    normalized_weight_matrix = weight_matrix / np.sum(weight_matrix,axis = 0)

    return normalized_weight_matrix

### Implement lambda incoming connections for Excitatory neurons and outgoing connections per Inhibitory neuron

In [3]:


def generate_lambd_connections(synaptic_connection,ne,ni, lambd_w,lambd_std):
    
    
    """
    Args:
    synaptic_connection -  Type of sysnpatic connection (EE,EI or IE)
    ne - Number of excitatory units
    ni - Number of inhibitory units
    lambd_w - Average number of incoming connections
    lambd_std - Standard deviation of average number of connections per neuron
    
    Returns:
    
    connection_weights - Weight matrix
    
    """
    
    
    if synaptic_connection == 'EE':
        
        
        """Choose random lamda connections per neuron"""

        # Draw normally distribued ne integers with mean lambd_w

        lambdas_incoming = norm.ppf(np.random.random(ne), loc=lambd_w, scale=lambd_std).astype(int)
        
        # lambdas_outgoing = norm.ppf(np.random.random(ne), loc=lambd_w, scale=lambd_std).astype(int)
    
        # List of neurons 

        list_neurons= list(range(ne))

        # Connection weights

        connection_weights = np.zeros((ne,ne))

        # For each lambd value in the above list,
        # generate weights for incoming and outgoing connections
            
        #-------------Gaussian Distribution of weights --------------
            
        # weight_matrix = np.random.randn(Sorn.ne, Sorn.ni) + 2 # Small random values from gaussian distribution
                                                                # Centered around 2 to make all values positive 
            
        # ------------Uniform Distribution --------------------------
        global_incoming_weights = np.random.uniform(0.0,0.1,sum(lambdas_incoming))
        
        # Index Counter
        global_incoming_weights_idx = 0
        
        # Choose the neurons in order [0 to 199]
        
        for neuron in list_neurons:

            ### Choose ramdom unique (lambdas[neuron]) neurons from  list_neurons
            possible_connections = list_neurons.copy()
            
            possible_connections.remove(neuron)  # Remove the selected neuron from possible connections i!=j
            
            # Choose random presynaptic neurons
            possible_incoming_connections = random.sample(possible_connections,lambdas_incoming[neuron])  

           
            incoming_weights_neuron = global_incoming_weights[global_incoming_weights_idx:global_incoming_weights_idx+lambdas_incoming[neuron]]
            
            # ---------- Update the connection weight matrix ------------

            # Update incoming connection weights for selected 'neuron'

            for incoming_idx,incoming_weight in enumerate(incoming_weights_neuron):  
                connection_weights[possible_incoming_connections[incoming_idx]][neuron] = incoming_weight
            
            global_incoming_weights_idx += lambdas_incoming[neuron]
        
        return connection_weights
    
    if synaptic_connection == 'EI':
        
        """Choose random lamda connections per neuron"""

        # Draw normally distribued ni integers with mean lambd_w
        lambdas = norm.ppf(np.random.random(ni), loc=lambd_w, scale=lambd_std).astype(int)
        
        # List of neurons 

        list_neurons= list(range(ni))  # Each i can connect with random ne neurons 

        # Initializing connection weights variable

        connection_weights = np.zeros((ni,ne))

        # ------------Uniform Distribution -----------------------------
        global_outgoing_weights = np.random.uniform(0.0,0.1,sum(lambdas))
        
        # Index Counter
        global_outgoing_weights_idx = 0
        
        # Choose the neurons in order [0 to 40]

        for neuron in list_neurons:

            ### Choose ramdom unique (lambdas[neuron]) neurons from  list_neurons
            possible_connections = list(range(ne))
            
            possible_outgoing_connections = random.sample(possible_connections,lambdas[neuron])  # possible_outgoing connections to the neuron

            # Update weights
            outgoing_weights = global_outgoing_weights[global_outgoing_weights_idx:global_outgoing_weights_idx+lambdas[neuron]]

            # ---------- Update the connection weight matrix ------------

            # Update outgoing connections for the neuron

            for outgoing_idx,outgoing_weight in enumerate(outgoing_weights):  # Update the columns in the connection matrix
                connection_weights[neuron][possible_outgoing_connections[outgoing_idx]] = outgoing_weight
            
            # Update the global weight values index
            global_outgoing_weights_idx += lambdas[neuron]
            
        
        return connection_weights
        
        

### More Util functions

In [4]:
def get_incoming_connection_dict(weights):
    
    # Get the non-zero entires in columns is the incoming connections for the neurons
    
    # Indices of nonzero entries in the columns
    connection_dict=dict.fromkeys(range(1,len(weights)+1),0)
    
    for i in range(len(weights[0])):  # For each neuron
        connection_dict[i] = list(np.nonzero(weights[:,i])[0])
        
    return connection_dict
    

In [5]:
def get_outgoing_connection_dict(weights):
    # Get the non-zero entires in rows is the outgoing connections for the neurons
    
    # Indices of nonzero entries in the rows
    connection_dict=dict.fromkeys(range(1,len(weights)+1),1)
    
    for i in range(len(weights[0])):  # For each neuron
        connection_dict[i] = list(np.nonzero(weights[i,:])[0])
        
    return connection_dict

In [6]:
def prune_small_weights(weights,cutoff_weight):
    
    """ Prune the connections with negative connection strength"""
    weights[weights <= cutoff_weight] = cutoff_weight
    
    return weights
    

In [7]:
def set_max_cutoff_weight(weights, cutoff_weight):
    
    """ Set cutoff limit for the values in given array"""
    
    weights[weights > cutoff_weight] = cutoff_weight
    
    return weights

In [8]:
def get_unconnected_indexes(wee):
    
    """
    Helper function for Structural plasticity to randomly select the unconnected units
    
    Args: 
    wee -  Weight matrix
    
    Returns:
    list (indices) // indices = (row_idx,col_idx)"""
    

    i,j = np.where(wee <= 0.)
    indices = list(zip(i,j))
    
    self_conn_removed = []
    for i,idxs in enumerate(indices):
        
        if idxs[0] != idxs[1]:
            
            self_conn_removed.append(indices[i])
    
    return self_conn_removed

In [9]:
def white_gaussian_noise(mu, sigma,t):

    """Generates white gaussian noise with mean mu, standard deviation sigma and
    the noise length equals t """
    
    noise = np.random.normal(mu, sigma, t)   
    
    return np.expand_dims(noise,1)


In [10]:
### SANITY CHECK EACH WEIGHTS
#### Note this function has no influence in weight matrix, will be deprecated in next version

def zero_sum_incoming_check(weights):
    
    zero_sum_incomings = np.where(np.sum(weights,axis = 0) == 0.)
    
    if len(zero_sum_incomings[-1]) == 0:
        return weights
    else:
        for zero_sum_incoming in zero_sum_incomings[-1]:
            
            rand_indices = np.random.randint(40,size = 2)  # 5 because each excitatory neuron connects with 5 inhibitory neurons 
                                                            # given the probability of connections 0.2
            rand_values = np.random.uniform(0.0,0.1,2)
            
            for i,idx in enumerate(rand_indices):
                
                weights[:,zero_sum_incoming][idx] = rand_values[i]
            
    return weights

### SORN 

In [11]:
class Sorn(object):
    
    """SORN 1 network model Initialization"""

    def __init__(self):
        pass

    """Initialize network variables as class variables of SORN"""
    
    nu = 10                    # Number of input units
    ne = 200                   # Number of excitatory units
    ni = int(0.2*ne)           # Number of inhibitory units in the network
    eta_stdp = 0.004
    eta_inhib = 0.001
    eta_ip = 0.01
    te_max = 1.0 
    ti_max = 0.5
    ti_min = 0.0
    te_min = 0.0
    mu_ip = 0.1
    sigma_ip  = 0.0 # Standard deviation, variance == 0 
    
    
    # Initialize weight matrices

    def initialize_weight_matrix(self, network_type,synaptic_connection, self_connection, lambd_w): 

        
        """
        Args:
        
        network_type(str) - Spare or Dense
        synaptic_connection(str) - EE,EI,IE: Note that Spare connection is defined only for EE connections
        self_connection(str) - True or False: i-->i ; Network is tested only using j-->i
        lambd_w(int) - Average number of incoming and outgoing connections per neuron
        
        Returns:
        weight_matrix(array) -  Array of connection strengths 
        """
        
        if (network_type == "Sparse") and (self_connection == "False"):

            """Generate weight matrix for E-E/ E-I connections with mean lamda incoming and outgiong connections per neuron"""
            
            weight_matrix = generate_lambd_connections(synaptic_connection,Sorn.ne,Sorn.ni,lambd_w,lambd_std = 1)
        
        # Dense matrix for W_ie

        elif (network_type == 'Dense') and (self_connection == 'False'):

            # Gaussian distribution of weights
            # weight_matrix = np.random.randn(Sorn.ne, Sorn.ni) + 2 # Small random values from gaussian distribution
            # Centered around 1 
            # weight_matrix.reshape(Sorn.ne, Sorn.ni) 
            # weight_matrix *= 0.01 # Setting spectral radius 
            
            # Uniform distribution of weights
            weight_matrix = np.random.uniform(0.0,0.1,(Sorn.ne, Sorn.ni))
            weight_matrix.reshape((Sorn.ne,Sorn.ni))

        return weight_matrix

    def initialize_threshold_matrix(self, te_min,te_max, ti_min,ti_max):

        # Initialize the threshold for excitatory and inhibitory neurons
        
        """Args:
            te_min(float) -- Min threshold value for excitatory units
            ti_min(float) -- Min threshold value for inhibitory units
            te_max(float) -- Max threshold value for excitatory units
            ti_max(float) -- Max threshold value for inhibitory units
        Returns:
            te(vector) -- Threshold values for excitatory units
            ti(vector) -- Threshold values for inhibitory units"""

        te = np.random.uniform(0., te_max, (Sorn.ne, 1))
        ti = np.random.uniform(0., ti_max, (Sorn.ni, 1))

        return te, ti

    def initialize_activity_vector(self,ne, ni):
        
        # Initialize the activity vectors X and Y for excitatory and inhibitory neurons
        
        """Args:
            ne(int) -- Number of excitatory neurons
            ni(int) -- Number of inhibitory neurons
        Returns:
             x(array) -- Array of activity vectors of excitatory population
             y(array) -- Array of activity vectors of inhibitory population"""

        x = np.zeros((ne, 2))
        y = np.zeros((ni, 2))

        return x, y

In [12]:
class Plasticity(Sorn):
    """
    Instance of class Sorn. Inherits the variables and functions defined in class Sorn
    Encapsulates all plasticity mechanisms mentioned in the article """

    # Initialize the global variables for the class //Class attributes

    def __init__(self):
        
        super().__init__()
        self.nu = Sorn.nu                  # Number of input units
        self.ne = Sorn.ne                  # Number of excitatory units
        self.eta_stdp = Sorn.eta_stdp      # STDP plasticity Learning rate constant; SORN1 and SORN2
        self.eta_ip = Sorn.eta_ip          # Intrinsic plasticity learning rate constant; SORN1 and SORN2
        self.eta_inhib = Sorn.eta_inhib    # Intrinsic plasticity learning rate constant; SORN2 only
        self.h_ip = 2 * Sorn.nu / Sorn.ne  # Target firing rate
        self.mu_ip = Sorn.mu_ip            # Mean target firing rate  
        self.ni = Sorn.ni                  # Number of inhibitory units in the network
        self.time_steps = Sorn.time_steps  # Total time steps of simulation
        self.te_min = Sorn.te_min          # Excitatory minimum Threshold
        self.te_max = Sorn.te_max          # Excitatory maximum Threshold
        
    def stdp(self, wee, x, cutoff_weights):
        
        """ Apply STDP rule : Regulates synaptic strength between the pre(Xj) and post(Xi) synaptic neurons"""

        x = np.asarray(x)
        xt_1 = x[:,0]
        xt = x[:,1]
        wee_t = wee.copy()
        
        # STDP applies only on the neurons which are connected.
        
        for i in range(len(wee_t[0])): # Each neuron i, Post-synaptic neuron
            
            for j in range(len(wee_t[0:])): # Incoming connection from jth pre-synaptic neuron to ith neuron
                
                if wee_t[j][i] != 0. : # Check connectivity
                    
                    # Get the change in weight
                    delta_wee_t = self.eta_stdp * (xt[i] * xt_1[j] - xt_1[i]*xt[j])

                    # Update the weight between jth neuron to i ""Different from notation in article 

                    wee_t[j][i] = wee[j][i] + delta_wee_t
        
        """ Prune the smallest weights induced by plasticity mechanisms; Apply lower cutoff weight"""
        wee_t = prune_small_weights(wee_t,cutoff_weights[0])
        
        """Check and set all weights < upper cutoff weight """
        wee_t = set_max_cutoff_weight(wee_t,cutoff_weights[1])

        return wee_t

    def ip(self, te, x):
        
        # IP rule: Active unit increases its threshold and inactive decreases its threshold.

        xt = x[:, 1]

        te_update = te + self.eta_ip * (xt.reshape(self.ne, 1) - self.h_ip)
        
        """ Check whether all te are in range [0.0,1.0] and update acordingly"""
        
        # Update te < 0.0 ---> 0.0
        # te_update = prune_small_weights(te_update,self.te_min)
        
        # Set all te > 1.0 --> 1.0
        # te_update = set_max_cutoff_weight(te_update,self.te_max)

        return te_update

    def ss(self, wee_t):
        
        """Synaptic Scaling or Synaptic Normalization"""
        
        wee_t = wee_t / np.sum(wee_t,axis=0)

        return wee_t

    
    def istdp(self, wei, x, y, cutoff_weights):

        #  Apply iSTDP rule : Regulates synaptic strength between the pre(Yj) and post(Xi) synaptic neurons
        
        # Excitaotry network activity
        x = np.asarray(x) # Array sanity check
        xt_1 = x[:, 0]  
        xt = x[:, 1]  
    
        # Inhibitory network activity
        y = np.asarray(y)

        yt_1 = y[:, 0]    
        yt = y[:, 1]  
 

        # iSTDP applies only on the neurons which are connected.
        wei_t = wei.copy()

        for i in range(len(wei_t[0])): # Each neuron i, Post-synaptic neuron: means for each column; 
            
            for j in range(len(wei_t[0:])): # Incoming connection from j, pre-synaptic neuron to ith neuron
                
                if wei_t[j][i] != 0. : # Check connectivity
                    
                    # Get the change in weight
                    delta_wei_t = - self.eta_inhib * yt_1[j] * (1 - xt[i]*(1 + 1/self.mu_ip))

                    # Update the weight between jth neuron to i ""Different from notation in article 

                    wei_t[j][i] = wei[j][i] + delta_wei_t
        
        """ Prune the smallest weights induced by plasticity mechanisms; Apply lower cutoff weight"""
        wei_t = prune_small_weights(wei_t,cutoff_weights[0])
        
        """Check and set all weights < upper cutoff weight """
        wei_t = set_max_cutoff_weight(wei_t,cutoff_weights[1])
        
        return wei_t

    @staticmethod
    def structural_plasticity(wee):

        """ Add new connection value to the smallest weight between excitatory units randomly"""

        p_c = np.random.randint(0, 10, 1)

        if p_c == 0:  # p_c= 0.1

            """ Do structural plasticity """

            # Choose the smallest weights randomly from the weight matrix wee
            
            indexes = get_unconnected_indexes(wee) 

            # Choose any idx randomly
            idx_rand = random.choice(indexes)
            
            if idx_rand[0] == idx_rand[1]:
                
                idx_rand = random.choice(indexes)
                
            wee[idx_rand[0]][idx_rand[1]] = 0.001
            

        return wee

    ###########################################################

    @staticmethod
    def initialize_plasticity():

        wee = wee_init
        wei = wei_init
        wie = wie_init
        te = te_init
        ti = ti_init
        x = x_init
        y = y_init

        return wee, wei, wie, te, ti, x, y

    @staticmethod
    def reorganize_network():
        pass

In [13]:
class MatrixCollection(Sorn):
    def __init__(self,phase, matrices = None):
        super().__init__()
        
        self.phase = phase
        self.matrices = matrices
        if self.phase == 'Plasticity' and self.matrices == None :

            self.time_steps = Sorn.time_steps + 1  # Total training steps
            self.Wee, self.Wei, self.Wie, self.Te, self.Ti, self.X, self.Y = [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps
            wee, wei, wie, te, ti, x, y = Plasticity.initialize_plasticity()

            # Assign initial matrix to the master matrices
            self.Wee[0] = wee
            self.Wei[0] = wei
            self.Wie[0] = wie
            self.Te[0] = te
            self.Ti[0] = ti
            self.X[0] = x
            self.Y[0] = y
        
        elif self.phase == 'Plasticity' and self.matrices != None:
            
            self.time_steps = Sorn.time_steps + 1  # Total training steps
            self.Wee, self.Wei, self.Wie, self.Te, self.Ti, self.X, self.Y = [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps
            # Assign matrices from plasticity phase to the new master matrices for training phase
            self.Wee[0] = matrices['Wee']
            self.Wei[0] = matrices['Wei']
            self.Wie[0] = matrices['Wie']
            self.Te[0] = matrices['Te']
            self.Ti[0] = matrices['Ti']
            self.X[0] = matrices['X']
            self.Y[0] = matrices['Y']
            
        elif self.phase == 'Training':

            """NOTE:
            time_steps here is diferent for plasticity or trianing phase"""
            self.time_steps = Sorn.time_steps + 1  # Total training steps
            self.Wee, self.Wei, self.Wie, self.Te, self.Ti, self.X, self.Y = [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps, [0] * self.time_steps, \
                                                                             [0] * self.time_steps
            # Assign matrices from plasticity phase to new respective matrices for training phase
            self.Wee[0] = matrices['Wee']
            self.Wei[0] = matrices['Wei']
            self.Wie[0] = matrices['Wie']
            self.Te[0] = matrices['Te']
            self.Ti[0] = matrices['Ti']
            self.X[0] = matrices['X']
            self.Y[0] = matrices['Y']
            
    # @staticmethod
    def weight_matrix(self, wee, wei, wie, i):
        # Get delta_weight from Plasticity.stdp
    
        # i - training step
        self.Wee[i + 1] = wee
        self.Wei[i + 1] = wei
        self.Wie[i + 1] = wie

        return self.Wee, self.Wei, self.Wie

    # @staticmethod
    def threshold_matrix(self, te, ti, i):
        self.Te[i + 1] = te
        self.Ti[i + 1] = ti
        return self.Te, self.Ti

    # @staticmethod
    def network_activity_t(self, excitatory_net, inhibitory_net, i):
        self.X[i + 1] = excitatory_net
        self.Y[i + 1] = inhibitory_net

        return self.X, self.Y

    # @staticmethod
    def network_activity_t_1(self, x, y, i):
        x_1, y_1 = [0] * self.time_steps, [0] * self.time_steps
        x_1[i] = x
        y_1[i] = y

        return x_1, y_1

In [23]:
class NetworkState(Plasticity):
    
    """The evolution of network states"""

    def __init__(self, v_t):
        super().__init__()
        self.v_t = v_t
    
    def incoming_drive(self,weights,activity_vector):
            
        # Broadcasting weight*acivity vectors 
        
        incoming = weights* activity_vector
        incoming = np.array(incoming.sum(axis=0))
        return incoming
        
    def excitatory_network_state(self, wee, wei, te, x, y,white_noise_e):
        
        """ Activity of Excitatory neurons in the network"""
    
        xt = x[:, 1]  
        xt = xt.reshape(self.ne, 1)
        yt = y[:, 1]
        yt = yt.reshape(self.ni, 1)
        
        
        incoming_drive_e = np.expand_dims(self.incoming_drive(weights = wee,activity_vector=xt),1)
        incoming_drive_i = np.expand_dims(self.incoming_drive(weights = wei,activity_vector=yt),1)
        tot_incoming_drive = incoming_drive_e -  incoming_drive_i + white_noise_e + np.expand_dims(np.asarray(self.v_t),1) - te
        
        """Heaviside step function"""

        heaviside_step = [0] * len(tot_incoming_drive)
        for t in range(len(tot_incoming_drive)):
            heaviside_step[t] = 0.0 if tot_incoming_drive[t] < te[t] else 1.0

        xt_next = np.asarray(heaviside_step.copy())

        return xt_next

    def inhibitory_network_state(self, wie, ti, x,white_noise_i):

        # Activity of inhibitory neurons

        wie = np.asarray(wie)
        xt = x[:, 1]
        xt = xt.reshape(Sorn.ne, 1)
    
        incoming_drive_e = np.expand_dims(self.incoming_drive(weights = wie,activity_vector=xt),1)
        
        tot_incoming_drive = incoming_drive_e + white_noise_i - ti
        
        """Implement Heaviside step function"""

        heaviside_step = [0] * len(tot_incoming_drive)

        for t in range(len(tot_incoming_drive)):
            heaviside_step[t] = 0.0 if tot_incoming_drive[t] < ti[t] else 1.0

        yt_next = np.asarray(heaviside_step.copy())  

        return yt_next

    
    def recurrent_drive(self, wee, wei, te, x, y,white_noise_e):
        
        """Network state due to recurrent drive received by the each unit at time t+1"""
        
    
        xt = x[:, 1]  
        xt = xt.reshape(self.ne, 1)
        yt = y[:, 1]
        yt = yt.reshape(self.ni, 1)
        
        incoming_drive_e = np.expand_dims(self.incoming_drive(weights = wee,activity_vector=xt),1)
        incoming_drive_i = np.expand_dims(self.incoming_drive(weights = wei,activity_vector=yt),1)
        
        tot_incoming_drive = incoming_drive_e -  incoming_drive_i + white_noise_e - te
        
        """Heaviside step function"""

        heaviside_step = [0] * len(tot_incoming_drive)
        for t in range(len(tot_incoming_drive)):
            heaviside_step[t] = 0.0 if tot_incoming_drive[t] < te[t] else 1.0

        xt_next = np.asarray(heaviside_step.copy())

        return xt_next

    

### Helper class for training SORN

In [24]:
class TrainSornPlasticity(Sorn):
    
    
    """
        Args:
        inputs - one hot vector of inputs
    
        Returns:
        matrix_collection - collection of all weight matrices in dictionaries
        """
    
    def __init__(self):
        
        super().__init__()
        
        pass
        

    
    def train_sorn(self,phase,matrices,inputs):
        
        self.phase = phase
        self.time_steps = 1
        Sorn.time_steps = 1
        self.inputs = np.asarray(inputs)
        self.phase = phase
        self.matrices = matrices
        
        # Collect the network activity at all time steps
        
        X_all = [0]*self.time_steps
        Y_all = [0]*self.time_steps
        R_all = [0]*self.time_steps
        
        frac_pos_active_conn = []
         
        """ DONOT INITIALIZE WEIGHTS"""
        
        matrix_collection = MatrixCollection(phase = self.phase, matrices = self.matrices)  
        
        for i in range(1):
            
            """ Generate white noise"""
            # white_noise_e = white_gaussian_noise(mu= 0., sigma = 0.04,t = Sorn.ne)
            # white_noise_i = white_gaussian_noise(mu= 0., sigma = 0.04,t = Sorn.ni)
            
            network_state = NetworkState(self.inputs.T)  # Feed Input as an argument to the class
            

            # Buffers to get the resulting x and y vectors at the current time step and update the master matrix

            x_buffer, y_buffer = np.zeros(( Sorn.ne, 2)), np.zeros((Sorn.ni, 2))

            te_buffer, ti_buffer = np.zeros((Sorn.ne, 1)), np.zeros((Sorn.ni, 1))

            # Get the matrices and rename them for ease of reading

            Wee, Wei, Wie = matrix_collection.Wee, matrix_collection.Wei, matrix_collection.Wie
            Te, Ti = matrix_collection.Te, matrix_collection.Ti
            X, Y = matrix_collection.X, matrix_collection.Y
            
            
            # Recurrent drive at t+1 used to predict the next external stimuli
            
            r = network_state.recurrent_drive(Wee[i], Wei[i], Te[i], X[i], Y[i],white_noise_e = 0.)

            """Get excitatory states and inhibitory states given the weights and thresholds"""

            # x(t+1), y(t+1)
            excitatory_state_xt_buffer = network_state.excitatory_network_state(Wee[i], Wei[i], Te[i], X[i], Y[i],white_noise_e = 0.)

            inhibitory_state_yt_buffer = network_state.inhibitory_network_state(Wie[i], Ti[i], X[i],white_noise_i = 0.)
            
            
            """ Update X and Y """
            x_buffer[:, 0] = X[i][:, 1]  # xt -->(becomes) xt_1
            x_buffer[:, 1] = excitatory_state_xt_buffer.T  # New_activation; x_buffer --> xt
            

            y_buffer[:, 0] = Y[i][:, 1]
            y_buffer[:, 1] = inhibitory_state_yt_buffer.T
            

            """Plasticity phase"""

            plasticity = Plasticity()

            # STDP 
            Wee_t = plasticity.stdp(Wee[i],x_buffer,cutoff_weights = (0.0,1.0))
              
            # Intrinsic plasticity
            Te_t = plasticity.ip(Te[i],x_buffer)
              
            # Structural plasticity
            Wee_t = plasticity.structural_plasticity(Wee_t)      
            
            # iSTDP 
            Wei_t = plasticity.istdp(Wei[i],x_buffer,y_buffer,cutoff_weights = (0.0,1.0))
            
            # Synaptic scaling Wee
            Wee_t = Plasticity().ss(Wee_t)
            
            # Synaptic scaling Wei
            Wei_t = Plasticity().ss(Wei_t)

            """Assign the matrices to the matrix collections"""
            matrix_collection.weight_matrix(Wee_t, Wei_t, Wie[i], i)
            matrix_collection.threshold_matrix(Te_t, Ti[i], i)
            matrix_collection.network_activity_t(x_buffer, y_buffer, i)
            
            X_all[i] = x_buffer[:,1]
            Y_all[i] = y_buffer[:,1]
            R_all[i] = r
   
        plastic_matrices = {'Wee':matrix_collection.Wee[-1], 
                            'Wei': matrix_collection.Wei[-1], 
                            'Wie':matrix_collection.Wie[-1],
                            'Te': matrix_collection.Te[-1], 'Ti': matrix_collection.Ti[-1],
                            'X': X[-1], 'Y': Y[-1]}
        
        return plastic_matrices,X_all,Y_all,R_all,frac_pos_active_conn

In [25]:
class TrainSorn(Sorn):
    
    
    """
        Args:
        inputs - one hot vector of inputs
    
        Returns:
        matrix_collection - collection of all weight matrices in dictionaries
        """
    
    def __init__(self):
        
        super().__init__()
        
        pass
        

    
    def train_sorn(self,phase,matrices,inputs):
        
        self.phase = phase
        self.time_steps = 1
        Sorn.time_steps = 1
        self.inputs = np.asarray(inputs)
        self.phase = phase
        self.matrices = matrices
        
        # Collect the network activity at all time steps
        
        X_all = [0]*self.time_steps
        Y_all = [0]*self.time_steps
        R_all = [0]*self.time_steps
        
        frac_pos_active_conn = []
         
        """ DONOT INITIALIZE WEIGHTS"""
        
        matrix_collection = MatrixCollection(phase = self.phase, matrices = self.matrices)  
        
        for i in range(1):
            
            """ Generate white noise"""
            # white_noise_e = white_gaussian_noise(mu= 0., sigma = 0.04,t = Sorn.ne)
            # white_noise_i = white_gaussian_noise(mu= 0., sigma = 0.04,t = Sorn.ni)

            network_state = NetworkState(self.inputs.T)  # Feed Input as an argument to the class
            

            # Buffers to get the resulting x and y vectors at the current time step and update the master matrix

            x_buffer, y_buffer = np.zeros(( Sorn.ne, 2)), np.zeros((Sorn.ni, 2))

            te_buffer, ti_buffer = np.zeros((Sorn.ne, 1)), np.zeros((Sorn.ni, 1))

            # Get the matrices and rename them for ease of reading

            Wee, Wei, Wie = matrix_collection.Wee, matrix_collection.Wei, matrix_collection.Wie
            Te, Ti = matrix_collection.Te, matrix_collection.Ti
            X, Y = matrix_collection.X, matrix_collection.Y
            
            
            # Recurrent drive at t+1 used to predict the next external stimuli
            
            r = network_state.recurrent_drive(Wee[i], Wei[i], Te[i], X[i], Y[i],white_noise_e = 0.)

            """Get excitatory states and inhibitory states given the weights and thresholds"""

            # x(t+1), y(t+1)
            excitatory_state_xt_buffer = network_state.excitatory_network_state(Wee[i], Wei[i], Te[i], X[i], Y[i],white_noise_e = 0.)

            inhibitory_state_yt_buffer = network_state.inhibitory_network_state(Wie[i], Ti[i], X[i],white_noise_i = 0.)
            
            
            """ Update X and Y """
            x_buffer[:, 0] = X[i][:, 1]  # xt -->(becomes) xt_1
            x_buffer[:, 1] = excitatory_state_xt_buffer.T  # New_activation; x_buffer --> xt
            

            y_buffer[:, 0] = Y[i][:, 1]
            y_buffer[:, 1] = inhibitory_state_yt_buffer.T
            

            """Plasticity phase"""

#             plasticity = Plasticity()

#             STDP 
#             Wee_t = plasticity.stdp(Wee[i],x_buffer,cutoff_weights = (0.0,1.0))
              
#             Intrinsic plasticity
#             Te_t = plasticity.ip(Te[i],x_buffer)
              
#             Structural plasticity
#             Wee_t = plasticity.structural_plasticity(Wee_t)      
            
#             iSTDP 
#             Wei_t = plasticity.istdp(Wei[i],x_buffer,y_buffer,cutoff_weights = (0.0,1.0))
            
#             Synaptic scaling Wee
#             Wee_t = Plasticity().ss(Wee_t)
            
#             Synaptic scaling Wei
#             Wei_t = Plasticity().ss(Wei_t)

            """Assign the matrices to the matrix collections"""
            matrix_collection.weight_matrix(Wee[i], Wei[i], Wie[i], i)
            matrix_collection.threshold_matrix(Te[i], Ti[i], i)
            matrix_collection.network_activity_t(x_buffer, y_buffer, i)
            
            X_all[i] = x_buffer[:,1]
            Y_all[i] = y_buffer[:,1]
            R_all[i] = r
   
        plastic_matrices = {'Wee':matrix_collection.Wee[-1], 
                            'Wei': matrix_collection.Wei[-1], 
                            'Wie':matrix_collection.Wie[-1],
                            'Te': matrix_collection.Te[-1], 'Ti': matrix_collection.Ti[-1],
                            'X': X[-1], 'Y': Y[-1]}
        
        return plastic_matrices,X_all,Y_all,R_all,frac_pos_active_conn

### PINGPONG AGENT ENVIRONEMNT ARCHITECTURE

In [26]:
## Architecture

# Take in inputs from the screen and preprocess them
# Pass them into SORN (200) with 10 input units in the reservoir
# Update the weights of the SORN input layer and the output layer using gradient descent
# weights['1'] - Matrix that holds weights of pixels passing into SORN reservoir. Dimensions: [10 x 80 x 80] -> [10 x 6400]
# weights['2'] - Matrix that holds weights of SORN reservoir passing into output. Dimensions: [1 x 200]

# Process is:

# processed_observations = image vector - [6400 x 1] array
# Compute input_layer_values = weights['1'] dot processed_observations ([10 x 6400] dot [6400 x 1]) -> [10 x 1] - this gives initial activation values.
# Next we need to transform those either via a sigmoid or an ReLU of some sort. Let's use ReLU
# ReLU(input_layer_values)
# Next pass this values into SORN input units
# Next we need to pass the reservoir activity of SORN one layer further
# output_layer_value = weights['2'] dot hidden_layer_values ([1 x 200] dot [200 x 1] -> [1 x 1])
# Now our output layer is the probability of going up or down. Let's make sure this output is between 0 and 1 by passing it through a sigmoid
# p = sigmoid(output_layer_value)

# Learning after round has finished:

# Figure out the result
# Compute the error
# Use the error to calculate the gradient
    # The below dimensions all assume we had exactly 10 frames in the round (not necessarily true!)
    # dC_dw2 = hidden_layer_values^T dot gradient_log_p ([1 x 2000] dot [2000 x 1] -> 1x1)
    # delta_1 = gradient_log_p outer_product weights['2'] = [2000 x 1] outer_product [1 x 200] ([2000 x 200])
    # dC_dw1 = delta_1^T dot input_observations ([10 x 100]x dot [100 x 64000] -> [10 x 64000])

# After some batch size of rounds has finished,
    # Use rmsprop to move weights['1'] and weights['2'] in the direction of the gradient
# Repeat!

In [31]:
import gym
import numpy as np

# Init sorn object
    
train_sorn = TrainSorn()
train_sorn_plasticity = TrainSornPlasticity()
    
def downsample(image):
    # Take only alternate pixels - basically halves the resolution of the image (which is fine for us)
    return image[::2, ::2, :]

def remove_color(image):
    """Convert all color (RGB is the third dimension in the image)"""
    return image[:, :, 0]

def remove_background(image):
    image[image == 144] = 0
    image[image == 109] = 0
    return image

def preprocess_observations(input_observation, prev_processed_observation, input_dimensions):
    """ convert the 210x160x3 uint8 frame into a 6400 float vector """
    processed_observation = input_observation[35:195] # crop
    processed_observation = downsample(processed_observation)
    processed_observation = remove_color(processed_observation)
    processed_observation = remove_background(processed_observation)
    processed_observation[processed_observation != 0] = 1 # everything else (paddles, ball) just set to 1
    # Convert from 80 x 80 matrix to 1600 x 1 matrix
    processed_observation = processed_observation.astype(np.float).ravel()

    # subtract the previous frame from the current one so we are only processing on changes in the game
    if prev_processed_observation is not None:
        input_observation = processed_observation - prev_processed_observation
    else:
        input_observation = np.zeros(input_dimensions)
    # store the previous frame so we can subtract from it next time
    prev_processed_observations = processed_observation
    return input_observation, prev_processed_observations


def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def relu(vector):
    vector[vector < 0] = 0
    return vector

def apply_neural_nets(observation_matrix, weights,plastic_matrices,episode):
    
    """ Based on the observation_matrix and weights, compute the new hidden layer values and the new output layer values"""
    input_layer_values = np.dot(weights['1'], observation_matrix)
    input_layer_values = relu(input_layer_values)
    
    
    if episode < 1000:
        
        # Note that reservoir states replaces variable name X_all
        plastic_matrices,reservoir_states,Y_all,R_all,frac_pos_active_conn = train_sorn_plasticity.train_sorn(phase = 'Plasticity',
                                                                                       matrices = plastic_matrices,
                                                                                       inputs = input_layer_values)
        
    else:
        
        if episode == 1000:
            with open('plasticmatrices.pkl','rb') as f:  
                plastic_matrices,X_all,Y_all,R_all,frac_pos_active_conn = pickle.load(f)
        
        # Note that reservoir states replaces variable name X_all
        plastic_matrices,reservoir_states,Y_all,R_all,frac_pos_active_conn = train_sorn.train_sorn(phase = 'Training',
                                                                                       matrices = plastic_matrices,
                                                                                       inputs = input_layer_values)
        
    output_layer_values = np.dot(reservoir_states, weights['2'])
    output_layer_values = sigmoid(output_layer_values)
    return input_layer_values, output_layer_values, plastic_matrices

def choose_action(probability):
    random_value = np.random.uniform()
    if random_value < probability:
        # signifies up in openai gym
        return 2
    else:
         # signifies down in openai gym
        return 3

def compute_gradient(gradient_log_p, input_layer_values, observation_values, weights):
    
    delta_L = gradient_log_p
    dC_dw2 = np.dot(input_layer_values.T, delta_L).ravel()
    delta_l2 = np.outer(delta_L, weights['2'])
    delta_l2 = relu(delta_l2)
    dC_dw1 = np.dot(delta_l2.T, observation_values)
    return {
        '1': dC_dw1,
        '2': dC_dw2
    }

def update_weights(episode,weights, expectation_g_squared, g_dict, decay_rate, learning_rate):
    
    epsilon = 1e-4
    for layer_name in weights.keys():
        g = g_dict[layer_name]
        expectation_g_squared[layer_name] = decay_rate * expectation_g_squared[layer_name] + (1 - decay_rate) * g**2
        weights[layer_name] += (learning_rate * g)/(np.sqrt(expectation_g_squared[layer_name] + epsilon))
        g_dict[layer_name] = np.zeros_like(weights[layer_name]) # reset batch gradient buffer
    
    if episode % 200 == 0:
        
        with open('%s.pickle' %episode, 'wb') as f:
            pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
        
        
def discount_rewards(rewards, gamma):
    """ Actions you took 20 steps before the end result are less important to the overall result than an action you took a step ago.
    This implements that logic by discounting the reward on previous actions based on how long ago they were taken"""
    discounted_rewards = np.zeros_like(rewards)
    running_add = 0
    for t in reversed(range(0, rewards.size)):
        if rewards[t] != 0:
            running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
        running_add = running_add * gamma + rewards[t]
        discounted_rewards[t] = running_add
    return discounted_rewards

def discount_with_rewards(gradient_log_p, episode_rewards, gamma):
    """ discount the gradient with the normalized rewards """
    discounted_episode_rewards = discount_rewards(episode_rewards, gamma)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_episode_rewards -= np.mean(discounted_episode_rewards)
    discounted_episode_rewards /= np.std(discounted_episode_rewards)
    return gradient_log_p * discounted_episode_rewards


def main():
    env = gym.make("Pong-v0")
    # env = gym.make("CartPole-v0")
    observation = env.reset() # This gets us the image
    
    env.seed(10)
    
    # hyperparameters
    episode_number = 0
    batch_size = 5
    gamma = 0.99 # discount factor for reward
    decay_rate = 0.99
    num_hidden_layer_neurons = 200
    input_dimensions = 80 * 80
    learning_rate = 1e-3

    episode_number = 0
    reward_sum = 0
    running_reward = None
    prev_processed_observations = None

    weights = {
        '1': np.random.randn(num_hidden_layer_neurons, input_dimensions) / np.sqrt(input_dimensions),
        '2': np.random.randn(num_hidden_layer_neurons) / np.sqrt(num_hidden_layer_neurons)
    }

    # To be used with rmsprop algorithm (http://sebastianruder.com/optimizing-gradient-descent/index.html#rmsprop)
    expectation_g_squared = {}
    g_dict = {}
    for layer_name in weights.keys():
        expectation_g_squared[layer_name] = np.zeros_like(weights[layer_name])
        g_dict[layer_name] = np.zeros_like(weights[layer_name])

    episode_hidden_layer_values, episode_observations, episode_gradient_log_ps, episode_rewards = [], [], [], []

    with open('stdp2013_3020k.pkl','rb') as f:  
        plastic_matrices,X_all,Y_all,R_all,frac_pos_active_conn = pickle.load(f)
    
    
    while True:
        # env.render()
        processed_observations, prev_processed_observations = preprocess_observations(observation, prev_processed_observations, input_dimensions)
        
        
        hidden_layer_values, up_probability,plastic_matrices = apply_neural_nets(processed_observations, weights, plastic_matrices,episode_number)
    
        episode_observations.append(processed_observations)
        episode_hidden_layer_values.append(hidden_layer_values)

        action = choose_action(up_probability)

        # carry out the chosen action
        observation, reward, done, info = env.step(action)

        reward_sum += reward
        episode_rewards.append(reward)

        # see here: http://cs231n.github.io/neural-networks-2/#losses
        fake_label = 1 if action == 2 else 0
        loss_function_gradient = fake_label - up_probability
        episode_gradient_log_ps.append(loss_function_gradient)


        if done: # an episode finished
            episode_number += 1
            print(episode_number)
            # Combine the following values for the episode
            episode_hidden_layer_values = np.vstack(episode_hidden_layer_values)
            episode_observations = np.vstack(episode_observations)
            episode_gradient_log_ps = np.vstack(episode_gradient_log_ps)
            episode_rewards = np.vstack(episode_rewards)

            # Tweak the gradient of the log_ps based on the discounted rewards
            episode_gradient_log_ps_discounted = discount_with_rewards(episode_gradient_log_ps, episode_rewards, gamma)

            gradient = compute_gradient(
              episode_gradient_log_ps_discounted,
              episode_hidden_layer_values,
              episode_observations,
              weights
            )

            # Sum the gradient for use when we hit the batch size
            for layer_name in gradient:
                g_dict[layer_name] += gradient[layer_name]

            if episode_number % batch_size == 0:
                update_weights(episode_number,weights, expectation_g_squared, g_dict, decay_rate, learning_rate)

            episode_hidden_layer_values, episode_observations, episode_gradient_log_ps, episode_rewards = [], [], [], [] # reset values
            observation = env.reset() # reset env
            running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
            print ('resetting env. episode reward total was %f. running mean: %f' % (reward_sum, running_reward))
            reward_sum = 0
            prev_processed_observations = None

In [32]:
main()

1
resetting env. episode reward total was -21.000000. running mean: -21.000000
2
resetting env. episode reward total was -21.000000. running mean: -21.000000
3
resetting env. episode reward total was -20.000000. running mean: -20.990000
4
resetting env. episode reward total was -21.000000. running mean: -20.990100
5
resetting env. episode reward total was -21.000000. running mean: -20.990199
6
resetting env. episode reward total was -20.000000. running mean: -20.980297
7
resetting env. episode reward total was -21.000000. running mean: -20.980494
8
resetting env. episode reward total was -21.000000. running mean: -20.980689
9
resetting env. episode reward total was -20.000000. running mean: -20.970882
10
resetting env. episode reward total was -20.000000. running mean: -20.961173
11
resetting env. episode reward total was -20.000000. running mean: -20.951562
12
resetting env. episode reward total was -21.000000. running mean: -20.952046
13
resetting env. episode reward total was -21.00

resetting env. episode reward total was -21.000000. running mean: -20.537081
205
resetting env. episode reward total was -21.000000. running mean: -20.541710
206
resetting env. episode reward total was -20.000000. running mean: -20.536293
207
resetting env. episode reward total was -20.000000. running mean: -20.530930
208
resetting env. episode reward total was -18.000000. running mean: -20.505621
209
resetting env. episode reward total was -20.000000. running mean: -20.500565
210
resetting env. episode reward total was -20.000000. running mean: -20.495559
211
resetting env. episode reward total was -21.000000. running mean: -20.500603
212
resetting env. episode reward total was -21.000000. running mean: -20.505597
213
resetting env. episode reward total was -19.000000. running mean: -20.490541
214
resetting env. episode reward total was -20.000000. running mean: -20.485636
215
resetting env. episode reward total was -21.000000. running mean: -20.490780
216
resetting env. episode rewar

resetting env. episode reward total was -20.000000. running mean: -20.490642
407
resetting env. episode reward total was -21.000000. running mean: -20.495735
408
resetting env. episode reward total was -19.000000. running mean: -20.480778
409
resetting env. episode reward total was -21.000000. running mean: -20.485970
410
resetting env. episode reward total was -21.000000. running mean: -20.491111
411
resetting env. episode reward total was -21.000000. running mean: -20.496200
412
resetting env. episode reward total was -20.000000. running mean: -20.491238
413
resetting env. episode reward total was -21.000000. running mean: -20.496325
414
resetting env. episode reward total was -20.000000. running mean: -20.491362
415
resetting env. episode reward total was -21.000000. running mean: -20.496448
416
resetting env. episode reward total was -19.000000. running mean: -20.481484
417
resetting env. episode reward total was -21.000000. running mean: -20.486669
418
resetting env. episode rewar

resetting env. episode reward total was -20.000000. running mean: -20.287931
609
resetting env. episode reward total was -20.000000. running mean: -20.285052
610
resetting env. episode reward total was -20.000000. running mean: -20.282201
611
resetting env. episode reward total was -20.000000. running mean: -20.279379
612
resetting env. episode reward total was -21.000000. running mean: -20.286585
613
resetting env. episode reward total was -21.000000. running mean: -20.293720
614
resetting env. episode reward total was -19.000000. running mean: -20.280782
615
resetting env. episode reward total was -21.000000. running mean: -20.287974
616
resetting env. episode reward total was -18.000000. running mean: -20.265095
617
resetting env. episode reward total was -20.000000. running mean: -20.262444
618
resetting env. episode reward total was -20.000000. running mean: -20.259819
619
resetting env. episode reward total was -21.000000. running mean: -20.267221
620
resetting env. episode rewar

resetting env. episode reward total was -20.000000. running mean: -20.092592
811
resetting env. episode reward total was -20.000000. running mean: -20.091666
812
resetting env. episode reward total was -20.000000. running mean: -20.090750
813
resetting env. episode reward total was -20.000000. running mean: -20.089842
814
resetting env. episode reward total was -20.000000. running mean: -20.088944
815
resetting env. episode reward total was -20.000000. running mean: -20.088054
816
resetting env. episode reward total was -20.000000. running mean: -20.087174
817
resetting env. episode reward total was -19.000000. running mean: -20.076302
818
resetting env. episode reward total was -21.000000. running mean: -20.085539
819
resetting env. episode reward total was -21.000000. running mean: -20.094684
820
resetting env. episode reward total was -17.000000. running mean: -20.063737
821
resetting env. episode reward total was -21.000000. running mean: -20.073099
822
resetting env. episode rewar

1012
resetting env. episode reward total was -21.000000. running mean: -20.106117
1013
resetting env. episode reward total was -21.000000. running mean: -20.115056
1014
resetting env. episode reward total was -21.000000. running mean: -20.123905
1015
resetting env. episode reward total was -21.000000. running mean: -20.132666
1016
resetting env. episode reward total was -19.000000. running mean: -20.121339
1017
resetting env. episode reward total was -17.000000. running mean: -20.090126
1018
resetting env. episode reward total was -20.000000. running mean: -20.089225
1019
resetting env. episode reward total was -19.000000. running mean: -20.078332
1020
resetting env. episode reward total was -21.000000. running mean: -20.087549
1021
resetting env. episode reward total was -20.000000. running mean: -20.086674
1022
resetting env. episode reward total was -19.000000. running mean: -20.075807
1023
resetting env. episode reward total was -21.000000. running mean: -20.085049
1024
resetting e

resetting env. episode reward total was -21.000000. running mean: -20.236745
1211
resetting env. episode reward total was -21.000000. running mean: -20.244378
1212
resetting env. episode reward total was -21.000000. running mean: -20.251934
1213
resetting env. episode reward total was -18.000000. running mean: -20.229415
1214
resetting env. episode reward total was -21.000000. running mean: -20.237120
1215
resetting env. episode reward total was -20.000000. running mean: -20.234749
1216
resetting env. episode reward total was -21.000000. running mean: -20.242402
1217
resetting env. episode reward total was -21.000000. running mean: -20.249978
1218
resetting env. episode reward total was -20.000000. running mean: -20.247478
1219
resetting env. episode reward total was -21.000000. running mean: -20.255003
1220
resetting env. episode reward total was -20.000000. running mean: -20.252453
1221
resetting env. episode reward total was -20.000000. running mean: -20.249929
1222
resetting env. e

KeyboardInterrupt: 