In [1]:
import numpy as np
import math
# Seed NumPy's global RNG so repeated runs draw the same random numbers.
np.random.seed(0)

# XOR truth table: one row per input pair, stored as float32.
XOR_inputs = np.asarray(
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    dtype=np.float32,
)

# Expected XOR result for each row of XOR_inputs, in the same order.
XOR_outputs = np.asarray([0, 1, 1, 0], dtype=np.float32)

def Neural_Network(w1,w2,b1,b2,inputs,solutions):
    """Run a two-layer ReLU network forward and score it against targets.

    Args:
        w1: Weights of the first layer (right operand of ``np.dot(inputs, w1)``).
        w2: Weights of the second layer (right operand of the second ``np.dot``).
        b1: Bias added to the first layer's pre-activation.
        b2: Bias added to the second layer's pre-activation.
        inputs: Batch of input rows fed to the first layer.
        solutions: Target values compared element-wise with the flattened
            network output.

    Returns:
        A ``(reward, neural_net_out)`` pair: ``reward`` is the negated sum of
        squared differences between output and ``solutions`` (so 0 is the
        best possible score), and ``neural_net_out`` is the network output
        flattened to one dimension.
    """
    # First layer: affine transform followed by ReLU (clamp negatives to 0).
    hidden_pre = np.dot(inputs, w1) + b1
    hidden = np.maximum(hidden_pre, 0)

    # Second layer is rectified as well, so outputs are never negative.
    output_pre = np.dot(hidden, w2) + b2
    predictions = np.maximum(output_pre, 0).reshape(-1)

    # Negated sum of squared errors: larger (closer to 0) means a better fit.
    residual = predictions - solutions
    squared_error = np.sum(np.square(residual))

    return -squared_error, predictions

# --- Evolution-strategy settings ---
population_size = 100  # number of perturbed candidates scored per iteration
sigma = 0.1            # standard deviation of the noise added to parameters
learning_rate = 1e-3   # step size applied to each parameter update
hidden_size = 3        # number of hidden units

# --- Run control ---
MAX_ITERATIONS = 10 ** 5      # upper bound on training iterations
display_step = 20             # progress is printed every this many iterations
convergence_threshold = 1e-5  # stop once |reward| is at or below this value

# --- Initial network parameters, sampled from N(0, 1) ---
# The draw order (w1, w2, b1, b2) determines the values obtained from the
# seeded generator, so it must not be rearranged.
w1 = np.random.randn(2, hidden_size)
w2 = np.random.randn(hidden_size, 1)
b1 = np.random.randn(hidden_size)
b2 = np.random.randn(1)

# Evolution-strategy training loop: score the current parameters, sample a
# population of noisy copies, and move the parameters along the
# fitness-weighted average of the noise.
for i in range(MAX_ITERATIONS):

    # Score the current, unperturbed parameters.
    reward, neural_net_out = Neural_Network(w1, w2, b1, b2,
                                            XOR_inputs, XOR_outputs)

    # Periodic progress report.
    if i % display_step == 0:
        print('iter %d:\nsolution: %s, prediction: %s,\nfitness: %f\n' %
              (i, XOR_outputs, neural_net_out, reward))

    # Stop as soon as the squared error is small enough.
    if math.fabs(reward) <= convergence_threshold:
        print('AFTER CONVERGENCE:\nsolution: %s, prediction: %s,\nfitness: %f\n' %
              (XOR_outputs, neural_net_out, reward))
        break

    # One N(0, 1) noise tensor per parameter, with a leading population axis.
    # The draw order (w1, w2, b1, b2) is part of the seeded trajectory.
    Nw1 = np.random.randn(population_size, 2, hidden_size)
    Nw2 = np.random.randn(population_size, hidden_size, 1)
    Nb1 = np.random.randn(population_size, hidden_size)
    Nb2 = np.random.randn(population_size, 1)
    Rewards = np.zeros(population_size)

    # Evaluate every population member: current parameters plus scaled noise.
    for j, (noise_w1, noise_w2, noise_b1, noise_b2) in enumerate(
            zip(Nw1, Nw2, Nb1, Nb2)):
        Rewards[j], _ = Neural_Network(w1 + sigma * noise_w1,
                                       w2 + sigma * noise_w2,
                                       b1 + sigma * noise_b1,
                                       b2 + sigma * noise_b2,
                                       XOR_inputs, XOR_outputs)

    # Standardize rewards; the floor on the divisor avoids dividing by zero
    # when every member scores the same.
    very_small_number = 1e-20
    reward_spread = max(np.std(Rewards), very_small_number)
    fitness_scores = (Rewards - np.mean(Rewards)) / reward_spread

    # Next generation: step each parameter along its fitness-weighted noise sum.
    # Moving the population axis last lets matmul contract it with the scores.
    update_scale = learning_rate / (sigma * population_size)
    w1 = w1 + update_scale * np.matmul(np.transpose(Nw1, (1, 2, 0)), fitness_scores)
    w2 = w2 + update_scale * np.matmul(np.transpose(Nw2, (1, 2, 0)), fitness_scores)
    b1 = b1 + update_scale * np.dot(Nb1.T, fitness_scores)
    b2 = b2 + update_scale * np.dot(Nb2.T, fitness_scores)
    
   
    

iter 0:
solution: [ 0.  1.  1.  0.], prediction: [ 0.97923213  2.92648394  2.49364694  4.44089876],
fitness: -26.622799

iter 20:
solution: [ 0.  1.  1.  0.], prediction: [ 0.7154797   2.21819371  1.83867073  3.34138474],
fitness: -13.864128

iter 40:
solution: [ 0.  1.  1.  0.], prediction: [ 0.48023174  1.53147937  1.23485814  2.28610577],
fitness: -5.794531

iter 60:
solution: [ 0.  1.  1.  0.], prediction: [ 0.27001919  0.89451922  0.67023942  1.29473946],
fitness: -1.869129

iter 80:
solution: [ 0.  1.  1.  0.], prediction: [ 0.17477857  0.4456923   0.33624619  0.60715992],
fitness: -1.147017

iter 100:
solution: [ 0.  1.  1.  0.], prediction: [ 0.21137354  0.35329002  0.32144176  0.46335824],
fitness: -1.138055

iter 120:
solution: [ 0.  1.  1.  0.], prediction: [ 0.25812291  0.34045825  0.3596074   0.44194274],
fitness: -1.107039

iter 140:
solution: [ 0.  1.  1.  0.], prediction: [ 0.30558152  0.36206116  0.42324016  0.4797198 ],
fitness: -1.063129

iter 160:
solution: [ 0.  1.

iter 1420:
solution: [ 0.  1.  1.  0.], prediction: [ 0.38108336  0.70951358  0.68335395  0.17909083],
fitness: -0.361945

iter 1440:
solution: [ 0.  1.  1.  0.], prediction: [ 0.38684506  0.72740191  0.70928121  0.18428312],
fitness: -0.342437

iter 1460:
solution: [ 0.  1.  1.  0.], prediction: [ 0.3784468   0.72464381  0.7104295   0.15357444],
fitness: -0.326479

iter 1480:
solution: [ 0.  1.  1.  0.], prediction: [ 0.36909551  0.73323838  0.69911453  0.12293732],
fitness: -0.313039

iter 1500:
solution: [ 0.  1.  1.  0.], prediction: [ 0.35372725  0.73545537  0.68660876  0.08830077],
fitness: -0.301118

iter 1520:
solution: [ 0.  1.  1.  0.], prediction: [ 0.36144186  0.77470701  0.72144285  0.13607664],
fitness: -0.277508

iter 1540:
solution: [ 0.  1.  1.  0.], prediction: [ 0.35586401  0.75946556  0.72210674  0.0895915 ],
fitness: -0.269747

iter 1560:
solution: [ 0.  1.  1.  0.], prediction: [ 0.3577731   0.77995959  0.74583989  0.11035773],
fitness: -0.253196

iter 1580:
solut

iter 2860:
solution: [ 0.  1.  1.  0.], prediction: [ 0.04528199  1.04223856  0.93210289  0.        ],
fitness: -0.008445

iter 2880:
solution: [ 0.  1.  1.  0.], prediction: [ 0.04106089  1.03518493  0.93794381  0.        ],
fitness: -0.006775

iter 2900:
solution: [ 0.  1.  1.  0.], prediction: [ 0.03293346  1.02749391  0.93842068  0.        ],
fitness: -0.005633

iter 2920:
solution: [ 0.  1.  1.  0.], prediction: [ 0.03091936  1.02945919  0.94876721  0.        ],
fitness: -0.004449

iter 2940:
solution: [ 0.  1.  1.  0.], prediction: [ 0.03215674  1.00535619  0.93769851  0.        ],
fitness: -0.004944

iter 2960:
solution: [ 0.  1.  1.  0.], prediction: [ 0.01914469  1.00740154  0.941302    0.        ],
fitness: -0.003867

iter 2980:
solution: [ 0.  1.  1.  0.], prediction: [ 0.0305133   1.00438067  0.9550279   0.        ],
fitness: -0.002973

iter 3000:
solution: [ 0.  1.  1.  0.], prediction: [ 0.02319553  0.99654411  0.9469306   0.        ],
fitness: -0.003366

iter 3020:
solut