In [2]:
import math
import numpy as np
import torch
import math
import random
from game import PendulumEnv

pygame 2.6.1 (SDL 2.28.4, Python 3.10.9)
Hello from the pygame community. https://www.pygame.org/contribute.html


# Weight Agnostic Neural Network (WAN)

This code defines a Weight Agnostic Neural Network (WAN), a special type of neural network whose connections all share a single weight value instead of each connection learning its own weight.

## Main Ideas:

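- The class declares 5 input nodes, 10 hidden nodes, and 1 output node. Note that the forward pass is actually driven by the 29-entry activation list `aVec`: 1 bias node, 5 input nodes, and 23 computed nodes, the last of which is the output.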
- Instead of separate weights for every connection, it uses a few shared weights assigned to specific connections.
- Each node applies a specific activation function like identity, sine, step, tanh, or others to process the input.
- When given an input state (a list of 5 numbers), the network:
  1. Sets the first node to 1 (bias), next nodes to the input values.
  2. Calculates the values of other nodes by multiplying inputs by the shared weights and applying the node’s activation function.
  3. Returns the output node’s value as the network’s action or output.
- You can change the shared weight value or use predefined weights to see how the network behaves differently.

## Why is this important?

WANs show that the structure of the network and types of node functions can be more important than having many different weights. This approach helps create robust networks that don’t rely heavily on tuning individual weights.

---

**In short:**  
A WAN uses mostly shared weights and focuses on how the network is built and how nodes transform signals, rather than on learning many different weights.


In [6]:
class WAN(object):  #Weight Agnostic Neural
    """Fixed-topology weight-agnostic network evaluated one node at a time.

    The topology is hard-coded:

    * ``aVec``  - activation id of every node (29 nodes). Node 0 is the bias,
      nodes ``1..input_size`` hold the inputs, the remaining nodes are computed
      and the last ``output_size`` node(s) are the output.
    * ``wKey``  - flat indices into the ``nNodes x nNodes`` weight matrix that
      mark the existing connections (row = source node, column = destination
      node, as used by ``get_action``).
    * ``weights`` / ``weight_bias`` - predefined per-connection weights and an
      offset that ``tune_weights`` installs in place of the shared weight.
    """

    def __init__(self, init_shared_weight):
        """Build the network and give every connection ``init_shared_weight``."""
        self.num_hidden = 10
        self.input_size = 5
        self.output_size = 1
        self.shape_in = [self.input_size, self.num_hidden]
        self.shape_out = [self.num_hidden, self.output_size]
        self.aVec = [1,1,1,1,1,1,7,7,5,1,5,5,4,1,7,3,9,1,3,7,9,5,4,3,9,7,1,7,1]
        self.wKey = [10,35,36,41,64,69,95,97,108,125,128,142,157,202,231,257,289,302,331,361,362,363,364,367,368,373,374,376,394,395,398,401,403,425,461,484,517,543,574,576,602,603,604,606,633,662,692,722,723,753,782,811]
        self.weights = [-0.1783,-0.0303,1.5435,1.8088,-0.857,1.024,-0.3872,0.2639,-1.138,-0.2857,0.3797,-0.199,1.3008,-1.4126,-1.3841,7.1232,-1.5903,-0.6301,0.8013,-1.1348,-0.7306,0.006,1.4754,1.1144,-1.5251,-1.277,1.0933,0.1666,-0.5483,2.6779,-1.2728,0.4593,-0.2608,0.1183,-2.1036,-0.3119,-1.0469,0.2662,0.7156,0.0328,0.3441,-0.1147,-0.0553,-0.4123,-3.2276,2.5201,1.7362,-2.9654,0.9641,-1.7355,-0.1573,2.9135]
        self.weight_bias = -1.5
        nNodes = len(self.aVec)
        # Flattened nNodes x nNodes weight matrix; 0 means "no connection".
        self.wVec = [0] * (nNodes*nNodes)
        self.set_weight(init_shared_weight, 0)

    def set_weight(self, weight, weight_bias):
        """Write connection weights into ``wVec`` at the positions in ``wKey``.

        Args:
            weight: either a single number applied to every connection, or a
                list with one weight per entry of ``wKey``.
            weight_bias: offset added to every written weight. Anything that is
                not exactly an ``int`` or ``float`` is treated as 0.
        """
        nValues = len(self.wKey)
        if type(weight_bias) not in (int, float):
            weight_bias = 0
        if isinstance(weight, list):
            weights = weight
        else:
            weights = [weight] * nValues
        for i in range(nValues):
            self.wVec[self.wKey[i]] = weights[i] + weight_bias

    def tune_weights(self):
        """Replace the shared weight with the predefined per-connection weights."""
        self.set_weight(self.weights, self.weight_bias)

    def get_action(self, old_state):
        """Run one forward pass and return the value of the output node.

        Args:
            old_state: sequence of input values; element ``i`` is copied into
                node ``i + 1`` (node 0 is the constant bias 1). Presumably
                ``input_size`` values are expected.

        Returns:
            The activation of the first of the last ``output_size`` nodes.
        """
        nNodes = len(self.aVec)
        wMat = np.array(self.wVec).reshape((nNodes, nNodes))
        nodeAct = [0] * nNodes
        nodeAct[0] = 1
        for i, value in enumerate(old_state):
            nodeAct[i+1] = value
        # Nodes are evaluated in index order; a node only sees activations that
        # were already computed (later nodes are still 0 at this point).
        for iNode in range(self.input_size+1, nNodes):
            rawAct = np.dot(nodeAct, wMat[:, iNode:iNode+1])
            nodeAct[iNode] = self.applyActSimple(self.aVec[iNode], rawAct.tolist()[0])
        return nodeAct[-self.output_size]

    def applyActSimple(self, actId, x):
        """Apply the activation function identified by ``actId`` to scalar ``x``.

        Ids: 1 identity, 2 unsigned step, 3 sin(pi*x), 4 gaussian, 5 tanh,
        6 sigmoid, 7 negation, 8 absolute value, 9 relu, 10 cos(pi*x).
        An unknown id prints a message and returns ``None``.
        """
        if actId == 1:
            return x
        elif actId == 2:
            return 0.0 if x<=0.0 else 1.0  #unsigned step
        elif actId == 3:
            return math.sin(math.pi*x)
        elif actId == 4:
            return math.exp(-(x*x)/2.0)  #gaussian with mean zero and unit variance 1
        elif actId == 5:
            return math.tanh(x)
        elif actId == 6:
            return (math.tanh(x/2.0) + 1.0)/2.0  #sigmoid
        elif actId == 7:
            return -x
        elif actId == 8:
            # The math module has no abs(); use the built-in.
            return abs(x)
        elif actId == 9:
            return max(x, 0)  #relu
        elif actId == 10:
            return math.cos(math.pi*x)
        else:
            print('unsupported actionvation type: ',actId)
            return None


In [7]:
def wan():
    """Run 20 pendulum episodes with a WAN controller and print each total.

    Epochs 0-9 use the single shared weight (-1.5). At epoch 10 the network
    switches to its predefined per-connection weights via ``tune_weights``.
    No learning takes place; the controller is only evaluated.

    The environment created here is closed when the run ends, including when
    an exception interrupts it.
    """
    environment = PendulumEnv()
    drl = WAN(-1.5)
    try:
        for epoch in range(20):
            if epoch == 0:
                print('init_weights:')
            elif epoch == 10:
                print()
                print('tune_weights:')
                drl.tune_weights()

            state_old = environment.reset()
            rewards = 1
            for step in range(2000):
                environment.render()
                action_now = drl.get_action(state_old)
                state_new, reward_now, done, _ = environment.step(action_now)
                if done:
                    # The reward of the terminating step is not added to the
                    # episode total (same as before; the old `reward_now = -1`
                    # assignment here was never used).
                    break
                rewards += reward_now
                state_old = state_new
            # `step` is the index of the last executed step, not a step count.
            print('epoch=%04d'%(epoch),'  ','rewards=%d'%(rewards),'  ','step=%d'%(step))
    finally:
        environment.close()

In [11]:
# NOTE(review): wan() constructs its own PendulumEnv internally and does not
# use this `env`; it is kept here so the cell behaves as before.
env = PendulumEnv()
try:
    print("WANN")
    wan()
finally:
    # Always release the environment, even if the run above raises.
    env.close()

WANN
init_weights:
epoch=0000    rewards=-1359    step=1999
epoch=0001    rewards=-1428    step=1999
epoch=0002    rewards=-1157    step=1999
epoch=0003    rewards=-1333    step=1999
epoch=0004    rewards=-1227    step=1999
epoch=0005    rewards=-1316    step=1999
epoch=0006    rewards=-1397    step=1999
epoch=0007    rewards=-1102    step=1999
epoch=0008    rewards=-1324    step=1999
epoch=0009    rewards=-1349    step=1999

tune_weights:
epoch=0010    rewards=-1137    step=1999
epoch=0011    rewards=-1321    step=1999
epoch=0012    rewards=-1247    step=1999
epoch=0013    rewards=-1133    step=1999
epoch=0014    rewards=-1206    step=1999
epoch=0015    rewards=-1158    step=1999
epoch=0016    rewards=-1189    step=1999
epoch=0017    rewards=-1216    step=1999
epoch=0018    rewards=-1188    step=1999
epoch=0019    rewards=-59    step=20


## Summary

- **Setup:**  
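  The WAN uses a fixed network architecture with predefined connection patterns and activation types. Every connection starts with the same shared weight (-1.5); later the shared weight is replaced by a predefined list of per-connection weights. No weights are learned in this notebook.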

- **Training Process:**  
  - Runs for 20 epochs (episodes); from epoch 10 onward the network uses the predefined per-connection ("tuned") weights instead of the single shared weight.  
  - The controller generates actions based on the current state without explicit learning from rewards, relying on the fixed/tuned weights.
    

- **Results:**  
  - The initial weights allow the pendulum to survive close to the maximum number of steps (around 2000), though rewards remain negative.  
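  - After switching to the tuned weights, rewards of the full-length episodes are modestly higher on average (roughly -1200 versus -1300 before). However, the only early termination in this run (epoch 19, ending at step 20) also occurs with the tuned weights, so the tuned weights do not reduce early failures here.  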
  - Performance is more consistent than DRL but still far from perfect control.