In [1]:
import macropy.activate
import importlib
import sys
sys.path.append('../')
import torch
import torch.nn as nn
import torch.optim as optim
from Code.iff_macro import set_config
from Code.lstm import lstmPolicyPredictor
from Code.envs.MountainCar import MultiMountainCar, LookupPolicy, PassiveEnv
from Code.SNN import RSNN, FeedForwardSNN, TestNN

In [2]:
config = {
    'ALPHA': 0,
    'BETA': 1,
    'RESET_ZERO': False,
    'THRESH_ADD': 1,
    'THRESH_DECAY': 1,
    'DECODING': 'potential',
    'SPIKE_FN': 'bellec',
    'SIM_TIME': 10
}
set_config(config)
import Code.macroNeurons as Neurons
importlib.reload(Neurons)

<module 'Code.macroNeurons' from '../Code/macroNeurons.py'>

In [3]:
env = MultiMountainCar()

In [4]:
BATCH_SIZE = 64

In [5]:
#model = lstmPolicyPredictor(1,8,16)
#model = RSNN(config, 1, 32, 16, 1, Neurons.LIFNeuron, Neurons.AdaptiveNeuron, Neurons.OutputNeuron)
model = FeedForwardSNN(config, [1, 128, 128, 1], Neurons.LIFNeuron, Neurons.OutputNeuron)
#model = TestNN(config, [1, 128, 10, 10, 1], Neurons.LIFNeuron, Neurons.OutputNeuron)


teacher = LookupPolicy()

In [6]:
import gym
testenv = gym.make('MountainCar-v0')

def validate(num_runs):
    sum = 0
    for i in range(num_runs):
        obs = testenv.reset()
        state = None
        for t in range(300):
            output, state = model(torch.tensor([[[obs[0]]]], dtype=torch.float), state)
            action = 2 if output > 0.5 else 0
            obs, _, done, _ = testenv.step(action)
            if done:
                #print(t+1)
                sum += t + 1
                break
    print('Validation: ', sum/num_runs)




In [16]:
bce = nn.BCELoss(reduction='none') #reduction='sum'
optimizer = optim.Adam(model.parameters(), lr=0.00002)#0.00001

In [8]:
#torch.autograd.set_detect_anomaly(True)

In [17]:
for i in range(5000):
    model.zero_grad()
    observation = env.reset(BATCH_SIZE)
    state = None
    loss = 0
    for t in range(200):
        output, state = model(observation[:,:1].unsqueeze(0), state)
        target = teacher(observation)/2
        #print(observation[:,:1].unsqueeze(0).shape, output.shape, target.shape)
        action = (output.squeeze() > 0.5) * 2.0
        observation, _, done, _ = env.step(action)
        loss = loss + (bce(torch.sigmoid(output.squeeze()), target) * (~done).float()).mean()
        #print(t, loss)
        if done.all():
            break
    if i%10 == 0:
        print(loss.item(), t+1, i) #, ((outputs>0.5) != targets).sum()
    loss.backward()
    optimizer.step()
    if i%100 == 0:
        validate(10)
    

114.16061401367188 200 0
Validation:  200.0
115.09379577636719 200 10


KeyboardInterrupt: 

In [10]:
validate(10)

Validation:  200.0


In [11]:
teacher(observation)/2



tensor([0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0., 1., 1., 0.,
        1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0., 0., 1.,
        0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0.,
        1., 1., 1., 1., 1., 1., 0., 1., 0., 0.])

In [12]:
observation

tensor([[-4.1097e-01, -5.2399e-03],
        [-5.0933e-01,  5.7433e-03],
        [-4.8188e-01, -2.9103e-03],
        [-5.1684e-01, -1.7997e-03],
        [-3.8655e-01, -2.2448e-04],
        [-5.5993e-01, -1.7995e-02],
        [-5.4178e-01, -1.1824e-02],
        [-4.8765e-01,  4.3241e-03],
        [-5.5765e-01,  3.6127e-03],
        [-5.0088e-01,  1.1945e-03],
        [-4.1700e-01,  1.3139e-02],
        [-4.7802e-01, -2.2006e-03],
        [-5.4765e-01,  9.2372e-04],
        [-4.3905e-01, -7.1678e-03],
        [-4.3850e-01,  2.7524e-03],
        [-4.4123e-01,  1.3511e-02],
        [-5.1462e-01, -4.2303e-03],
        [-4.5692e-01, -6.3992e-03],
        [-5.4645e-01, -9.1707e-03],
        [-5.3869e-01, -2.6012e-03],
        [-4.1224e-01,  7.0268e-03],
        [-4.4384e-01,  9.2939e-03],
        [-5.0287e-01,  2.8545e-05],
        [-5.1650e-01, -2.3591e-03],
        [-3.9744e-01, -1.1960e-02],
        [-5.5335e-01,  8.2706e-03],
        [-5.5257e-01, -5.3668e-03],
        [-5.6656e-01,  3.021

In [13]:
#torch.save(model, '../models/rsnn_mountaincar2')




In [14]:
model.input_linear.bias


Parameter containing:
tensor([-0.2447,  0.8700,  0.0450, -0.3341,  0.4510, -0.2559,  0.3413,  0.3273,
         0.9719, -0.6632, -0.0270,  0.8269, -0.6678, -0.6522,  0.0889,  0.5426,
         0.0637, -0.1838, -0.1045, -0.9036, -0.9709,  0.0537,  0.3709, -0.1747,
         0.2357,  0.2325, -0.3533,  0.6667,  0.7104,  0.3493, -0.6436,  0.0571,
        -0.1335, -0.2258,  0.4684,  0.3427, -0.2529,  0.6614,  0.0761, -0.7235,
         0.6756,  0.4705,  0.1954, -0.3072, -0.8658,  0.2617,  0.0667,  0.5713,
        -0.3913,  0.5384, -0.7698, -0.2424, -0.6205,  0.6543,  0.4206, -0.3258,
        -0.3866,  0.5384, -0.5044, -0.8876, -0.6487, -0.9382,  0.8218,  0.8558,
         0.3634, -0.2184, -0.3525, -0.9538,  0.0824, -0.5214,  0.3286,  0.3333,
         0.9454, -0.3931,  0.4625,  0.3483, -0.3716,  0.5495,  0.5847,  0.0169,
         0.0502,  0.9809, -0.1724, -0.2716, -0.0553,  0.8741,  0.5977,  0.4438,
         0.5584, -0.8991,  0.7477,  0.0207, -0.4578, -0.4466,  0.0594, -0.1705,
         0.0508, -

In [15]:
iis




NameError: name 'iis' is not defined