In [1]:
from neural_nets import PolicyNetwork
import pickle
from evaluator import test_random_trajectories, test_single_trajectory
from systems import InvertedPendulum, CartPole

### Training network

In [16]:
# Build the inverted-pendulum system and its policy network, then obtain
# weights: reuse the saved base checkpoint when it loads, otherwise train on
# the dataset at `data_path` and write the checkpoint for the later cells.
data_path = "./data/lqr_pend_data.pkl"
base_model_path = "./nnets/pend_base.pth"
pendulum = InvertedPendulum(dt=0.02)
network = PolicyNetwork(pendulum, device='cuda')

try:
    network.load_model(base_model_path)
except Exception as load_error:
    # Catch `Exception`, not a bare `except:`, so that interrupting the kernel
    # (KeyboardInterrupt) or SystemExit is not mistaken for a missing
    # checkpoint and does not silently start a training run.
    print(f"Could not load {base_model_path} ({load_error!r}); training from {data_path}")

    # SECURITY NOTE: pickle.load can execute arbitrary code embedded in the
    # file; only use it on data files produced by this project.
    with open(data_path, 'rb') as f:
        data = pickle.load(f)
    states = data['states']
    actions = data['actions']
    network.prepare_data(states, actions)
    network.train()
    network.plot_losses()

    # The pruning cells below load this exact path unconditionally, so the
    # freshly trained weights must be written out or those cells fail.
    network.save_model(base_model_path)

Model loaded from ./nnets/pend_base.pth


In [17]:
# Evaluate the current policy on 100 trajectories of 200 steps each and keep
# the returned success figure; verbose=True prints the trial summary.
# NOTE(review): exact meaning/units of safe_region_limit are defined in
# evaluator.py — confirm there.
success_rate = test_random_trajectories(
    network,
    pendulum,
    n_trials=100,
    n_steps=200,
    verbose=True,
    safe_region_limit=0.8,
)

  Trials: 100
  Successes: 82/100 (82.0%)
  Failures: 18/100


### Magnitude pruning

In [18]:
# Magnitude-pruning sweep. For each target sparsity: start from the saved base
# checkpoint, evaluate it, prune, retrain, evaluate again and save the result
# under ./nnets/pend_mag_prun_<sparsity>.pth.
pendulum = InvertedPendulum(dt=0.02)
sparsities = [0.1, 0.3, 0.5, 0.7]

# The dataset is read here because the first cell only loads it when the base
# checkpoint could not be loaded.
with open(data_path, 'rb') as f:
    data = pickle.load(f)
states, actions = data['states'], data['actions']

for s in sparsities:
    print(f"\n\nSparsity = {s}")

    # A fresh network is built and reloaded on every iteration, so each
    # sparsity level is applied to the base weights, not to a previously
    # pruned model.
    network = PolicyNetwork(pendulum, device='cuda')
    network.load_model("./nnets/pend_base.pth")
    train_loader, val_loader = network.prepare_data(states, actions)

    # NOTE(review): the saved output shows different baseline values for the
    # same checkpoint across iterations, so this evaluation appears to be
    # randomized and no seed is set in this cell — confirm.
    baseline_success = test_random_trajectories(
        network,
        pendulum,
        n_trials=100,
        n_steps=200,
        verbose=False,
        safe_region_limit=0.8,
    )
    print(f"Baseline success rate: {baseline_success}")

    initial_sparsity_pct = network.get_sparsity() * 100
    print(f"Initial sparsity: {initial_sparsity_pct:.1f}%")

    # Prune, then retrain on the same loaders.
    # NOTE(review): in the saved run "Final sparsity" after retraining was far
    # below the pruned fraction (0.5% vs. 10.7% pruned at s=0.1). Retraining
    # may be regrowing pruned weights — confirm that train() preserves the
    # pruning mask.
    network.magnitude_pruning(sparsity=s, verbose=True)
    network.train(train_loader, val_loader)

    pruning_success = test_random_trajectories(
        network,
        pendulum,
        n_trials=100,
        n_steps=200,
        verbose=True,
        safe_region_limit=0.8,
    )
    print(f"Success after pruning: {pruning_success}")

    final_sparsity_pct = network.get_sparsity() * 100
    print(f"Final sparsity: {final_sparsity_pct:.1f}%")

    pruned_model_path = f"./nnets/pend_mag_prun_{s}.pth"
    network.save_model(pruned_model_path)



Sparsity = 0.1
Model loaded from ./nnets/pend_base.pth
Baseline success rate: 89.0
Initial sparsity: 0.0%
MAGNITUDE PRUNING RESULTS
0
  Pruned: 4/32 (12.5%)
2
  Pruned: 13/128 (10.2%)
4
  Pruned: 1/8 (12.5%)

Total pruned: 18/168 (10.7%)
Epoch  10 | Train Loss: 0.000009 | Val Loss: 0.000001
Epoch  20 | Train Loss: 0.000006 | Val Loss: 0.000011
Epoch  30 | Train Loss: 0.000005 | Val Loss: 0.000107
Epoch  40 | Train Loss: 0.000008 | Val Loss: 0.000006

Early stopping at epoch 46
  Trials: 100
  Successes: 84/100 (84.0%)
  Failures: 16/100
Success after pruning: 84.0
Final sparsity: 0.5%
Model saved to ./nnets/pend_mag_prun_0.1.pth


Sparsity = 0.3
Model loaded from ./nnets/pend_base.pth
Baseline success rate: 84.0
Initial sparsity: 0.0%
MAGNITUDE PRUNING RESULTS
0
  Pruned: 10/32 (31.2%)
2
  Pruned: 39/128 (30.5%)
4
  Pruned: 3/8 (37.5%)

Total pruned: 52/168 (31.0%)
Epoch  10 | Train Loss: 0.000003 | Val Loss: 0.000007
Epoch  20 | Train Loss: 0.000020 | Val Loss: 0.000001
Epoch  30 | 

In [19]:
# Activation-pruning sweep. Same protocol as a per-sparsity experiment: load
# the base checkpoint, evaluate, prune using the training loader, retrain,
# evaluate again and save under ./nnets/pend_act_prun_<sparsity>.pth.
pendulum = InvertedPendulum(dt=0.02)
sparsities = [0.1, 0.3, 0.5, 0.7]

# Read the dataset in this cell so it does not depend on another cell having
# populated `states` / `actions`.
with open(data_path, 'rb') as f:
    data = pickle.load(f)
states, actions = data['states'], data['actions']

for s in sparsities:
    print(f"\n\nSparsity = {s}")

    # Rebuild and reload the network every iteration so each sparsity level
    # starts from the base weights.
    network = PolicyNetwork(pendulum, device='cuda')
    network.load_model("./nnets/pend_base.pth")
    train_loader, val_loader = network.prepare_data(states, actions)

    # NOTE(review): baseline values differ between iterations in the saved
    # output although the checkpoint is the same; the evaluation appears to be
    # randomized and unseeded — confirm.
    baseline_success = test_random_trajectories(
        network,
        pendulum,
        n_trials=100,
        n_steps=200,
        verbose=False,
        safe_region_limit=0.8,
    )
    print(f"Baseline success rate: {baseline_success}")

    initial_sparsity_pct = network.get_sparsity() * 100
    print(f"Initial sparsity: {initial_sparsity_pct:.1f}%")

    # Prune using the training loader, then retrain.
    # NOTE(review): in the saved run no per-layer report followed the
    # "ACTIVATION PRUNING RESULTS" banner and "Final sparsity" stayed at 0.0%.
    # Verify that activation_pruning() actually removes weights and that
    # train() keeps them removed.
    network.activation_pruning(train_loader, sparsity=s, verbose=True)
    network.train(train_loader, val_loader)

    pruning_success = test_random_trajectories(
        network,
        pendulum,
        n_trials=100,
        n_steps=200,
        verbose=True,
        safe_region_limit=0.8,
    )
    print(f"Success after pruning: {pruning_success}")

    final_sparsity_pct = network.get_sparsity() * 100
    print(f"Final sparsity: {final_sparsity_pct:.1f}%")

    pruned_model_path = f"./nnets/pend_act_prun_{s}.pth"
    network.save_model(pruned_model_path)



Sparsity = 0.1
Model loaded from ./nnets/pend_base.pth
Baseline success rate: 84.0
Initial sparsity: 0.0%
ACTIVATION PRUNING RESULTS
Epoch  10 | Train Loss: 0.000005 | Val Loss: 0.000004
Epoch  20 | Train Loss: 0.000004 | Val Loss: 0.000002
Epoch  30 | Train Loss: 0.000002 | Val Loss: 0.000001
Epoch  40 | Train Loss: 0.000001 | Val Loss: 0.000001
Epoch  50 | Train Loss: 0.000004 | Val Loss: 0.000003

Early stopping at epoch 50
  Trials: 100
  Successes: 81/100 (81.0%)
  Failures: 19/100
Success after pruning: 81.0
Final sparsity: 0.0%
Model saved to ./nnets/pend_act_prun_0.1.pth


Sparsity = 0.3
Model loaded from ./nnets/pend_base.pth
Baseline success rate: 89.0
Initial sparsity: 0.0%
ACTIVATION PRUNING RESULTS
Epoch  10 | Train Loss: 0.000011 | Val Loss: 0.000002
Epoch  20 | Train Loss: 0.000004 | Val Loss: 0.000019
Epoch  30 | Train Loss: 0.000006 | Val Loss: 0.000010
Epoch  40 | Train Loss: 0.000005 | Val Loss: 0.000003

Early stopping at epoch 46
  Trials: 100
  Successes: 87/100