In [1]:
from neural_nets import PolicyNetwork
import pickle
from evaluator import test_random_trajectories, test_single_trajectory
from systems import InvertedPendulum, CartPole

### Loading or training the baseline network

In [7]:
# Baseline policy setup: paths, activation and the pendulum system that the
# later cells in this notebook reuse (data_path, network_path, act).
data_path = "./data/lqr_pend_data.pkl"
network_path = "./nnets/pend_base_tanh.pth"
act = "tanh"
pendulum = InvertedPendulum(dt=0.02)
network = PolicyNetwork(pendulum, device='cuda', activation=act)

# Prefer a previously saved checkpoint; train from the pickled dataset only
# when loading fails.
try:
    network.load_model(network_path)
except Exception as load_err:
    # `except Exception` (rather than a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, and the message makes the fallback visible
    # instead of silently retraining and overwriting the checkpoint.
    print(f"Could not load {network_path} ({load_err!r}); training from scratch.")

    # NOTE(review): pickle.load can execute arbitrary code; only use this
    # with a dataset file produced locally / from a trusted source.
    with open(data_path, 'rb') as f:
        data = pickle.load(f)
    states = data['states']
    actions = data['actions']
    network.prepare_data(states, actions)
    network.train()
    network.plot_losses()
    network.save_model(network_path)

Model loaded from ./nnets/pend_base_tanh.pth


In [4]:
# Evaluate the current `network` on the pendulum; verbose=True prints the
# trials / successes / failures summary shown in the cell output.
success_rate = test_random_trajectories(
    network,
    pendulum,
    n_trials=100,
    n_steps=200,
    verbose=True,
    safe_region_limit=0.8,
)

  Trials: 100
  Successes: 82/100 (82.0%)
  Failures: 18/100


### Magnitude pruning

In [8]:
# Magnitude-pruning sweep: for each target sparsity, reload the baseline
# checkpoint, prune, fine-tune, evaluate, and save the resulting model.
#
# NOTE(review): the recorded output prints "Final sparsity: 0.0%" after
# fine-tuning even though pruning reported ~10% of weights removed. It looks
# like train() does not keep pruned weights at zero (or get_sparsity() does
# not see them) -- TODO confirm in PolicyNetwork.
# NOTE(review): the baseline success rate differs between iterations in the
# recorded output (91.0 vs 87.0) for the same checkpoint, so the evaluation
# appears to be unseeded; compare success rates with that noise in mind.
pendulum = InvertedPendulum(dt=0.02)
sparsities = [0.1, 0.3, 0.5, 0.7]

# The dataset is loaded here unconditionally because the training cell only
# reads it when the checkpoint could not be loaded.
with open(data_path, 'rb') as data_file:
    data = pickle.load(data_file)
states = data['states']
actions = data['actions']

# Evaluation settings shared by the before/after pruning roll-outs.
eval_kwargs = dict(n_trials=100, n_steps=200, safe_region_limit=0.8)

for s in sparsities:
    print(f"\n\nSparsity = {s}")

    # Start every sparsity level from the same saved baseline weights.
    network = PolicyNetwork(pendulum, device='cuda', activation=act)
    network.load_model(network_path)
    train_loader, val_loader = network.prepare_data(states, actions)

    # Reference performance of the unpruned network.
    baseline_success = test_random_trajectories(
        network, pendulum, verbose=False, **eval_kwargs
    )
    print(f"Baseline success rate: {baseline_success}")

    # Prune to the requested sparsity, then fine-tune on the same data.
    print(f"Initial sparsity: {network.get_sparsity()*100:.1f}%")
    network.magnitude_pruning(sparsity=s, verbose=True)
    network.train(train_loader, val_loader)

    # Performance of the pruned and fine-tuned network.
    pruning_success = test_random_trajectories(
        network, pendulum, verbose=True, **eval_kwargs
    )
    print(f"Success after pruning: {pruning_success}")

    print(f"Final sparsity: {network.get_sparsity()*100:.1f}%")
    pruned_path = f"./nnets/pend_{act}_mag_prun_{s}.pth"
    network.save_model(pruned_path)



Sparsity = 0.1
Model loaded from ./nnets/pend_base_tanh.pth
Baseline success rate: 91.0
Initial sparsity: 0.0%
MAGNITUDE PRUNING RESULTS
0
  Pruned: 4/32 (12.5%)
2
  Pruned: 13/128 (10.2%)
4
  Pruned: 1/8 (12.5%)

Total pruned: 18/168 (10.7%)
Epoch  10 | Train Loss: 0.000011 | Val Loss: 0.000008
Epoch  20 | Train Loss: 0.000005 | Val Loss: 0.000039
Epoch  30 | Train Loss: 0.000010 | Val Loss: 0.000004
Epoch  40 | Train Loss: 0.000003 | Val Loss: 0.000006
Epoch  50 | Train Loss: 0.000005 | Val Loss: 0.000011

Early stopping at epoch 59
  Trials: 100
  Successes: 90/100 (90.0%)
  Failures: 10/100
Success after pruning: 90.0
Final sparsity: 0.0%
Model saved to ./nnets/pend_tanh_mag_prun_0.1.pth


Sparsity = 0.3
Model loaded from ./nnets/pend_base_tanh.pth
Baseline success rate: 87.0
Initial sparsity: 0.0%
MAGNITUDE PRUNING RESULTS
0
  Pruned: 10/32 (31.2%)
2
  Pruned: 39/128 (30.5%)
4
  Pruned: 3/8 (37.5%)

Total pruned: 52/168 (31.0%)
Epoch  10 | Train Loss: 0.000008 | Val Loss: 0.0000

In [9]:
# Activation-pruning sweep: same protocol as the magnitude-pruning cell, but
# pruning is driven by network.activation_pruning(), which also receives the
# training loader.
#
# NOTE(review): the recorded output prints "Final sparsity: 0.0%" after
# fine-tuning here as well -- TODO confirm whether train() preserves the
# pruned weights / whether get_sparsity() reports them.
# NOTE(review): baseline success on the same checkpoint varies between
# iterations in the recorded output (84.0 vs 91.0), so the evaluation appears
# to be unseeded.
pendulum = InvertedPendulum(dt=0.02)
sparsities = [0.1, 0.3, 0.5, 0.7]

# Load the dataset used to build the train/validation loaders.
with open(data_path, 'rb') as data_file:
    data = pickle.load(data_file)
states = data['states']
actions = data['actions']

# Evaluation settings shared by the before/after pruning roll-outs.
eval_kwargs = dict(n_trials=100, n_steps=200, safe_region_limit=0.8)

for s in sparsities:
    print(f"\n\nSparsity = {s}")

    # Start every sparsity level from the same saved baseline weights.
    network = PolicyNetwork(pendulum, device='cuda', activation=act)
    network.load_model(network_path)
    train_loader, val_loader = network.prepare_data(states, actions)

    # Reference performance of the unpruned network.
    baseline_success = test_random_trajectories(
        network, pendulum, verbose=False, **eval_kwargs
    )
    print(f"Baseline success rate: {baseline_success}")

    # Prune to the requested sparsity, then fine-tune on the same data.
    print(f"Initial sparsity: {network.get_sparsity()*100:.1f}%")
    network.activation_pruning(train_loader, sparsity=s, verbose=True)
    network.train(train_loader, val_loader)

    # Performance of the pruned and fine-tuned network.
    pruning_success = test_random_trajectories(
        network, pendulum, verbose=True, **eval_kwargs
    )
    print(f"Success after pruning: {pruning_success}")

    print(f"Final sparsity: {network.get_sparsity()*100:.1f}%")
    pruned_path = f"./nnets/pend_{act}_act_prun_{s}.pth"
    network.save_model(pruned_path)



Sparsity = 0.1
Model loaded from ./nnets/pend_base_tanh.pth
Baseline success rate: 84.0
Initial sparsity: 0.0%
ACTIVATION PRUNING RESULTS
Epoch  10 | Train Loss: 0.000006 | Val Loss: 0.000018
Epoch  20 | Train Loss: 0.000011 | Val Loss: 0.000010
Epoch  30 | Train Loss: 0.000007 | Val Loss: 0.000015

Early stopping at epoch 36
  Trials: 100
  Successes: 87/100 (87.0%)
  Failures: 13/100
Success after pruning: 87.0
Final sparsity: 0.0%
Model saved to ./nnets/pend_tanh_act_prun_0.1.pth


Sparsity = 0.3
Model loaded from ./nnets/pend_base_tanh.pth
Baseline success rate: 91.0
Initial sparsity: 0.0%
ACTIVATION PRUNING RESULTS
Epoch  10 | Train Loss: 0.000006 | Val Loss: 0.000004
Epoch  20 | Train Loss: 0.000009 | Val Loss: 0.000038
Epoch  30 | Train Loss: 0.000004 | Val Loss: 0.000002
Epoch  40 | Train Loss: 0.000011 | Val Loss: 0.000005
Epoch  50 | Train Loss: 0.000009 | Val Loss: 0.000004

Early stopping at epoch 50
  Trials: 100
  Successes: 86/100 (86.0%)
  Failures: 14/100
Success aft