In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import cvxpy as cp
import collections
import random
%matplotlib notebook 
%matplotlib inline   

from systems_and_functions.control_affine_system import ControlAffineSystem
from systems_and_functions.cart_pole_system import CartPole
from systems_and_functions.inverted_pendulum_system import InvertedPendulum
from systems_and_functions.networks import PolicyNet, QValueNet
from systems_and_functions.ddpg_process import DDPGProcess

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Seed every random number generator this notebook imports so that a
# "Restart Kernel -> Run All" starts from the same RNG state each time.
# Python's built-in `random` module is imported above but was previously left
# unseeded; seed it as well in case project code samples through it
# (NOTE(review): presumably the replay buffer does -- confirm in ddpg_process).
random.seed(13)
np.random.seed(13)

torch.manual_seed(30)
if torch.cuda.is_available():
    # Explicitly seed the current CUDA device and then all CUDA devices.
    torch.cuda.manual_seed(30)
    torch.cuda.manual_seed_all(30)

# Request deterministic cuDNN algorithms and disable the cuDNN auto-tuner,
# whose benchmark-based algorithm choice can differ between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
# Parameters handed to the InvertedPendulum system.
# NOTE(review): presumably m = mass, L = length, b = damping coefficient --
# confirm meaning and units in inverted_pendulum_system.
params = {'m': 1, 'L': 1.0, 'b': 0.5}

# Linear state-feedback gain passed to the system as its controller parameter.
# Alternative gain kept for reference: np.array([[14, 3]])
controller_params = {'K': np.array([[15, 4]])}

# Simulation step and controller period are both set to 0.01.
p1 = InvertedPendulum(system_params=params,
                      controller_params=controller_params,
                      dt=0.01,
                      controller_period=0.01)

# Linearize the system and compute an LQR gain (the recorded output prints the
# linearized A/B matrices and the resulting gain).
p1.linearize_and_compute_LQR()

# Build the DDPG training process around the pendulum.
# NOTE(review): sigma / tau / gamma are presumably exploration noise, soft
# target-update rate and discount factor -- confirm against DDPGProcess.
ddpg1 = DDPGProcess(system=p1,
                    n_hiddens_policy=32,
                    n_hiddens_critic=64,
                    sigma=0.5,
                    tau=0.3,
                    gamma=0.9,
                    replay_buffer_capacity=2000,
                    min_training_batch=1000,
                    # Reuse the device chosen in the setup cell instead of
                    # repeating the CUDA-availability expression here.
                    device=device,
                    save_path='experiment_results/DDPG/')

# Run the main training loop for 40 iterations. The recorded output shows that
# each iteration samples training data, updates critic and actor, reports
# convergence step counts and saves data.
ddpg1.DDPG_main_iteration(iteration=40,
                          plot_x_initial=torch.tensor([[2], [-2]]),
                          plot_step_num=1000)

Controller is involved.
tensor([0., 0.], device='cuda:0')
linearized_ct_system:
 A[[ 0.   1. ]
 [ 9.8 -0.5]],
 B[[0.]
 [1.]]
computed LQR controller is [[19.65088867  5.86802774]]
-------------------------------Main Iteration------------------------------
---------------------Initializing Policy------------------------


  K = torch.tensor(-self.system.K)


-----------------Convergence Speed and Judgment-----------------
--------------It takes 90 steps to norm 2 ball;--------------
---------------It takes 114 steps to unit ball;---------------
----------------It takes 175 steps to converge.--------------


  fig.show()


---------------------------------Iteration 1-------------------------------
---------------------Sampling Training Data---------------------
---------------------Updating Actor & Critic--------------------
-----------------------Updating Critic----------------------
------------------------Updating Actor----------------------
-----------------Convergence Speed and Judgment-----------------
--------------It takes 90 steps to norm 2 ball;--------------
---------------It takes 113 steps to unit ball;---------------
----------------It takes 176 steps to converge.--------------
----------------------------------Save Data--------------------------------


  fig.show()


---------------------------------Iteration 2-------------------------------
---------------------Sampling Training Data---------------------
---------------------Updating Actor & Critic--------------------
-----------------------Updating Critic----------------------
------------------------Updating Actor----------------------
-----------------Convergence Speed and Judgment-----------------
--------------It takes 90 steps to norm 2 ball;--------------
---------------It takes 113 steps to unit ball;---------------
----------------It takes 173 steps to converge.--------------
----------------------------------Save Data--------------------------------
---------------------------------Iteration 3-------------------------------
---------------------Sampling Training Data---------------------
---------------------Updating Actor & Critic--------------------
-----------------------Updating Critic----------------------
------------------------Updating Actor----------------------
----------------

KeyboardInterrupt: 

In [None]:
# Run the project's policy-network initialisation on the existing DDPG process.
ddpg1.initialize_policy_net(
    x_train_lim=10,
    x_test_lim=12,
    sample_num=2000,
    iteration=10**4,
    lr=1e-3,
)

# Simulate 200 RK4 steps from the start state [[2], [-2]] with the actor's
# controller in the loop.
rollout_x_initial = torch.tensor([[2], [-2]])
sim_data_ = ddpg1.system.simulate_rk4(
    x_initial=rollout_x_initial,
    step_number=200,
    use_controller=1,
    the_controller=ddpg1.actor.Controller,
)

# Convergence judgment returns three step counts for the simulated rollout.
step2norm2ball, step2unitball, step2converge = ddpg1.system.convergence_judgment(sim_data_)

# Draw the phase portrait of the rollout; arrows and figure saving are
# switched off (both flags are 0).
ddpg1.system.plot_phase_portrait(
    data_sim=sim_data_,
    arrow_on=0,
    title='after PI in iteration {}'.format(0),
    save_fig=0,
    save_path=ddpg1.save_path + 'figs/',
)

# Report the reward recorded for this rollout.
rollout_reward = ddpg1.record_rollout_reward(sim_data_)
print('reward: ', rollout_reward)