In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cp
from systems_and_functions.control_affine_system import ControlAffineSystem
from systems_and_functions.cart_pole_system import CartPole
from systems_and_functions.inverted_pendulum_system import InvertedPendulum
from systems_and_functions.networks import PolicyNet, LyapunovNet, DFunctionNet
from systems_and_functions.dlearning_process import DlearningProcess
%matplotlib notebook 
%matplotlib inline  

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seeds for this experiment, named once so the values are not repeated inline.
# Other combinations tried earlier: (numpy=159, torch=344, cuda=277) and
# (numpy=19, torch=24).
NUMPY_SEED = 15
TORCH_SEED = 24

np.random.seed(NUMPY_SEED)

torch.manual_seed(TORCH_SEED)
if torch.cuda.is_available():
    # Seeds the generator of every visible CUDA device, including the current one.
    torch.cuda.manual_seed_all(TORCH_SEED)

# Ask cuDNN for deterministic algorithms and switch off its benchmark mode so
# that repeated runs pick the same kernels.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Parameters handed to InvertedPendulum as `system_params`
# (presumably mass 'm', length 'L' and damping 'b' -- confirm against the class).
params = {
    'm': 1,
    'L': 1.0,
    'b': 0.5,
}

# Initial controller setting: a 1x2 array stored under the key 'K'.
# An alternative value of [[14, 3]] was also tried.
controller_params = {'K': np.array([[15, 4]])}

# `dt` and `controller_period` are both set to 0.01 (units are not shown here).
p1 = InvertedPendulum(
    system_params=params,
    controller_params=controller_params,
    dt=0.01,
    controller_period=0.01,
)

# Preview of a rollout with the initial controller.
# (p1.linearize_and_compute_LQR() is not called in this run.)

# Initial state as a 2x1 column created on `device`; [[2], [-2]] was an
# earlier choice.
# NOTE(review): the integer literals make this an int64 tensor -- confirm that
# simulate_rk4 accepts or casts a non-floating-point state.
x_initial = torch.tensor([[2], [-1]], device=device)
step_num = 500

# NOTE(review): the meaning of the third positional argument (1) is not
# visible from this notebook -- check simulate_rk4's signature.
sim_data_ = p1.simulate_rk4(x_initial, step_num, 1)
p1.convergence_judgment(sim_data_)
p1.plot_phase_portrait(
    data_sim=sim_data_,
    arrow_on=False,
    title='System Phase Portrait with initial controller',
)


# Build the D-learning process around the pendulum system `p1`. The three
# `n_hiddens_*` arguments are presumably the hidden-layer widths of the policy,
# Lyapunov and D-function networks -- confirm against DlearningProcess.
d1 = DlearningProcess(
    system=p1,
    n_hiddens_policy=32,
    n_hiddens_lyapunov=64,
    n_hiddens_dfunction=64,
    save_path='experiment_results/Dlearning/',
)

# Run the main loop with iteration=40, passing the preview's initial state and
# a 500-step horizon for plotting.
d1.dlearning_main_iteration(
    iteration=40,
    plot_x_initial=x_initial,
    plot_step_num=500,
)