In [3]:
import torch
from torch import nn
from torch.nn import functional as F
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import cvxpy as cp
import collections
import random
%matplotlib notebook 
%matplotlib inline   

from systems_and_functions.control_affine_system import ControlAffineSystem
from systems_and_functions.cart_pole_system import CartPole
from systems_and_functions.inverted_pendulum_system import InvertedPendulum
from systems_and_functions.networks import PolicyNet, QValueNet
from systems_and_functions.ddpg_process import DDPGProcess

# Compute device shared by the notebook: the CUDA GPU when PyTorch reports one
# as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# --- Reproducibility ---------------------------------------------------------
# Seed every RNG library this notebook imports so that Restart Kernel -> Run All
# repeats the same run. Training is presumably stochastic (the `sigma` argument
# below looks like exploration noise, and a replay buffer is configured), but
# the sampling code itself is not visible here -- TODO confirm which RNGs the
# DDPGProcess implementation actually draws from.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- System under control ----------------------------------------------------
# Parameters passed to InvertedPendulum; the meaning and units of the keys are
# defined by that class (not shown in this notebook).
params = {'m': 1, 'L': 1.0, 'b': 0.5}

# Initial feedback gain handed to the system.
# Alternative gain kept from an earlier experiment: K = np.array([[14, 3]])
controller_params = {'K': np.array([[15, 4]])}

p1 = InvertedPendulum(
    system_params=params,
    controller_params=controller_params,
    dt=0.01,
    controller_period=0.01,
)

# Linearize the system and compute an LQR controller (as the method name says);
# whether this replaces the gain supplied above depends on the implementation.
p1.linearize_and_compute_LQR()

# --- DDPG agent --------------------------------------------------------------
# Reuse the notebook-level `device` (defined in the imports cell) instead of
# re-deriving it here, so that every component agrees on one device choice.
ddpg1 = DDPGProcess(
    system=p1,
    actor_bound=50,
    n_hiddens_policy=32,
    n_hiddens_critic=64,
    sigma=0.5,
    tau=0.3,
    gamma=0.9,
    replay_buffer_capacity=1000,
    min_training_batch=1000,
    device=device,
    save_path='experiment_results/DDPG/',
)

# --- Training ----------------------------------------------------------------
# NOTE(review): the plotting initial state is an integer-dtype tensor (it is
# built from Python ints); confirm that the simulator / actor accept or convert
# it, otherwise switch to float literals.
ddpg1.DDPG_main_iteration(
    iteration=40,
    plot_x_initial=torch.tensor([[2], [-2]]),
    plot_step_num=1000,
)

In [None]:
# Fit the policy network via DDPGProcess.initialize_policy_net before evaluating
# it (the fitting target is defined inside that method, not in this notebook).
ddpg1.initialize_policy_net(
    x_train_lim=10,
    x_test_lim=12,
    sample_num=2000,
    iteration=1 * 10**4,
    lr=1e-3,
)

# Closed-loop rollout from a fixed initial state, driven by the actor's controller.
eval_x_initial = torch.tensor([[2], [-2]])
sim_data_ = ddpg1.system.simulate_rk4(
    x_initial=eval_x_initial,
    step_number=200,
    use_controller=1,
    the_controller=ddpg1.actor.Controller,
)

# Convergence statistics for the rollout; the three values are kept under their
# original names so later cells can still read them.
(step2norm2ball,
 step2unitball,
 step2converge) = ddpg1.system.convergence_judgment(sim_data_)

# Phase portrait of the rollout (arrows off, figure not saved).
phase_portrait_title = 'after PI in iteration {}'.format(0)
fig_dir = ddpg1.save_path + 'figs/'
ddpg1.system.plot_phase_portrait(
    data_sim=sim_data_,
    arrow_on=0,
    title=phase_portrait_title,
    save_fig=0,
    save_path=fig_dir,
)

# Report the reward recorded for this rollout.
rollout_reward = ddpg1.record_rollout_reward(sim_data_)
print('reward: ', rollout_reward)