In [1]:
import numpy as np
import cvxpy as cp
from matplotlib import pyplot as plt

from time import time
from functools import partial

import sys
sys.path.append('../../')

from Environment import Quadrotor1D
from Subroutines import QuadrotorEst,ApproxDAP,max_norm,find_stable_radius,SafeTransit,spectral_radius
from time import time
from Controllers import SafeDAP
from scipy.linalg import sqrtm
import pickle as pkl
%load_ext autoreload
%autoreload 2

In [2]:
class TimeStepsReached(Exception):
    """Control-flow signal raised once a simulation has recorded enough steps.

    It is raised from inside a phase of the adaptive simulation and caught by
    the trial's main loop, so it marks normal termination, not an error.
    """
def evaluate(space_dim,dt,K_m_dt,l_m_dt):
    """Return the spectral radius of a block-structured transition matrix.

    The matrix is ``I + [[0, dt*I], [-K_m_dt*I, -l_m_dt*I]]`` where every
    block is ``space_dim x space_dim``.

    Parameters
    ----------
    space_dim : int
        Size of each square block.
    dt : float
        Scale of the upper-right identity block.
    K_m_dt, l_m_dt : float
        Scales (entering with a minus sign) of the lower-left and lower-right
        identity blocks.  The names suggest K/m*dt and l/m*dt -- confirm
        against the caller.

    Returns
    -------
    float
        Largest absolute value among the eigenvalues of the matrix.
    """
    eye = np.eye(space_dim)
    zero = np.zeros((space_dim,space_dim))

    increment = np.block([[zero, dt*eye],
                          [-K_m_dt*eye, -l_m_dt*eye]])
    transition = increment + np.eye(2*space_dim)

    return np.abs(np.linalg.eigvals(transition)).max()

# Set up the system

In [3]:
# Initial model estimate (A_hat_0, B_hat_0) and the initial value r_0 that
# seeds r_hist in the adaptive simulation.
A_hat_0 = np.array([
    [1.0, 1.0],
    [0.0, 0.6],
])
B_hat_0 = np.array([
    [0.0],
    [0.9],
])
r_0 = 0.2

In [4]:
# Arguments of the Quadrotor1D environment.
# NOTE(review): m, l and dt look like mass, a damping/drag coefficient and the
# time step -- confirm against Environment.Quadrotor1D.
m = 1
l = 0.25
dt = 1

# Bound used later to clip the estimated disturbances.
w_max = 0.2

# Stabilising feedback gain (enters the closed loop as A - B K_stab).
K_stab = np.array([[2/3, 1]])

env = Quadrotor1D(m, K_stab, l, w_max, dt)

# $\Theta_{ini}$

In [5]:
# Two (value, value) pairs handed to QuadrotorEst.
# NOTE(review): presumably (lower, upper) bounds on the two unknown model
# parameters -- confirm against Subroutines.QuadrotorEst.
alpha_limit = 0.5, 1.2
beta_limit = 0.2, 0.4

# Optimization Parameters

In [6]:
# --- Dimensions, read off the environment matrices ---
x_dim = env.AK.shape[-1]
u_dim = env.B.shape[-1]

I_x, I_u = np.eye(x_dim), np.eye(u_dim)

# Disturbance covariance passed to SafeDAP (original note: "Assume uniform
# distribution").
# NOTE(review): a uniform distribution on [-w_max, w_max] has variance
# w_max**2 / 3, whereas w_max**2 is used here -- confirm this is intended.
w_cov = env.w_max**2 * np.eye(x_dim)

# --- Cost matrices (the leading 1 is a scale knob) ---
Q = 1 * np.array([[1, 0],
                  [0, 1]])
R = 1 * np.eye(u_dim)

# --- Box constraints ---
x_min, x_max = -1, 5.0
v_min, v_max = -2, 2
u_min, u_max = -9.8, 5

# Stacked as [I; -I] with [upper; -lower], i.e. the box written as D z <= d.
D_x = np.vstack([I_x, -I_x])
d_x = np.array([x_max, v_max, -x_min, -v_min])

D_u = np.vstack([I_u, -I_u])
d_u = np.array([u_max, -u_min])

# --- Lookback lengths ---
H = 6
H_benchmark = 6

# All-zero controller used as the first entry of M_hist.
M0 = np.zeros((H, u_dim, x_dim))

# --- Initial margins and their per-episode decay factor ---
e_x_0 = 1.0   # Usually e_x is not zero
e_u_0 = 0.3   # For safe DAP with known B, e_u = 0
decay = 0.7

# --- Schedule ---
refit_per_step = 50   # number of environment steps run inside each phase
pre_run_steps = 10    # benchmark steps taken before the DAP controller is used
TD_steps = 30         # The least number of steps taken in Phase 1.

# Values of eta_bar to sweep; a wider sweep would be [0.1,0.2,0.5,1.0].
eta_bars = [0.05]

In [7]:
safeDapSolver = SafeDAP(
    Q, R,
    D_x, d_x,
    D_u, d_u,
    w_max, w_cov,
)

# b_target is the first element returned by solve_b_star() for the true
# system, starting from b_target_0.
# b_target should not exceed solve_b_star()'s value for the subsequent
# optimization problem to be feasible.
b_target_0 = 0.0
b_target = safeDapSolver.solve_b_star(
    b_target_0, env.A, env.B, e_x_0, e_u_0, H, K_stab
)[0]

# Save a copy of the parameters we have

In [8]:
# Snapshot of every setting of this run, pickled next to the trial data so
# that the results can be interpreted later.
# Each key is listed exactly once ('eta_bars' used to appear twice in the
# literal; the stored content is unchanged).
params = {
    'Q': Q, 'R': R,
    'D_x': D_x, 'd_x': d_x, 'D_u': D_u, 'd_u': d_u,
    'w_cov': w_cov,
    'alpha_limit': alpha_limit, 'beta_limit': beta_limit,
    'H': H, 'H_benchmark': H_benchmark, 'eta_bars': eta_bars,
    'A': env.A, 'AK': env.AK, 'B': env.B,
    'A_hat_0': A_hat_0, 'B_hat_0': B_hat_0, 'r_0': r_0,
    'w_max': w_max, 'K_stab': K_stab, 'b_target': b_target,
    'l': l, 'm': m, 'dt': dt,
}
# The ./data directory must already exist.
with open('./data/Parameters.pkl','wb') as f:
    pkl.dump(params,f)

# Experiment lengths

In [9]:
# Number of independent trials per algorithm, and environment steps per trial.
n_trials, timesteps = 20, 500

# Benchmark Experiments

In [10]:

def PlainSim(timesteps,n_trials):
    """Simulate the environment with a zero control input.

    Parameters
    ----------
    timesteps : int
        Number of environment steps per trial.
    n_trials : int
        Number of independent trials.

    Returns
    -------
    list of dict
        One ``{'x': [state, ...]}`` dictionary per trial, holding the state
        observed before each step.
    """

    def main_loop():
        """Run a single trial on a freshly constructed environment."""
        env = Quadrotor1D(m,K_stab,l,w_max,dt)

        states = []
        for step in range(timesteps):
            # Record the state first, then advance with u = 0.
            states.append(env.state())
            env.step(0,b_target)

            if step%100 == 0:
                print('Step',step)

        return {'x': states}

    trial_data = []
    for trial in range(n_trials):
        started = time()
        print('Trial {}'.format(trial))
        trial_data.append(main_loop())

        print('Time for trial:',time()-started)
    return trial_data

def BenchmarkSim(timesteps,n_trials,unconstrained=False):
    """Run the DAP benchmark that is given the true system matrices.

    The controller M is solved once per trial from ``env.A`` / ``env.B`` with
    zero margins (e_x = e_u = 0) and then applied for the whole trial.

    Parameters
    ----------
    timesteps : int
        Number of environment steps per trial.
    n_trials : int
        Number of independent trials.
    unconstrained : bool, optional
        Forwarded to ``SafeDAP.solve``.

    Returns
    -------
    list of dict
        One dictionary per trial with keys 'x', 'u', 'w' and 'M'.
    """

    def main_loop():
        """Run a single trial with a fresh solver and a fresh environment."""
        solver = SafeDAP(Q,R,D_x,d_x,D_u,d_u,w_max,w_cov)
        env = Quadrotor1D(m,K_stab,l,w_max,dt)

        x_true_hist = []
        u_true_hist = []
        # Zero padding so that the controller always has a history to read.
        w_true_hist = [np.zeros((x_dim,1)) for _ in range(10*H_benchmark)]

        M,_phi = solver.solve(env.A,env.B,H_benchmark,
                              e_x=0,e_u=0,unconstrained=unconstrained,
                              K_stab=K_stab,b=b_target)

        for step in range(timesteps):
            x = env.state()
            x_true_hist.append(x)

            if step < pre_run_steps:
                # The random excitation is scaled by 0, so the pre-run input
                # is effectively zero; the draw is kept so that the random
                # number stream stays the same.
                u = (np.random.rand()-0.5)*2*0
            else:
                u = ApproxDAP(M,w_true_hist,0)

            env.step(u,b_target)
            u_true_hist.append(u)

            # Disturbance realised in this step: the next state minus the
            # prediction of the true closed-loop model.
            w = env.state()-env.AK.dot(x)-env.B.dot(-b_target+u)
            w_true_hist.append(w)

            if step%100 == 0:
                print('Step',step)

        return {'x': x_true_hist,
                'u': u_true_hist,
                'w': w_true_hist,
                'M': M}

    trial_data = []
    for trial in range(n_trials):
        print('Trial {}'.format(trial))
        trial_data.append(main_loop())

    return trial_data

In [11]:
# Benchmarks to run; each entry is called as alg(timesteps, n_trials).
alg_dict = {
    'Plain': PlainSim,
    'UnconstrainedDAP': partial(BenchmarkSim, unconstrained=True),
    'ConstrainedDAP': partial(BenchmarkSim, unconstrained=False),
}

# Run every benchmark and pickle its trials to ./data/<name>.pkl.
for key, alg in alg_dict.items():
    print("{} starts.".format(key))
    t = time()

    trial_data = alg(timesteps, n_trials)
    with open('./data/{}.pkl'.format(key), 'wb') as f:
        pkl.dump(trial_data, f)

    print('Total Time for {}:'.format(key), time()-t)


Plain starts.
Trial 0
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.01174616813659668
Trial 1
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.010758161544799805
Trial 2
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.012218475341796875
Trial 3
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.012369632720947266
Trial 4
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.011293172836303711
Trial 5
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.010501861572265625
Trial 6
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.011059999465942383
Trial 7
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.010931730270385742
Trial 8
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.01312565803527832
Trial 9
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.011895418167114258
Trial 10
Step 0
Step 100
Step 200
Step 300
Step 400
Time for trial: 0.011582612991333008
Trial 11
Step 0
Ste

# Safe Learning Experiments

In [12]:
est = QuadrotorEst(K_stab,dt,alpha_limit,beta_limit)
for eta_bar in eta_bars:

    def SafeAdaptiveSim(timesteps,unconstrained=False):
        """Run one trial of the two-phase adaptive control loop.

        Every episode calls ``phase`` twice: first going from eta = 0 to
        ``eta_bar`` ("Phase 1"), then, after the model has been re-estimated
        from the collected data, going from ``eta_bar`` back to 0 ("Phase 2").
        After each episode the margins e_x and e_u are multiplied by ``decay``.

        Parameters
        ----------
        timesteps : int
            Total number of environment steps; the trial stops as soon as
            that many states have been recorded.
        unconstrained : bool, optional
            Forwarded to ``SafeDAP.solve``.

        Returns
        -------
        dict
            Keys 'x', 'u', 'w' (estimated disturbances, including the initial
            zero padding), 'theta' (list of (A_hat, B_hat)) and 'M'.
        """
        # Data containers
        x_hist = []
        u_hist = []
        w_hat_hist = [np.zeros((x_dim,1)) for _ in range(10*H)]

        theta_hist = []
        r_hist = []
        M_hist = [M0]

        # Bookkeeping that is collected but not part of the returned data.
        refit_time_step = []
        phase1_endpoints = []
        phase2_endpoints = []

        n_episode = 0

        def phase(A_hat,B_hat,old_eta,new_eta):
            """Solve a controller for (A_hat, B_hat) and run it for one phase.

            Reads e_x, e_u, old_e_x, old_e_u and n_episode from the enclosing
            scope, and appends to M_hist, w_hat_hist, u_hist and x_hist.
            Raises TimeStepsReached once ``timesteps`` states are recorded.
            """
            # Solve for the new controller M.
            M,_ = safeDapSolver.solve(A_hat,B_hat,
                                      H,e_x = e_x,e_u = e_u,unconstrained = unconstrained,
                                      K_stab = K_stab,b=b_target)

            if n_episode<=1:
                # Early episodes: the "old" side is the freshly solved
                # controller together with the current estimate.
                old = {'M':np.array(M),'theta':(np.array(A_hat),np.array(B_hat)),'eta':old_eta,'r':r_hist[-1]}
            else:
                old = {'M':np.array(M_hist[-1]),'theta':theta_hist[-2],'eta':old_eta,'r':r_hist[-2]}
            new = {'M':np.array(M),'theta':theta_hist[-1],'eta':new_eta,'r':r_hist[-1]}

            old['e_x'] = old_e_x
            new['e_x'] = e_x

            # Fixed: the old input margin is old_e_u (old_e_x was stored here).
            old['e_u'] = old_e_u
            new['e_u'] = e_u

            M_hist.append(np.array(M))

            # Compute mid.
            mid = safeDapSolver.mid(old,new,H,K_stab)

            transit = SafeTransit(old,new,mid,H)

            for _ in range(refit_per_step):
                x = env.state()
                u = transit.get_u(w_hat_hist)

                env.step(u,b_target)

                Ah,Bh = transit.get_theta()
                # Calculate the estimated disturbance, and project it onto
                # the bounded disturbance set.
                w_hat = env.state()-(Ah-Bh.dot(K_stab)).dot(x)-Bh.dot(-b_target+u)

                w_hat[w_hat>w_max] = w_max
                w_hat[w_hat<-w_max] = -w_max
                w_hat_hist.append(np.array(w_hat))

                u_hist.append(u)
                x_hist.append(x)

                if len(x_hist)>=timesteps:
                    # Use the exception mechanism to stop the main loop once
                    # the total number of steps is reached.
                    raise TimeStepsReached

                # Updating M in the transit object.
                transit.step()

        env = Quadrotor1D(m,K_stab,l,w_max,dt)

        safeDapSolver = SafeDAP(Q,R,D_x,d_x,D_u,d_u,w_max,w_cov)

        # Setup the estimated parameters.
        A_hat = np.array(A_hat_0)
        B_hat = np.array(B_hat_0)
        r = r_0+0
        theta_hist.append((np.array(A_hat),np.array(B_hat)))
        r_hist.append(float(r))

        # Fixed: each margin initialises its own "old" copy.  Previously the
        # second assignment overwrote old_e_x with e_u_0 and left old_e_u
        # undefined until after the first phase.
        old_e_x = e_x = e_x_0
        old_e_u = e_u = e_u_0

        try:
            # main loop
            while True:

                # Phase 1: exploration exploitation
                phase(A_hat,B_hat,0,eta_bar)

                phase1_endpoints.append(len(x_hist))

                # Refit the system model.  The last input is dropped, so the
                # estimator receives one more state than inputs.
                uh = np.array(u_hist[:-1]).reshape(-1,u_dim)
                xh = np.array(x_hist).reshape(-1,x_dim)
                A_hat,B_hat,r = est.est(xh,uh,b_target=b_target)

                theta_hist.append((np.array(A_hat),np.array(B_hat)))
                r_hist.append(float(r))
                refit_time_step.append(len(x_hist))

                # Update old_e_x, old_e_u
                old_e_x = e_x+0
                old_e_u = e_u+0

                # Phase 2: pure exploitation
                phase(A_hat,B_hat,eta_bar,0)

                phase2_endpoints.append(len(x_hist))

                print('Episode',n_episode)

                n_episode+=1

                # Remember the margins of this episode before shrinking them.
                old_e_x = e_x+0
                old_e_u = e_u+0

                print('e_x',e_x)
                e_x*=decay
                e_u*=decay

        except TimeStepsReached:
            print('Time Steps Reached')

        # Prepare data output
        data = {}
        data['x'] = x_hist
        data['u'] = u_hist
        data['w'] = w_hat_hist
        data['theta'] = theta_hist
        data['M'] = M_hist
        return data
    
    # Learning algorithms to run for the current eta_bar; each entry is
    # called as alg(timesteps) and returns the data of one trial.
    alg_dict = {
        'SafeLearning': partial(SafeAdaptiveSim, unconstrained=False),
        'UnconstrainedLearning': partial(SafeAdaptiveSim, unconstrained=True),
    }

    for key, alg in alg_dict.items():
        print("{} starts.".format(key))
        trial_data = []
        completed = 0

        # A failed trial is retried, so the loop ends only after n_trials
        # successful runs.
        # NOTE(review): there is no cap on the number of retries, and the
        # AttributeError handler also hides unrelated attribute bugs raised
        # anywhere inside the trial.
        while completed < n_trials:
            t = time()
            try:
                print('Trial {}'.format(completed))
                result = alg(timesteps)
            except AttributeError:
                print('Infeasibility Encountered. Restarting the loop.')
            except cp.SolverError:
                print('Solver Error Encountered. Restarting the loop.')
            else:
                trial_data.append(result)
                completed += 1

            print('Time for trial:',time()-t)

        with open('./data/{}_eta_{}.pkl'.format(key,eta_bar),'wb') as f:
            pkl.dump(trial_data,f)

# Audible notification that the experiments above have finished.
from IPython.display import Audio

# Path is relative to the notebook's working directory.
sound_file = '../../sound/Bike-bell-sound.wav'
# Must stay the last expression of the cell so that the player is displayed.
Audio(sound_file,autoplay=True)

SafeLearning starts.
Trial 0
b_target -0.4325777165148185
Episode 0
e_x 1.0
b_target -0.4325777165148185
Episode 1
e_x 0.7
b_target -0.4325777165148185
Episode 2
e_x 0.48999999999999994
b_target -0.4325777165148185
Episode 3
e_x 0.3429999999999999
b_target -0.4325777165148185
Time Steps Reached
Time for trial: 3.986215591430664
Trial 1
b_target -0.4325777165148185
Episode 0
e_x 1.0
b_target -0.4325777165148185
Episode 1
e_x 0.7
b_target -0.4325777165148185
Episode 2
e_x 0.48999999999999994
b_target -0.4325777165148185
Episode 3
e_x 0.3429999999999999
b_target -0.4325777165148185
Time Steps Reached
Time for trial: 4.21422266960144
Trial 2
b_target -0.4325777165148185
Episode 0
e_x 1.0
b_target -0.4325777165148185
Episode 1
e_x 0.7
b_target -0.4325777165148185
Episode 2
e_x 0.48999999999999994
b_target -0.4325777165148185
Episode 3
e_x 0.3429999999999999
b_target -0.4325777165148185
Time Steps Reached
Time for trial: 4.129185199737549
Trial 3
b_target -0.4325777165148185
Infeasibility En