In [1]:
import numpy as np
import cvxpy as cp
from matplotlib import pyplot as plt

from time import time
from functools import partial

import sys
sys.path.append('../../')

from Environment import Quadrotor1D
from Subroutines import QuadrotorEst,ApproxDAP,max_norm,find_stable_radius,SafeTransit,spectral_radius
from time import time
from Controllers import SafeDAP
from scipy.linalg import sqrtm
import pickle as pkl
%load_ext autoreload
%autoreload 2

In [2]:
class TimeStepsReached(Exception):
    """Raised inside a simulation to abort it once the requested number of time steps has been recorded."""
def evaluate(space_dim,dt,K_m_dt,l_m_dt):
    """Return the largest eigenvalue magnitude of a 2*space_dim square matrix.

    The matrix is

        I + [[ 0,           dt * I      ],
             [ -K_m_dt * I, -l_m_dt * I ]]

    where every block is ``space_dim x space_dim``.

    Parameters
    ----------
    space_dim : int
        Size of each square block.
    dt, K_m_dt, l_m_dt : scalar
        Coefficients multiplying the identity in the upper-right, lower-left
        (negated) and lower-right (negated) blocks respectively.

    Returns
    -------
    numpy.float64
        ``max(|eig|)`` of the assembled matrix.
    """
    eye = np.eye(space_dim)
    zero = np.zeros_like(eye)

    # Off-identity part of the matrix, laid out block by block.
    increment = np.block([[zero, dt*eye],
                          [-K_m_dt*eye, -l_m_dt*eye]])
    full_matrix = increment + np.eye(2*space_dim)

    eigen_magnitudes = np.abs(np.linalg.eigvals(full_matrix))
    return eigen_magnitudes.max()

# Set up the system

In [3]:
# Arguments used to construct every Quadrotor1D instance in this notebook.
# Their physical meaning is defined in Environment.Quadrotor1D, which is not
# shown here; the per-line notes below are assumptions to confirm.
m = 1  # presumably the mass -- TODO confirm
K_stab = np.array([[2/3,1]])  # 1x2 gain; also handed to the estimator and the SafeDAP solver
l = 0.25  # presumably a drag/damping coefficient -- TODO confirm
dt = 1  # presumably the discretisation step -- TODO confirm
w_max = 0.2  # disturbance bound; estimated disturbances are clipped to [-w_max, w_max] later on
# w_max = 0.3

# Notebook-level environment. Its AK and B matrices are read by later cells.
env=Quadrotor1D(m,K_stab,l,w_max,dt)

# $\Theta_{ini}$

In [4]:
# Two-element bounds passed to QuadrotorEst together with K_stab and dt.
# NOTE(review): presumably (lower, upper) intervals for the two unknown model
# parameters that define the initial uncertainty set -- confirm in Subroutines.QuadrotorEst.
alpha_limit = (0.5,1.2)
beta_limit = (0.2,0.4)

# Optimization Parameters

In [5]:
# Constants
x_dim = env.AK.shape[-1]  # state dimension, taken from the last axis of env.AK
u_dim = env.B.shape[-1]  # input dimension, taken from the last axis of env.B


I_x = np.eye(x_dim)  
I_u = np.eye(u_dim)  

# NOTE(review): the variance of a uniform distribution is (interval width)^2 / 12,
# i.e. quadratic in the bound, whereas this expression is linear in w_max.
# Confirm against the disturbance model used inside Quadrotor1D before relying on it.
w_cov = np.eye(x_dim) * env.w_max * 1/12 # Assume uniform distribution.

# Cost matrices

# Q has shape (2, 2) and R has shape (u_dim, u_dim); both are identity matrices here.
Q = np.array([[1,0],
                [0,1]])*1
R = np.eye(u_dim)*1

# Constraints

# Scalar bounds. Judging by the names, x_* bound the first state component and
# v_* the second (presumably position and velocity -- confirm against Quadrotor1D).
x_max = 5
x_min = -1

v_max = 2
v_min = -2

u_max = 5
u_min = -9.8

# Stacked [I; -I] matrices with the matching bound vectors. Lower bounds enter
# negated, so they presumably encode D_x @ x <= d_x and D_u @ u <= d_u
# (the exact convention is defined by SafeDAP -- confirm).
D_x = np.vstack([I_x,-I_x]) # Constraints
d_x = np.array([x_max,v_max,-x_min,-v_min])

D_u = np.vstack([I_u,-I_u])
d_u = np.array([u_max,-u_min])

# Lookback lengths
H=10  # used by the adaptive-learning cell
H_benchmark = 40  # used by BenchmarkSim

# All-zero controller of shape (H, u_dim, x_dim); seeds M_hist in the adaptive-learning cell.
M0 = np.zeros((H,u_dim,x_dim))

# Both values are forwarded unchanged to SafeDAP.solve as e_x / e_u.
e_x = 0 # Usually e_x is not zero 
e_u = 0 # For safe DAP with known B, e_u = 0

refit_per_step = 50  # number of environment steps executed inside each phase() call
pre_run_steps = 2  # number of initial steps taken before a DAP controller is applied
# NOTE(review): TD_steps is not referenced anywhere else in the visible notebook.
TD_steps  = 30 # The least number of steps taken in Phase 1.

# Exploration magnitudes to sweep over; one set of result files is written per value.
# eta_bars = [1e-4,1e-3,0.01,0.1]
eta_bars = [0.01]

# Save a copy of the parameters we have

In [6]:
# Snapshot of the experiment configuration: cost and constraint matrices,
# estimator limits, horizons, exploration levels, the environment's A / AK / B
# matrices and the scalars used to build it.
params = {'Q':Q,'R':R,'D_x':D_x,'d_x':d_x,'D_u':D_u,'d_u':d_u,\
          'alpha_limit':alpha_limit,'beta_limit':beta_limit,\
          'H':H,'H_benchmark':H_benchmark,'eta_bars':eta_bars,\
          'A':env.A,'AK':env.AK,'B':env.B,\
          'w_max':w_max,'K_stab':K_stab,'l':l,'m':m,'dt':dt}
# Written relative to the notebook's working directory; open() raises if ./data does not exist.
with open('./data/Parameters.pkl','wb') as f:
    pkl.dump(params,f)

# Experiments

In [7]:

def PlainSim(timesteps,n_trials):
    """Simulate the environment with zero control input.

    Parameters
    ----------
    timesteps : int
        Number of environment steps per trial.
    n_trials : int
        Number of independent trials; each one uses a freshly built Quadrotor1D.

    Returns
    -------
    list of dict
        One dict per trial with key ``'x'`` holding the states observed
        before each step.
    """

    def run_single_trial():
        # A new environment per trial so that trials do not share state.
        plant = Quadrotor1D(m,K_stab,l,w_max,dt)

        observed_states = []
        for step in range(timesteps):
            # Record the state seen before the (zero) input is applied.
            observed_states.append(plant.state())
            plant.step(0)

            if step % 100 == 0:
                print('Step',step)

        return {'x': observed_states}

    results = []
    for trial in range(n_trials):
        started = time()
        print('Trial {}'.format(trial))
        results.append(run_single_trial())

        print('Time for trial:',time()-started)
    return results

def BenchmarkSim(timesteps,n_trials,unconstrained=False,with_K_stab=True):
    """Simulate a fixed DAP controller computed once from the true model.

    The controller ``M`` is obtained from ``SafeDAP.solve`` using the AK and B
    matrices of the notebook-level ``env`` with horizon ``H_benchmark``, and is
    then reused unchanged for every trial.

    Parameters
    ----------
    timesteps : int
        Number of environment steps per trial.
    n_trials : int
        Number of trials; each one uses a freshly built Quadrotor1D.
    unconstrained : bool, optional
        When truthy, ``unconstrained=True`` is passed to the solver.
    with_K_stab : bool, optional
        When truthy, ``K_stab`` is passed to the solver.

    Returns
    -------
    list of dict
        One dict per trial with keys ``'x'`` (states), ``'u'`` (inputs) and
        ``'w'`` (disturbance history, including the initial zero padding).
    """

    def rollout(M):
        # Reset the environment
        plant = Quadrotor1D(m,K_stab,l,w_max,dt)

        states = []
        inputs = []
        # The disturbance history starts with 10*H_benchmark zero vectors.
        disturbances = [np.zeros((x_dim,1)) for _ in range(10*H_benchmark)]

        for step in range(timesteps):
            x_now = plant.state()
            states.append(x_now)

            if step < pre_run_steps:
                # Evaluates to zero, but still draws one sample from the global RNG.
                u_now = (np.random.rand()-0.5)*2*0
            else:
                u_now = ApproxDAP(M,disturbances,0)

            plant.step(u_now)
            inputs.append(u_now)

            # Disturbance recovered from the environment's own AK and B matrices.
            disturbances.append(plant.state()-plant.AK.dot(x_now)-plant.B.dot(u_now))

            if step % 100 == 0:
                print('Step',step)

        # Prepare data output
        return {'x': states, 'u': inputs, 'w': disturbances}

    solver = SafeDAP(Q,R,D_x,d_x,D_u,d_u,w_max,w_cov)

    # Only pass the optional keywords when they are requested, so the solver's
    # own defaults apply otherwise.
    solve_options = {'e_x': e_x, 'e_u': e_u}
    if unconstrained:
        solve_options['unconstrained'] = True
    if with_K_stab:
        solve_options['K_stab'] = K_stab

    M,Phi = solver.solve(env.AK,env.B,H_benchmark,**solve_options)

    results = []
    for trial in range(n_trials):

        print('Trial {}'.format(trial))
        results.append(rollout(M))

    return results

In [8]:

# Size of the benchmark experiment: trials per algorithm and steps per trial.
n_trials = 10
timesteps = 400

# Algorithms to run. Every value is called as alg(timesteps, n_trials) and
# returns a list with one data dict per trial.
alg_dict = {
    'Plain':PlainSim,
            'UnconstrainedDAP':partial(BenchmarkSim,unconstrained=True),
            'ConstrainedDAP':partial(BenchmarkSim,unconstrained=False)
           }


# Run each algorithm, pickle its trial data to ./data/<key>.pkl and report the wall-clock time.
for key, alg in alg_dict.items():
    print("{} starts.".format(key))
    
    t = time()
      

    trial_data = alg(timesteps,n_trials)
    with open('./data/{}.pkl'.format(key),'wb') as f:
        pkl.dump(trial_data,f)
    
    print('Total Time for {}:'.format(key),time()-t)


Plain starts.
Trial 0
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.011920690536499023
Trial 1
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.013528585433959961
Trial 2
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.008852243423461914
Trial 3
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.009958028793334961
Trial 4
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.009712457656860352
Trial 5
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.00948953628540039
Trial 6
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.009351730346679688
Trial 7
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.008581161499023438
Trial 8
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.008859395980834961
Trial 9
Step 0
Step 100
Step 200
Step 300
Time for trial: 0.00927281379699707
Total Time for Plain: 0.11202406883239746
UnconstrainedDAP starts.
Trial 0
Step 0
Step 100
Step 200
Step 300
Trial 1
Step 0
Step 100
Step 200
Step 300
Trial 2
Step 0
Step 100
Step 200
Step 300
Trial

In [None]:
# One estimator instance is created here and shared by every trial and every eta_bar.
est = QuadrotorEst(K_stab,dt,alpha_limit,beta_limit)
for eta_bar in eta_bars:

    def SafeAdaptiveSim(timesteps,unconstrained=False,with_K_stab = True):
        """Run one trial of the adaptive controller and return its trajectory.

        Episode 0 applies ``pre_run_steps`` random inputs drawn from
        [-eta_bar, eta_bar) and fits a first model estimate. Every later
        episode runs ``phase`` twice: once with eta going 0 -> eta_bar
        ("Phase 1"), followed by a model refit, and once with eta going
        eta_bar -> 0 ("Phase 2"). The trial ends when ``phase`` raises
        TimeStepsReached, i.e. once ``timesteps`` states have been recorded.

        ``eta_bar`` is read from the enclosing ``for`` loop at call time.

        Parameters
        ----------
        timesteps : int
            Number of recorded states after which the trial stops.
        unconstrained : bool, optional
            Forwarded to ``SafeDAP.solve``.
        with_K_stab : bool, optional
            When true ``K_stab`` is passed to ``SafeDAP.solve``, otherwise ``None``.

        Returns
        -------
        dict
            Keys ``'x'`` (states), ``'u'`` (inputs), ``'w'`` (estimated
            disturbances, including the initial zero padding) and ``'theta'``
            (list of ``(A_hat, B_hat)`` estimates).
        """
        # Data containers

        x_hist = []
        u_hist = []
        # The disturbance history starts with 10*H zero vectors.
        w_hat_hist = [np.zeros((x_dim,1)) for _ in range(10*H)]

        theta_hist = []
        # NOTE(review): refit_time_step, phase1_endpoints and phase2_endpoints are
        # filled in below but are not part of the returned data.
        refit_time_step = []
        phase1_endpoints =[]
        phase2_endpoints=[]
        M_hist = [M0]
        r_hist = []

        n_episode = 0

        def phase(A_hat,B_hat,old_eta,new_eta):
            """Solve for a new controller and run ``refit_per_step`` steps of the transition to it.

            Appends to x_hist, u_hist, w_hat_hist and M_hist of the enclosing
            function, and raises TimeStepsReached as soon as x_hist holds
            ``timesteps`` entries.
            """
            # Local variables: A_hat,B_hat,old,new.
            # Global variables: theta_hist,M_hist,e_x,e_u,H,env,w_max, w_hat_hist
            # Start of the phase
            # Solve for new controller M.
            
            M,Phi = safeDapSolver.solve(A_hat-B_hat.dot(K_stab),B_hat,\
                                        H,e_x = e_x,e_u = e_u,unconstrained = unconstrained,\
                                        K_stab = K_stab if with_K_stab else None)
            
            
            # NOTE(review): n_episode is still 1 for the Phase 2 call of the first
            # episode, so that call also takes this branch and builds `old` from the
            # freshly solved M and the current estimate rather than from
            # M_hist[-1] / theta_hist[-2] -- confirm this is intended.
            if n_episode==1:
                 old = {'M':np.array(M),'theta':(np.array(A_hat),np.array(B_hat)),'eta':old_eta,'r':r_hist[-1]}
            else:
                old = {'M':np.array(M_hist[-1]),'theta':theta_hist[-2],'eta':old_eta,'r':r_hist[-2]}
            new = {'M':np.array(M),'theta':theta_hist[-1],'eta':new_eta,'r':r_hist[-1]}

            old['e_x']=new['e_x']=e_x
            old['e_u']=new['e_u']=e_u

            M_hist.append(np.array(M))

            # Compute mid.
            mid = safeDapSolver.mid(old,new,H)

            transit = SafeTransit(old,new,mid,H)

            for i in range(refit_per_step):
                x = env.state()
                u = transit.get_u(w_hat_hist)
                env.step(u)

                Ah,Bh = transit.get_theta()
                # Calculate the estimated disturbance, and project it onto the bounded disturbance set. 
                w_hat = env.state()-Ah.dot(x)-Bh.dot(u)
                # Element-wise clipping to [-w_max, w_max].
                w_hat[w_hat>w_max] = w_max
                w_hat[w_hat<-w_max] = -w_max 
                w_hat_hist.append(np.array(w_hat)) 

                u_hist.append(u)
                x_hist.append(x)

                if len(x_hist)>=timesteps:
                    raise TimeStepsReached # Use the exception mechanism to stop the main loop once the total timestep is reached.

                # Updating M in the transit object.
                transit.step()
            # End of the phase


        # Fresh environment and solver for this trial; phase() reads both through the closure.
        env = Quadrotor1D(m,K_stab,l,w_max,dt)
      
        safeDapSolver = SafeDAP(Q,R,D_x,d_x,D_u,d_u,w_max,w_cov)

        try:
        # main loop
            while True:

                if n_episode==0:
                    # Pre-run: random inputs drawn from [-eta_bar, eta_bar).
                    for i in range(pre_run_steps):
                        x = env.state()
                        x_hist.append(x)
                        u = (np.random.rand()-0.5)*2*eta_bar
                        env.step(u)
                        u_hist.append(u)
                    # The estimator receives every recorded state but one input fewer (the last input is dropped).
                    A_hat,B_hat,r = est.est(np.array(x_hist).reshape(-1,x_dim),np.array(u_hist[:-1]).reshape(-1,u_dim))
#                     print(A_hat,B_hat)
                    theta_hist.append((np.array(A_hat),np.array(B_hat)))
                    r_hist.append(float(r))
                    refit_time_step.append(len(x_hist))
    #                 print(A_hat,B_hat)

                    # Fit the system parameters for the first time.
                else:

                    # Phase 1: exploration exploitation 

#                     print('phase 1')
                    phase(A_hat,B_hat,0,eta_bar)

                    phase1_endpoints.append(len(x_hist))

                    # Refit the system model
                    A_hat,B_hat,r = est.est(np.array(x_hist).reshape(-1,x_dim),np.array(u_hist[:-1]).reshape(-1,u_dim))
#                     print(A_hat,B_hat)
                    theta_hist.append((np.array(A_hat),np.array(B_hat)))
                    r_hist.append(float(r))
                    refit_time_step.append(len(x_hist))

                    # Phase 2: pure exploitation 
#                     print('phase 2')
                    phase(A_hat,B_hat,eta_bar,0)

                    phase2_endpoints.append(len(x_hist))


                    print('Episode',n_episode)

                n_episode+=1

        except TimeStepsReached:
            # Normal termination: phase() signalled that `timesteps` states were recorded.
            print('Time Steps Reached')
            # Prepare data output


        # Prepare data output
        data = {}
        data['x'] = x_hist
        data['u'] = u_hist
        data['w'] = w_hat_hist
        data['theta'] = theta_hist
        return data
    
    # Variants to run for this eta_bar. This rebinds the notebook-level name
    # `alg_dict` that the benchmark cell also uses.
    alg_dict = {
        'SafeLearning':partial(SafeAdaptiveSim,unconstrained=False),
               'UnconstrainedLearning':partial(SafeAdaptiveSim,unconstrained=True)
               }


    n_trials = 10
    time_steps = 400
    for key, alg in alg_dict.items():
        print("{} starts.".format(key))
        trial_data = []
        _ = 0 
        # The counter only advances after a successful trial, so a failed trial is
        # retried. NOTE(review): there is no retry limit -- a configuration that
        # always fails would loop forever.
        while _ < n_trials: 
            t = time()
            try:
                print('Trial {}'.format(_))
                trial_data.append(alg(time_steps))
                _+=1
            # NOTE(review): AttributeError is reported as solver infeasibility, but
            # any unrelated AttributeError raised inside the trial is swallowed the same way.
            except AttributeError:
                print('Infeasibility Encountered. Restarting the loop.')
            except cp.SolverError:
                print('Solver Error Encountered. Restarting the loop.')
            
            print('Time for trial:',time()-t)
            
        # One result file per algorithm and exploration level.
        with open('./data/{}_eta_{}.pkl'.format(key,eta_bar),'wb') as f:
            pkl.dump(trial_data,f)

# Audible cue that the cell has finished. The Audio object is the cell's last
# expression, so the notebook renders (and auto-plays) it.
from IPython.display import Audio

sound_file = '../../sound/Bike-bell-sound.wav'
Audio(sound_file,autoplay=True)

SafeLearning starts.
Trial 0
Episode 1
Episode 2
Episode 3
Time Steps Reached
Time for trial: 6.308537721633911
Trial 1
Episode 1
Episode 2
Episode 3
Time Steps Reached
Time for trial: 6.104314565658569
Trial 2
Episode 1
Episode 2
Episode 3
Time Steps Reached
Time for trial: 6.335033178329468
Trial 3
Episode 1
Episode 2
Episode 3
Time Steps Reached
Time for trial: 6.306654214859009
Trial 4
Episode 1
Episode 2
Episode 3
