In [1]:
import torch,pickle,os
from torch.autograd import Variable
import numpy as np
def normalize(data,x_std_arr,x_mean_arr):
    """Standardize ``data`` element-wise as ``(data - mean) / std``.

    Only the first ``data.shape[-1]`` entries of ``x_mean_arr`` and
    ``x_std_arr`` are used, so the statistics arrays may be longer than the
    last dimension of ``data``. Both slices are converted to float32 tensors.
    """
    width = data.shape[-1]
    mean = torch.tensor(x_mean_arr[:width], dtype=torch.float32)
    std = torch.tensor(x_std_arr[:width], dtype=torch.float32)
    return (data - mean) / std
def denormalize(data,y_std_arr,y_mean_arr):
    """Undo standardization element-wise: ``data * std + mean``.

    Only the first ``data.shape[-1]`` entries of ``y_std_arr`` and
    ``y_mean_arr`` are used; both slices are converted to float32 tensors.
    """
    width = data.shape[-1]
    scale = torch.tensor(y_std_arr[:width], dtype=torch.float32)
    offset = torch.tensor(y_mean_arr[:width], dtype=torch.float32)
    return data * scale + offset

def get_A_and_B(x,u,model,x_std_arr,x_mean_arr,y_std_arr,y_mean_arr,init_state=None):
    """Linearize the learned one-step dynamics around ``(x, u)``.

    The predicted next state is
    ``x + denormalize(model(normalize(cat(x, u[, init_state]))))``.
    Row ``i`` of ``A`` is the gradient of the ``i``-th next-state component
    with respect to ``x``; row ``i`` of ``B`` is its gradient with respect
    to ``u``.

    Args:
        x: 1-D state (array-like), converted to a float32 tensor.
        u: 1-D action (array-like), converted to a float32 tensor.
        model: callable mapping the normalized input tensor to the
            normalized state change.
        x_std_arr, x_mean_arr: input normalization statistics.
        y_std_arr, y_mean_arr: output normalization statistics.
        init_state: optional extra input appended after ``(x, u)``; it is
            treated as a constant (no Jacobian is returned for it).

    Returns:
        ``(A, B)`` float32 tensors of shape ``(len(x), len(x))`` and
        ``(len(x), len(u))``.
    """
    x = torch.tensor(x, dtype=torch.float32, requires_grad=True)
    u = torch.tensor(u, dtype=torch.float32, requires_grad=True)
    xu = torch.cat((x, u))
    if init_state is not None:
        # Accept arrays/lists as well as tensors for the extra model input.
        xu = torch.cat((xu, torch.as_tensor(init_state, dtype=torch.float32)))
    next_x = denormalize(model(normalize(xu, x_std_arr, x_mean_arr)), y_std_arr, y_mean_arr) + x

    A = torch.zeros(x.shape[0], x.shape[0])
    B = torch.zeros(x.shape[0], u.shape[0])
    for i in range(next_x.shape[0]):
        # autograd.grad returns the gradients directly instead of accumulating
        # them in .grad, so nothing has to be zeroed between rows and the
        # model parameters' .grad buffers are left untouched.
        grad_x, grad_u = torch.autograd.grad(
            next_x[i], (x, u), retain_graph=True, allow_unused=True
        )
        # A None gradient means the output does not depend on that input;
        # the corresponding row simply stays zero.
        if grad_x is not None:
            A[i] = grad_x
        if grad_u is not None:
            B[i] = grad_u
    return A, B
def real_hand_get_A_and_B(x,u,init_state,model,x_std_arr,x_mean_arr,y_std_arr,y_mean_arr):
    """Linearize the learned dynamics for the state augmented with ``init_state``.

    The model input is ``cat(x, u, init_state)`` and the predicted next state
    is ``x + denormalize(model(normalize(input)))``. The returned matrices
    describe the augmented state ``[x, init_state]``:

    * rows ``0 .. len(x)-1`` of ``A`` hold the gradient of each next-state
      component with respect to ``[x, init_state]``, and the same rows of
      ``B`` hold its gradient with respect to ``u``;
    * rows ``len(x) ..`` of ``A`` are identity rows (``init_state`` is carried
      over unchanged) and the matching rows of ``B`` are zero.

    Args:
        x: 1-D state (array-like), converted to a float32 tensor.
        u: 1-D action (array-like), converted to a float32 tensor.
        init_state: 1-D extra model input (array-like), converted to a
            float32 tensor.
        model: callable mapping the normalized input tensor to the
            normalized state change.
        x_std_arr, x_mean_arr: input normalization statistics.
        y_std_arr, y_mean_arr: output normalization statistics.

    Returns:
        ``(A, B)`` float32 tensors of shape ``(n, n)`` and ``(n, len(u))``
        with ``n = len(x) + len(init_state)``.
    """
    x = torch.tensor(x, dtype=torch.float32, requires_grad=True)
    u = torch.tensor(u, dtype=torch.float32, requires_grad=True)
    init_state = torch.tensor(init_state, dtype=torch.float32, requires_grad=True)
    xu = torch.cat((x, u, init_state))
    next_x = denormalize(model(normalize(xu, x_std_arr, x_mean_arr)), y_std_arr, y_mean_arr) + x

    x_dim, u_dim, s_dim = x.shape[0], u.shape[0], init_state.shape[0]
    A = torch.zeros(x_dim + s_dim, x_dim + s_dim)
    B = torch.zeros(x_dim + s_dim, u_dim)
    inputs = (x, u, init_state)
    for i in range(next_x.shape[0]):
        # Per-row gradients are requested directly. The previous
        # backward()/.grad version never reset init_state.grad, so the
        # init_state columns of row i contained the sum over rows 0..i.
        grads = torch.autograd.grad(
            next_x[i], inputs, retain_graph=True, allow_unused=True
        )
        # An input the output does not depend on yields None -> zero gradient.
        grad_x, grad_u, grad_s = [
            g if g is not None else torch.zeros_like(v)
            for g, v in zip(grads, inputs)
        ]
        A[i] = torch.cat((grad_x, grad_s))
        B[i] = grad_u
    # init_state is constant over the rollout: identity block in A.
    for i in range(x_dim, x_dim + s_dim):
        A[i, i] = 1
    return A, B
def get_K(A_ls,B_ls):
    """Compute time-varying LQR feedback gains by a backward Riccati recursion.

    Uses identity state and control cost matrices (``Q = I``, ``R = I``) and a
    zero cost-to-go matrix after the final step. For each step, going
    backwards in time::

        K = -(R + B^T P B)^{-1} B^T P A
        P = Q + K^T R K + (A + B K)^T P (A + B K)

    Args:
        A_ls: sequence of ``(x_dim, x_dim)`` state matrices, one per step
            (torch tensors or NumPy arrays).
        B_ls: sequence of ``(x_dim, u_dim)`` input matrices, one per step.

    Returns:
        List of ``(u_dim, x_dim)`` NumPy gain matrices in forward time order
        (same length as ``A_ls``); an empty list for an empty horizon.
    """
    def _as_array(mat):
        # Tensors expose .numpy(); anything else is handed to NumPy directly.
        return mat.numpy() if hasattr(mat, 'numpy') else np.asarray(mat)

    H = len(A_ls)
    if H == 0:
        return []
    x_dim, u_dim = B_ls[0].shape
    Q = np.identity(x_dim)
    R = np.identity(u_dim)
    P = np.zeros((x_dim, x_dim))
    K_ls = []
    for t in range(H - 1, -1, -1):
        A = _as_array(A_ls[t])
        B = _as_array(B_ls[t])
        BtP = B.T.dot(P)
        # Solve the linear system instead of forming an explicit inverse.
        K = -np.linalg.solve(R + BtP.dot(B), BtP.dot(A))
        K_ls.append(K)
        closed_loop = A + B.dot(K)
        P = Q + K.T.dot(R).dot(K) + closed_loop.T.dot(P).dot(closed_loop)
    # Gains were produced last-step-first; return them in forward order.
    K_ls.reverse()
    return K_ls

ImportError: No module named 'torch'

In [2]:
# Gazebo adaptive-hand model: linearize the learned dynamics along each A*
# plan, save the per-step (A, B) matrices and the resulting LQR gains.
ho='_ho0.999'

# NOTE(review): torch.load / pickle.load can execute arbitrary code from the
# file being read; only point these at trusted, locally produced artifacts.
model_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/trans_model_data/gazebo_ah_model/sim_cont_trajT_bs512_model512_BS64_loadT'+ho
with open(model_path, 'rb') as pickle_file:
    model = torch.load(pickle_file, map_location='cpu')
norm_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/trans_model_data/gazebo_ah_normalization/normalization_arr_sim_cont_trajT_bs512_model512_BS64_loadT'+ho+'_py2'
with open(norm_path, 'rb') as pickle_file:
    x_norm_arr, y_norm_arr = pickle.load(pickle_file)
x_mean_arr, x_std_arr = x_norm_arr[0], x_norm_arr[1]
y_mean_arr, y_std_arr = y_norm_arr[0], y_norm_arr[1]

set_idx='20'
initial_path_mode='astar'

if set_idx=='21':
    goal_idx_ls=[0]
elif set_idx=='20':
    goal_idx_ls=[0,2,7,8,15]
else:
    # Without this, goal_idx_ls would be undefined (or stale from an earlier run).
    raise ValueError('unsupported set_idx: '+str(set_idx))

a_and_b_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_coef/'
k_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_k/'

for goal_idx in goal_idx_ls:
    if initial_path_mode=='astar':
        if set_idx=='20':
            base_path='/Users/zsbjltwjj/Downloads/t42_hand/beliefspaceplanning/rollout_node/set/set20c_100ac2'+ho+'/astar_goal'+str(goal_idx)+'_run0_m100_obs0.75'+ho
        elif set_idx=='21':
            raise ValueError('still waiting for the astar solution!')
    else:
        # Without this, base_path would be undefined (or reused from a previous goal).
        raise ValueError('unsupported initial_path_mode: '+str(initial_path_mode))
    x_path=base_path+'_traj.txt'
    u_path=base_path+'_plan.txt'
    xx=np.loadtxt(x_path,delimiter=',')
    uu=np.loadtxt(u_path,delimiter=',')
    if uu.ndim==1:
        # NOTE(review): treats a 1-D plan as one scalar action per step -- confirm
        # this is never a single step with a multi-dimensional action.
        uu=uu.reshape(-1,1)

    # One linearization per planned action, evaluated at the matching trajectory state.
    A_ls,B_ls=[],[]
    for i in range(uu.shape[0]):
        A,B=get_A_and_B(xx[i],uu[i],model,x_std_arr,x_mean_arr,y_std_arr,y_mean_arr)
        A_ls.append(A)
        B_ls.append(B)

    run_tag=initial_path_mode+ho+'_gazebo_ah_set'+set_idx+'c_goal'+str(goal_idx)
    os.makedirs(a_and_b_path,exist_ok=True)
    A_path=a_and_b_path+run_tag+'_A'
    B_path=a_and_b_path+run_tag+'_B'
    with open(A_path,'wb') as A_file:
        torch.save(A_ls,A_file)
    with open(B_path,'wb') as B_file:
        torch.save(B_ls,B_file)

    os.makedirs(k_path,exist_ok=True)
    K_path=k_path+run_tag+'_K'
    K_ls=get_K(A_ls,B_ls)
    with open(K_path,'wb') as pkl_file:
        # protocol=2 keeps the gains readable from Python 2.
        pickle.dump(K_ls,pkl_file,protocol=2)
    

In [6]:
# Real adaptive-hand model: linearize the learned dynamics (with the rollout's
# start state as an extra model input) along each A* plan, then save the
# per-step (A, B) matrices and the resulting LQR gains.
ho=''

# NOTE(review): torch.load / pickle.load can execute arbitrary code from the
# file being read; only point these at trusted, locally produced artifacts.
model_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/trans_model_data/real_ah_wm_v0.1/model_lr0.0002_val0.1_seed0_nn_2_dp_0.1_nodes_200'+ho+'_epochs_10'
with open(model_path, 'rb') as pickle_file:
    model = torch.load(pickle_file, map_location='cpu')
# NOTE(review): the file name ("dp_0.1") suggests the network uses dropout --
# confirm the loaded model is in eval mode before trusting the Jacobians.
norm_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/trans_model_data/real_ah_wm_v0.1/normalization/normalization_arr'+ho
with open(norm_path, 'rb') as pickle_file:
    x_norm_arr, y_norm_arr = pickle.load(pickle_file)
x_mean_arr, x_std_arr = x_norm_arr[0], x_norm_arr[1]
y_mean_arr, y_std_arr = y_norm_arr[0], y_norm_arr[1]

initial_path_mode='astar'
goal_idx_ls=[0,1,2,3]

a_and_b_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_coef/'
k_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_k/'

for goal_idx in goal_idx_ls:
    if initial_path_mode=='astar':
        base_path='/Users/zsbjltwjj/Downloads/t42_hand/t42_control/rollout_t42/set/astar_set'+ho+'/astar_goal'+str(goal_idx)+'_run0_m100'+ho
    else:
        # Without this, base_path would be undefined (or reused from a previous goal).
        raise ValueError('unsupported initial_path_mode: '+str(initial_path_mode))
    x_path=base_path+'_traj.txt'
    u_path=base_path+'_plan.txt'
    xx=np.loadtxt(x_path,delimiter=',')
    uu=np.loadtxt(u_path,delimiter=',')
    if uu.ndim==1:
        # NOTE(review): treats a 1-D plan as one scalar action per step -- confirm
        # this is never a single step with a multi-dimensional action.
        uu=uu.reshape(-1,1)
    # The first trajectory state is fed to the model at every step of this rollout.
    init_st=torch.tensor(xx[0],dtype=torch.float32)  # tensor form, only needed for get_A_and_B(..., init_state=init_st)
    init_state=xx[0]

    # One linearization per planned action; A and B are returned for the
    # state augmented with init_state.
    A_ls,B_ls=[],[]
    for i in range(uu.shape[0]):
        A,B=real_hand_get_A_and_B(xx[i],uu[i],init_state,model,x_std_arr,x_mean_arr,y_std_arr,y_mean_arr)
        A_ls.append(A)
        B_ls.append(B)

    run_tag=initial_path_mode+ho+'_real_ah_goal'+str(goal_idx)
    os.makedirs(a_and_b_path,exist_ok=True)
    A_path=a_and_b_path+run_tag+'_A'
    B_path=a_and_b_path+run_tag+'_B'
    with open(A_path,'wb') as A_file:
        torch.save(A_ls,A_file)
    with open(B_path,'wb') as B_file:
        torch.save(B_ls,B_file)

    os.makedirs(k_path,exist_ok=True)
    K_path=k_path+run_tag+'_K'
    K_ls=get_K(A_ls,B_ls)
    with open(K_path,'wb') as pkl_file:
        # protocol=2 keeps the gains readable from Python 2.
        pickle.dump(K_ls,pkl_file,protocol=2)

In [4]:
# Reacher-v2 model: linearize the learned dynamics along each A* plan, then
# save the per-step (A, B) matrices and the resulting LQR gains.
ho='_ho0.999'

# The '_ho0.999' variant is stored under a different epoch count than the default one.
epochs_suffix='_epochs_100' if ho=='_ho0.999' else '_epochs_50'
model_save_path='./trans_model_data/Reacher-v2_model/Reacher-v2_model_lr0.0001_nodes512_seed0'+ho+epochs_suffix
norm_path='./trans_model_data/Reacher-v2_normalization/normalization_arr'+ho
# NOTE(review): torch.load / pickle.load can execute arbitrary code from the
# file being read; only point these at trusted, locally produced artifacts.
with open(model_save_path, 'rb') as pickle_file:
    model = torch.load(pickle_file, map_location='cpu')
with open(norm_path, 'rb') as pickle_file:
    x_norm_arr, y_norm_arr = pickle.load(pickle_file)
x_mean_arr, x_std_arr = x_norm_arr[0], x_norm_arr[1]
y_mean_arr, y_std_arr = y_norm_arr[0], y_norm_arr[1]

initial_path_mode='astar'
goal_idx_ls=[1,2,5]

a_and_b_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_coef/'
k_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_k/'

for goal_idx in goal_idx_ls:
    if initial_path_mode=='astar':
        base_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/mjo_astar_eval_results'+ho+'/Reacher-v2_shortest_path_m1_run'+str(goal_idx)
    else:
        # Without this, base_path would be undefined (or reused from a previous goal).
        raise ValueError('unsupported initial_path_mode: '+str(initial_path_mode))
    x_path=base_path+'_traj.txt'
    u_path=base_path+'_plan.txt'
    xx=np.loadtxt(x_path,delimiter=',')
    uu=np.loadtxt(u_path,delimiter=',')
    if uu.ndim==1:
        # NOTE(review): treats a 1-D plan as one scalar action per step -- confirm
        # this is never a single step with a multi-dimensional action.
        uu=uu.reshape(-1,1)

    # One linearization per planned action, evaluated at the matching trajectory state.
    A_ls,B_ls=[],[]
    for i in range(uu.shape[0]):
        A,B=get_A_and_B(xx[i],uu[i],model,x_std_arr,x_mean_arr,y_std_arr,y_mean_arr)
        A_ls.append(A)
        B_ls.append(B)

    run_tag=initial_path_mode+ho+'_Reacher-v2_goal'+str(goal_idx)
    os.makedirs(a_and_b_path,exist_ok=True)
    A_path=a_and_b_path+run_tag+'_A'
    B_path=a_and_b_path+run_tag+'_B'
    with open(A_path,'wb') as A_file:
        torch.save(A_ls,A_file)
    with open(B_path,'wb') as B_file:
        torch.save(B_ls,B_file)

    os.makedirs(k_path,exist_ok=True)
    K_path=k_path+run_tag+'_K'
    K_ls=get_K(A_ls,B_ls)
    with open(K_path,'wb') as pkl_file:
        # protocol=2 keeps the gains readable from Python 2.
        pickle.dump(K_ls,pkl_file,protocol=2)

In [5]:
# Acrobot-v1 model: linearize the learned dynamics along each A* plan, then
# save the per-step (A, B) matrices and the resulting LQR gains.
ho=''

model_save_path='./trans_model_data/Acrobot-v1_model/Acrobot-v1_model_lr0.0001_nodes512_seed0'+ho+'_epochs_50'
norm_path='./trans_model_data/Acrobot-v1_normalization/normalization_arr'+ho
# NOTE(review): torch.load / pickle.load can execute arbitrary code from the
# file being read; only point these at trusted, locally produced artifacts.
with open(model_save_path, 'rb') as pickle_file:
    model = torch.load(pickle_file, map_location='cpu')
with open(norm_path, 'rb') as pickle_file:
    x_norm_arr, y_norm_arr = pickle.load(pickle_file)
x_mean_arr, x_std_arr = x_norm_arr[0], x_norm_arr[1]
y_mean_arr, y_std_arr = y_norm_arr[0], y_norm_arr[1]

initial_path_mode='astar'
goal_idx_ls=[1]

a_and_b_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_coef/'
k_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_k/'

for goal_idx in goal_idx_ls:
    if initial_path_mode=='astar':
        base_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/mjo_astar_eval_results'+ho+'/Acrobot-v1_quickest_search_m10_run'+str(goal_idx)+'_goalheightplanned_0'
    else:
        # Without this, base_path would be undefined (or reused from a previous goal).
        raise ValueError('unsupported initial_path_mode: '+str(initial_path_mode))
    x_path=base_path+'_traj.txt'
    u_path=base_path+'_plan.txt'
    xx=np.loadtxt(x_path,delimiter=',')
    uu=np.loadtxt(u_path,delimiter=',')
    if uu.ndim==1:
        # NOTE(review): treats a 1-D plan as one scalar action per step -- confirm
        # this is never a single step with a multi-dimensional action.
        uu=uu.reshape(-1,1)

    # One linearization per planned action, evaluated at the matching trajectory state.
    A_ls,B_ls=[],[]
    for i in range(uu.shape[0]):
        A,B=get_A_and_B(xx[i],uu[i],model,x_std_arr,x_mean_arr,y_std_arr,y_mean_arr)
        A_ls.append(A)
        B_ls.append(B)

    run_tag=initial_path_mode+ho+'_Acrobot-v1_goal'+str(goal_idx)
    os.makedirs(a_and_b_path,exist_ok=True)
    A_path=a_and_b_path+run_tag+'_A'
    B_path=a_and_b_path+run_tag+'_B'
    with open(A_path,'wb') as A_file:
        torch.save(A_ls,A_file)
    with open(B_path,'wb') as B_file:
        torch.save(B_ls,B_file)

    os.makedirs(k_path,exist_ok=True)
    K_path=k_path+run_tag+'_K'
    K_ls=get_K(A_ls,B_ls)
    with open(K_path,'wb') as pkl_file:
        # protocol=2 keeps the gains readable from Python 2.
        pickle.dump(K_ls,pkl_file,protocol=2)

In [20]:
### CHECK ###
# Finite-difference sanity check: perturb only the first state component and
# compare the change in the predicted next state with the first column of A.
ho='_ho0.999'

model_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/trans_model_data/gazebo_ah_model/sim_cont_trajT_bs512_model512_BS64_loadT'+ho
norm_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/trans_model_data/gazebo_ah_normalization/normalization_arr_sim_cont_trajT_bs512_model512_BS64_loadT'+ho+'_py2'
with open(model_path, 'rb') as pickle_file:
    model = torch.load(pickle_file, map_location='cpu')
with open(norm_path, 'rb') as pickle_file:
    x_norm_arr, y_norm_arr = pickle.load(pickle_file)
    x_mean_arr, x_std_arr = x_norm_arr[0], x_norm_arr[1]
    y_mean_arr, y_std_arr = y_norm_arr[0], y_norm_arr[1]


def _one_step(state, action):
    """Return the concatenated model input and the predicted next state."""
    joined = torch.cat((state, action))
    delta = denormalize(model(normalize(joined,x_std_arr,x_mean_arr)),y_std_arr,y_mean_arr)
    return joined, delta+state


# Reference point.
init_x=torch.tensor([0.03238881511894898396,118.18717713766936583397,16.00000000000000000000,16.00000000000000000000])
init_u=torch.tensor([-1.00000000000000000000,-1.00000000000000000000])
xu, next_x = _one_step(init_x, init_u)
print(next_x)

# Same point with the first state component shifted by 0.001.
init_xx=torch.tensor([0.03338881511894898396,118.18717713766936583397,16.00000000000000000000,16.00000000000000000000])
init_u=torch.tensor([-1.00000000000000000000,-1.00000000000000000000])
xu, next_xx = _one_step(init_xx, init_u)
print(next_xx)

delta_x=init_xx[0]-init_x[0]
print(delta_x)

# Finite-difference estimate of d(next state)/d(x[0]).
print((next_xx-next_x)/delta_x)

tensor([3.6515e-02, 1.1820e+02, 1.5972e+01, 1.5972e+01],
       grad_fn=<AddBackward0>)
tensor([3.7515e-02, 1.1820e+02, 1.5972e+01, 1.5972e+01],
       grad_fn=<AddBackward0>)
tensor(0.0010)
tensor([0.9998, 0.0000, 0.0000, 0.0000], grad_fn=<DivBackward0>)


In [21]:
# Reload the saved linearization for one Gazebo goal and show its first A matrix.
# NOTE(review): `ho` is not set in this cell; it must already be defined by a
# previously executed cell.
initial_path_mode='astar'
set_idx='20'
goal_idx=8
a_and_b_path='/Users/zsbjltwjj/Downloads/t42_hand/zs_sim_robot/lqr_coef/'
# Both files share everything but the trailing matrix letter.
coef_stem=a_and_b_path+initial_path_mode+ho+'_gazebo_ah_set'+set_idx+'c_goal'+str(goal_idx)
A_path=coef_stem+'_A'
B_path=coef_stem+'_B'
with open(A_path,'rb') as A_file:
    A_ls=torch.load(A_file)
with open(B_path,'rb') as B_file:
    B_ls=torch.load(B_file)
A_ls[0]

tensor([[ 9.9983e-01,  2.0099e-03,  6.1035e-03, -5.2692e-03],
        [ 1.5732e-04,  9.9991e-01,  7.2536e-04,  1.2060e-03],
        [ 2.5293e-05, -1.9423e-04,  1.0001e+00, -1.4734e-04],
        [ 4.2482e-05, -1.6070e-04,  3.7555e-04,  9.9993e-01]])

In [26]:
# Recompute the state Jacobian row by row at a hand-picked (x, u) and print
# each row; `model` and the normalization arrays come from an earlier cell.
x=torch.tensor([0.03338881511894898396,118.18817713766936583397,16.00100000000000000000,16.00100000000000000000],dtype=torch.float32,requires_grad=True)
u=torch.tensor([-1,-1],dtype=torch.float32,requires_grad=True)
xu = torch.cat((x,u))

normalized_in = normalize(xu,x_std_arr,x_mean_arr)
next_x = denormalize(model(normalized_in),y_std_arr,y_mean_arr)+x
n_x, n_u = x.shape[0], u.shape[0]
A=torch.zeros(n_x,n_x)
B=torch.zeros(n_x,n_u)
for i in range(4):
    # Gradient of the i-th next-state component w.r.t. x and u.
    next_x[i].backward(retain_graph=True)
    A[i]=x.grad.data
    B[i]=u.grad.data
    # backward() accumulates, so clear both buffers before the next row.
    for leaf in (x,u):
        leaf.grad.data.zero_()
    print(A[i])

tensor([ 0.9998,  0.0020,  0.0061, -0.0053])
tensor([1.5732e-04, 9.9991e-01, 7.2536e-04, 1.2060e-03])
tensor([ 2.5293e-05, -1.9423e-04,  1.0001e+00, -1.4734e-04])
tensor([ 4.2482e-05, -1.6070e-04,  3.7555e-04,  9.9993e-01])
