In [1]:
import math
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

import GPy

import time as t

In [2]:
import pandas as pd
import sys
from env_pybullet_gen3 import env_pybullet_kin_gen3



In [3]:
# Create the experiment environment (pybullet simulation of the arm).
# home_angles holds seven values; their units are not visible here — TODO confirm.
env = env_pybullet_kin_gen3(no_zeros = True,Excel_path_Okay_tcp = "./positions_from_joints_19.xlsx",time_step=0.05,home_angles = [-0.207226464676801,1.5689993219813,-1.01515387451347,-2.45271819663908,2.00795352004673,1.91098991659003,-0.831045149646278])
env.robot.visual_inspection = True

# Starting parameters (presumably the ones loaded from the URDF — TODO confirm).

# Bring the maximum velocities closer to the real robot.
# Both env.max_vel and the "max_vel" column of original_parameters_df have to be
# updated so that they stay in sync.
env.max_vel = [168,151,128,94,210,48,189]
env.original_parameters_df ["max_vel"]=env.max_vel
print(env.original_parameters_df["max_vel"])
print(env.original_parameters_df)


print('observation space:', env.observation_space) # size of the state; per the original note there is a single constant state
# Select which parameter groups are tuned; env.action_space is printed right after this call.
env.update_parameters_to_modify(["mass","max_vel","kp","ki","kd","force_x_one","Ixx","Iyy","Izz"])
print('action space:', env.action_space) # number of continuous parameters chosen for tuning
print('original action:', env.action_original()) # untuned values of the parameters chosen for tuning




hola
../Simulation_Pybullet/models/urdf/JACO3_URDF_V11.urdf
Robot launched
hola
(7, 12)
(7, 17)
0    168
1    151
2    128
3     94
4    210
5     48
6    189
Name: max_vel, dtype: int64
       mass damping       Ixx       Iyy       Izz   kp   ki   kd  max_vel  \
0  1.377353       0  0.004801  0.004755  0.002283  0.1  0.0  0.0      168   
1  1.163667       0  0.008419  0.001920  0.008361  0.1  0.0  0.0      151   
2  1.163660       0  0.007545  0.007487  0.001921  0.1  0.0  0.0      128   
3  0.930287       0  0.006410  0.001380  0.006518  0.1  0.0  0.0       94   
4  0.678106       0  0.001680  0.001506  0.000826  0.1  0.0  0.0      210   
5  0.678106       0  0.001938  0.000827  0.001763  0.1  0.0  0.0       48   
6  0.500657       0  0.000775  0.000585  0.000975  0.1  0.0  0.0      189   

   force_x_one  
0            1  
1            1  
2            1  
3            1  
4            1  
5            1  
6            1  
observation space: 1
mass okey
max_vel okey
kp okey
ki okey


In [None]:
# Initial random search: evaluate small multiplicative perturbations of the
# original parameter vector, or reload a previously saved run from disk.
pop_size = 70*20
sigma = 0.0001
original_action = np.array(env.action_original())
np.random.seed(0)
Create = True
rewards = []
actions = []

if Create == True:
    # Draw every perturbation vector up front, then evaluate them one by one.
    weights_pop = [sigma*np.random.randn(env.action_space) for _ in range(pop_size)]
    for weights in weights_pop:
        # action = original * (relative perturbation) + original
        action = weights*original_action + original_action
        actions.append(action)
        rewards.append(env.step_tcp_rishabh(action))
    actions = np.array(actions)
    rewards = np.array(rewards).reshape(actions.shape[0],1)

    # Persist actions and rewards side by side (reward is the last column).
    i_data = np.hstack((actions,rewards))
    np.save("i_data.npy",i_data)
else:
    # Reuse the saved run: every column but the last is an action component.
    i_data = np.load("i_data.npy")
    actions = i_data[:,:-1]
    rewards = i_data[:,-1:]


In [None]:
# Show the label, shape and contents of the sampled actions and their rewards.
for label, values in (("actions", actions), ("rewards", rewards)):
    print(label)
    print(values.shape)
    print(values)

In [None]:
# Fit a GPy regression model that maps parameter vectors (actions) to their
# measured score (rewards).
# Linear kernel with one input dimension per action component; ARD=1 is passed
# through to GPy (presumably one weight per input dimension — TODO confirm).
kernel = GPy.kern.Linear(actions.shape[1], ARD=1)
# Alternative kernel left disabled; note it refers to `X`, which this cell does not define.
#kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)

model = GPy.models.GPRegression(actions, rewards, kernel)

# Optimise the model with the 'scg' optimiser, capped at 10**2 iterations.
model.optimize(optimizer='scg', max_iters=10**2)

In [None]:
# Measure how well the fitted model reproduces the rewards it was trained on.
# model.predict returns a tuple; its first element is used as the predicted reward.
predictions = model.predict(actions)

# Absolute error per sample, reduced with NumPy along axis 0. The builtin
# max()/abs() only worked because every row holds a single value, and builtin
# max() silently mishandles NaN; the array reductions also match the mean below.
error = np.abs(np.asarray(predictions[0]) - rewards)
error_max = error.max(axis=0)
error_mean = error.mean(axis=0)

print("error")
print(error)
print("error_max")
print(error_max)
print("error_mean")
print(error_mean)

In [None]:
# Get the best action from regressed model

def Get_best_action_from_model(env,model,sigma = 0.5, population = 70*10**3,n_elite = 10):
    """Sample candidate actions and return those the model scores highest.

    Candidates are multiplicative perturbations of the environment's original
    action: ``action = weights * original + original`` with
    ``weights ~ sigma * N(0, 1)``.

    Parameters
    ----------
    env : object exposing ``action_space`` (int, number of tuned parameters)
        and ``action_original()`` (the unperturbed parameter vector).
    model : object whose ``predict(X)`` returns a tuple with the predicted
        reward for every row of ``X`` as its first element.
    sigma : float
        Relative standard deviation of the perturbations.
    population : int
        Number of candidates to sample; capped at 70*10**3.
    n_elite : int
        Number of top-scoring candidates to return.

    Returns
    -------
    (elite_actions, elite_rewards) : tuple of np.ndarray
        ``elite_actions`` has shape (n_elite, env.action_space) and
        ``elite_rewards`` has shape (n_elite, k), where k is the number of
        prediction columns. Rows are ordered by ascending predicted reward,
        so the best candidate is last.
    """
    max_population = 70*10**3
    if(population>max_population):
        print("More data fill the memory, and creates and error, used only 70*10**3")
        population = max_population

    # Take the baseline from the env that was passed in instead of relying on a
    # notebook-global `original_action` that may be stale or undefined.
    base_action = np.asarray(env.action_original())

    # One row of relative perturbations per candidate, drawn in a single call.
    weights_pop_model = sigma*np.random.randn(population, env.action_space)
    actions_model = weights_pop_model*base_action + base_action

    prediction = model.predict(actions_model)
    # Force a (population, k) layout whether predict returns a column or a flat vector.
    rewards_model = np.asarray(prediction[0]).reshape(len(actions_model), -1)

    # Rank on the flat score vector. Calling argsort() directly on an (N, 1)
    # array sorts each one-element row and yields index 0 for every row, which
    # made the original return the first candidate n_elite times.
    ranking = rewards_model[:, 0].argsort()

    # Clamp so that n_elite == 0 yields nothing (a plain [-0:] slice would
    # return every candidate) and n_elite > population yields all of them.
    n_keep = max(0, min(int(n_elite), len(ranking)))
    elite_idxs = ranking[len(ranking) - n_keep:]

    return actions_model[elite_idxs], rewards_model[elite_idxs]
    
    

In [None]:
# Ask the fitted model for its highest-scoring sampled actions and show them.
best_action, best_rewards = Get_best_action_from_model(env, model)
for item in (best_rewards, best_action):
    print(item)
    print(item.shape)

# Look for the inputs the model associates with an output of 0:
# build one target row per training sample, each holding the desired error.
print(rewards)
desired_error = 0
desired_error_list = [[desired_error] for _ in range(rewards.shape[0])]
desired_erro_np = np.array(desired_error_list)
print(desired_erro_np)
# infer_newX returns a sequence; only its first element is kept.
inferred = model.infer_newX(desired_erro_np, optimize=False)
predicted_actions = np.array(inferred[0])
print(predicted_actions.shape)
print(predicted_actions[0, :])

In [None]:
#Bayesian Search
def bayesian_learn(env,model, pop_size=env.action_space, sigma=0.3, n_iterations=1):
    """Explore around the model's best predicted action and evaluate in the env.

    In each iteration the model proposes a set of elite actions, their mean is
    used as the centre of a new population of multiplicative perturbations,
    and every perturbed action is evaluated with ``env.step_tcp_rishabh``.
    When another iteration follows, the model is refit with the newly
    evaluated samples so the next proposal uses them.

    Parameters
    ----------
    env : environment exposing ``action_space`` and ``step_tcp_rishabh``.
    model : GPy regression model (used via ``X``, ``Y``, ``set_XY``, ``optimize``).
        It is only mutated when ``n_iterations > 1``.
    pop_size : int
        Number of actions evaluated per iteration. The default is read from
        the global ``env`` once, when this function is defined.
    sigma : float
        Relative standard deviation of the perturbations.
    n_iterations : int
        Number of propose/evaluate rounds. The original referenced an
        undefined ``n_iterations`` and returned inside the loop, so at most
        one round could ever run; the default of 1 keeps that behaviour.

    Returns
    -------
    (actions_explore, rewards_explore) : tuple of lists
        Every evaluated action and its reward, in evaluation order.
    """
    actions_explore = []
    rewards_explore = []

    for iteration in range(n_iterations):
        elite_actions_m, _elite_rewards_m = Get_best_action_from_model(env,model)

        # Collapse the elite set into one centre vector. The reshape tolerates
        # an extra singleton axis in the elite array.
        centre = np.asarray(elite_actions_m).reshape(-1, env.action_space).mean(axis=0)

        # Draw all perturbations before stepping the env, as the original did.
        weights_pop_model = [sigma*np.random.randn(env.action_space) for _ in range(pop_size)]

        batch_actions = []
        batch_rewards = []
        for weights in weights_pop_model:
            action = weights*centre + centre
            batch_actions.append(action)
            batch_rewards.append(env.step_tcp_rishabh(action))

        actions_explore.extend(batch_actions)
        rewards_explore.extend(batch_rewards)

        # Refit only when a further round will actually use the updated model.
        if batch_actions and iteration < n_iterations - 1:
            new_X = np.asarray(batch_actions).reshape(-1, env.action_space)
            new_Y = np.asarray(batch_rewards).reshape(len(batch_rewards), 1)
            model.set_XY(np.vstack((np.asarray(model.X), new_X)),
                         np.vstack((np.asarray(model.Y), new_Y)))
            model.optimize(optimizer='scg', max_iters=10**2)

    return actions_explore,rewards_explore
        


In [None]:
# Plot the model along one input dimension while holding all others fixed.
# range() has no .pop() on Python 3, so build a real list of dimension indices.
fixed_inputs = list(range(env.action_space))

#decide the free input
free_input = fixed_inputs.pop(0)

# Hold every remaining dimension at its original value. GPy takes
# fixed_inputs as a list of (dimension, value) tuples; without it, a model
# with more than two inputs cannot be plotted.
fixed_input_values = [(dim, original_action[dim]) for dim in fixed_inputs]
model.plot(fixed_inputs=fixed_input_values)

In [None]:
# Display the current value of the model's objective as the cell output
# (for GPy models this is presumably the negative log marginal likelihood — TODO confirm).
model.objective_function()

In [None]:
# Stray expression `n` removed: no visible cell defines it, so it raised
# NameError and stopped "Restart Kernel -> Run All".

In [None]:
# Run the cross-entropy method with its default values.
# NOTE: cem_no_net is not defined in the cells shown here; it must already
# exist in the kernel for this cell to run.
# Alternative entry point, left disabled:
#scores = cem()

# To load the GPU less, reduce pop_size (the number of candidates tried).
scores, best_actions = cem_no_net()

# Plot the score of every episode, numbering episodes from 1.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(1, len(scores)+1), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()
    

In [None]:

# Plot only the most recent scores (zoomed view).
zoom = 300
# Slice first and size the x axis from the slice. The original hard-coded
# `zoom` points on the x axis, which raised a shape mismatch when fewer than
# `zoom` scores existed and always labelled the window as episodes 1..zoom.
recent_scores = scores[-zoom:]
first_episode = len(scores) - len(recent_scores) + 1

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(first_episode, len(scores)+1), recent_scores)
ax.set_ylabel('Score zoom')
ax.set_xlabel('Episode #')
plt.show()

In [None]:
# Plot how each tuned parameter of one joint evolves over the episodes.
best_actions_np = np.array(best_actions)
joint = 1  # column offset of the joint inside each parameter group (0-based index)
n_joints = 7  # stride between parameter groups; matches the 7 entries of env.max_vel
episodes = np.arange(1, best_actions_np.shape[0]+1)

# Assumes the action vector is laid out parameter-major, i.e. n_joints
# consecutive values per entry of env.parameters_to_modify — TODO confirm.
for i, parameter in enumerate(env.parameters_to_modify):
    # Create the figure and its axes together. The original called
    # add_subplot on `fig`, a stale figure left over from an earlier cell.
    fig, ax = plt.subplots()
    ax.plot(episodes, best_actions_np[:, joint+i*n_joints])
    ax.set_ylabel(parameter+" Joint"+str(joint))
    ax.set_xlabel('Episode #')
    plt.show()

In [None]:
# Evaluate the trained parameters: rebuild the environment, load the tuned
# parameter table from Excel and run one evaluation step with it.
# (Original note: "not working now".)


# Disabled call kept from the original:
#state = env.reset()
env = env_pybullet_kin_gen3(no_zeros = True,Excel_path_Okay_tcp = "./positions_from_joints_19.xlsx",time_step=0.05,home_angles = [-0.207226464676801,1.5689993219813,-1.01515387451347,-2.45271819663908,2.00795352004673,1.91098991659003,-0.831045149646278])
env.robot.visual_inspection = False

# Bring the maximum velocities closer to the real robot (env attribute and both tables).
env.max_vel = [168,151,128,94,210,48,189]
env.original_parameters_df ["max_vel"]=env.max_vel
env.modified_parameters_df ["max_vel"]=[168,151,128,94,210,48,189]

# This cell also tunes "damping", unlike the first setup cell.
env.update_parameters_to_modify(["mass","max_vel","kp","ki","kd","force_x_one","Ixx","Iyy","Izz","damping"])
env.robot.visual_inspection = False
# Replaces modified_parameters_df wholesale, so the "max_vel" assignment made
# to it above is discarded in favour of whatever the Excel file contains.
env.modified_parameters_df = env.create_df_from_Excel("./Parameters_train_tcp_euc_rishabh.xlsx")


# Short pause before reading the action back (reason not visible here).
t.sleep(0.02)
action = env.action_modified()
action = np.array(action)
print('original action:', env.action_original()) # untuned values, for comparison
print("trained",action)
reward = env.step_tcp_rishabh(action)
print("reward")
print(reward)



In [None]:
# Export the results held in env.df_avg to an Excel file (trained parameters run).
# The frame is first converted to a NumPy array and its column at index 5 is printed
# as a quick check (what that column holds is not visible here — TODO confirm).
a = env.df_avg.to_numpy()
print(a[:,5])
env.df_avg.to_excel("./Train_parameters_result_tcp_euc_rishabh.xlsx")

In [None]:
# Display the table of original (untuned) parameters as the cell output.
env.original_parameters_df

In [None]:
# Baseline run: rebuild the environment and evaluate it with the ORIGINAL
# (untuned) parameters, so the reward can be compared with the trained run.


#state = env.reset()
env = env_pybullet_kin_gen3(no_zeros = True,Excel_path_Okay_tcp = "./positions_from_joints_19.xlsx",time_step=0.05,home_angles = [-0.207226464676801,1.5689993219813,-1.01515387451347,-2.45271819663908,2.00795352004673,1.91098991659003,-0.831045149646278])
env.robot.visual_inspection = False

# Bring the maximum velocities closer to the real robot (env attribute and both tables).
env.max_vel = [168,151,128,94,210,48,189]
env.original_parameters_df ["max_vel"]=env.max_vel
env.modified_parameters_df ["max_vel"]=[168,151,128,94,210,48,189]

env.update_parameters_to_modify(["mass","max_vel","kp","ki","kd","force_x_one","Ixx","Iyy","Izz"])
env.robot.visual_inspection = False
# Use a copy, not an alias. Assigning the same DataFrame object to both
# attributes would let any in-place change to the "modified" table silently
# corrupt the "original" one as well.
env.modified_parameters_df = env.original_parameters_df.copy()


# Short pause before reading the action back (reason not visible here).
t.sleep(0.02)
action = env.action_modified()
action = np.array(action)
print(action)
reward = env.step_tcp_rishabh(action)

print("reward")
print(reward)

In [None]:
# Export the results held in env.df_avg to an Excel file (original parameters run).
# The frame is first converted to a NumPy array and its column at index 5 is printed
# as a quick check (what that column holds is not visible here — TODO confirm).
a = env.df_avg.to_numpy()
print(a[:,5])
env.df_avg.to_excel("./Original_parameters_result_tcp_euc_rishabh.xlsx")