In [1]:
import math
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import time as t

In [2]:
import pandas as pd
import sys
from env_pybullet_gen3 import env_pybullet_kin_gen3



In [3]:
# Prefer the GPU when CUDA is available (faster); otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Device ', device)

# Create the experiment environment, with visual_inspection switched off
env = env_pybullet_kin_gen3(no_zeros = True)
env.robot.visual_inspection = False

# Names of the parameters selected for tuning
parameters_to_tune = ["mass","max_vel","kp","ki","damping","force_x_one","Ixx","Iyy","Izz"]

# States: there is only one, constant, state
print('observation space:', env.observation_space)
env.update_parameters_to_modify(parameters_to_tune)
# Actions: one continuous value per parameter chosen for tuning
print('action space:', env.action_space)
print('original action:', env.action_original())




Device  cpu
hola
../Simulation_Pybullet/models/urdf/JACO3_URDF_V11.urdf
Robot launched
hola
(7, 12)
(7, 17)
observation space: 1
mass okey
max_vel okey
kp okey
ki okey
damping okey
force_x_one okey
Ixx okey
Iyy okey
Izz okey
action space: 63
original action: [1.377353, 1.163667, 1.16366, 0.930287, 0.678106, 0.678106, 0.500657, 30, 30, 30, 30, 30, 30, 30, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1, 1, 1, 1, 1, 1, 1, 0.00480078220558528, 0.008418724123621643, 0.007544516197001279, 0.0064096919604697605, 0.0016797846804208455, 0.0019375935324615593, 0.0007750385804833523, 0.004755191268457921, 0.0019202057294098781, 0.007486605057526543, 0.0013804130332615912, 0.0015062421641084327, 0.0008273237988932355, 0.0005849825981943527, 0.0022826303695446856, 0.00836116845951151, 0.0019205500000651847, 0.006517816917001926, 0.0008260694053789821, 0.0017630597813546379, 0.0009751695712112207]


In [4]:
# Cross-Entropy Method (CEM), used to choose the weights

# In this case there is only one action (applying the parameters); taking another step changes nothing because the state does not vary.
# For this reason max_t and gamma are meaningless here, so they are set to max_t = 1 and gamma = 0.
def cem_no_net(n_iterations=600, max_t=1, gamma=0.0, print_every=100, pop_size=None, elite_frac=0.1, sigma=0.05,sigma_reduction_every_print = 0.65, per_one = True ):
    """Cross-entropy method that searches directly over the environment parameters.

    No network is involved: every candidate is a vector with one entry per
    tunable parameter of the module-level ``env``. Each iteration samples
    ``pop_size`` candidates around the current best vector, scores each with
    ``env.step``, and replaces the best vector with the mean of the elite
    candidates. The parameters are checkpointed with ``env.save_parameters``
    after every iteration.

    Params
    ======
        n_iterations (int): maximum number of training iterations
        max_t (int): unused; kept for interface compatibility (single-step episodes)
        gamma (float): unused; kept for interface compatibility
        print_every (int): how often to print the average score (over the last
            10% of ``n_iterations`` scores) and shrink ``sigma``; 0 disables both
        pop_size (int or None): size of the population at each iteration;
            ``None`` (default) means ``env.action_space`` read at call time
        elite_frac (float): fraction of top performers used in the update
            (at least one candidate is always kept)
        sigma (float): standard deviation of the additive noise
        sigma_reduction_every_print (float): factor applied to ``sigma`` every
            ``print_every`` iterations
        per_one (boolean): if true, candidates are relative offsets and the
            applied action is ``original * (1 + candidate)``; otherwise
            candidates are the absolute parameter values

    Returns
    =======
        list: score of the updated best vector after each iteration

    Raises
    ======
        ValueError: if ``pop_size`` is smaller than 1
    """
    # Resolve the default lazily so it follows the current env configuration
    # instead of whatever env.action_space was when the function was defined.
    if pop_size is None:
        pop_size = env.action_space
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    n_params = env.action_space

    # Number of candidates kept as elite. Never 0: argsort()[-0:] would
    # select the whole population instead of none of it.
    n_elite = max(1, int(pop_size * elite_frac))

    # Window of recent scores used for the running average (10% of the
    # iterations, at least one so the mean is always defined).
    score_window = max(1, int(n_iterations * 0.1))
    scores_deque = deque(maxlen=score_window)
    scores = []

    # Fixed seed so that repeated runs are comparable
    np.random.seed(0)

    original_action = np.array(env.action_original())

    def to_action(weights):
        # Convert a candidate vector into the action handed to env.step
        if per_one:
            return np.add(np.multiply(weights, original_action), original_action)
        return weights

    # Start from small non-zero values: around 0 in relative mode, around the
    # original parameters in absolute mode.
    if per_one:
        best_weight = sigma * np.random.randn(n_params)
    else:
        best_weight = np.add(sigma * np.random.randn(n_params), original_action)

    for i_iteration in range(1, n_iterations + 1):

        # New population: random perturbations of the current best vector
        weights_pop = [best_weight + (sigma * np.random.randn(n_params)) for _ in range(pop_size)]

        # Score every candidate
        rewards = np.array([env.step(to_action(weights)) for weights in weights_pop])
        print("rewards" + str(i_iteration))
        print(rewards)

        # Keep the candidates with the highest rewards and average them
        elite_idxs = rewards.argsort()[-n_elite:]
        elite_weights = [weights_pop[i] for i in elite_idxs]
        best_weight = np.array(elite_weights).mean(axis=0)

        # Score the new best vector
        reward = env.step(to_action(best_weight))
        scores_deque.append(reward)
        scores.append(reward)

        # Save the checkpoint
        env.save_parameters("./Parameters_train.xlsx")

        if print_every and i_iteration % print_every == 0:
            sigma = sigma * sigma_reduction_every_print
            print('Episode {}\tAverage Score: {:.2f}'.format(i_iteration, np.mean(scores_deque)))

        mean_score = np.mean(scores_deque)
        if mean_score >= 0.0:
            # '{:d}' only accepts integers, so the iteration count is kept integral
            solved_at = max(i_iteration - score_window, 0)
            print('\nEnvironment solved in {:d} iterations!\tAverage Score: {:.2f}'.format(solved_at, mean_score))
            break
    return scores


In [3]:
# Run the cross-entropy method with its default values.
# To lighten the load, pass a smaller pop_size (the number of candidates tried per iteration).
scores = cem_no_net()

# Plot the score obtained at each iteration
fig, ax = plt.subplots()
episode_numbers = np.arange(1, len(scores)+1)
ax.plot(episode_numbers, scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

NameError: name 'cem_no_net' is not defined

In [3]:
# Reload the parameters saved during training and score them once.
# NOTE: not working at the moment.

tuned_parameter_names = ["mass","max_vel","kp","ki","damping","force_x_one","Ixx","Iyy","Izz"]
checkpoint_path = "./Parameters_train.xlsx"

# Fresh environment configured with the same tunable parameters
env = env_pybullet_kin_gen3()
env.update_parameters_to_modify(tuned_parameter_names)
env.robot.visual_inspection = False
env.modified_parameters_df = env.create_df_from_Excel(checkpoint_path)

t.sleep(0.02)

# Build the action from the modified parameters and apply it
action = np.array(env.action_modified())
print(action)
reward = env.step(action)

print("reward")
print(reward)
    

hola
../Simulation_Pybullet/models/urdf/JACO3_URDF_V11.urdf
Robot launched
hola
(7, 12)
(7, 17)
mass okey
max_vel okey
kp okey
ki okey
damping okey
force_x_one okey
Ixx okey
Iyy okey
Izz okey
[ 1.73949051e+00  1.52451049e+00  9.76729593e-01  1.46156033e+00
  5.60223896e-01  8.48270629e-01  5.56069238e-01  2.81556338e+01
  2.66939534e+01  2.38091661e+01  3.61842903e+01  4.46346969e+01
  3.47408646e+01  2.99176552e+01  1.22938475e-01  1.38169471e-01
  1.03923928e-01  1.60274592e-01  8.34927111e-02  1.41600808e-01
  1.22725851e-01 -6.44525815e-04  3.17577065e-03  4.34535552e-02
  6.59162066e-03  5.39533549e-02 -1.84007244e-02  2.67375295e-03
  1.22919757e-01  1.11492787e-01  1.24278760e-01  1.63993343e-02
  7.38432292e-02  6.80288522e-02  1.27088594e-01  1.13854102e+00
  7.06931285e-01  8.61842090e-01  1.16387731e+00  1.17685178e+00
  1.17303453e+00  1.41319870e+00  4.42920663e-03  7.59407199e-03
  9.39075148e-03  5.03197523e-03  2.73903810e-03  2.91892355e-03
  3.65936911e-04  4.45952280

In [4]:
# Print column 5 of the averaged results, then export the whole table to Excel
averaged_df = env.df_avg
a = averaged_df.to_numpy()
print(a[:, 5])
averaged_df.to_excel("./Train_parameters_result.xlsx")

[0.03029725 0.06059015 0.09089181 0.12120369 0.15150956 0.18181692
 0.21212577 0.24243605 0.27274773 0.30306075 0.33337505 0.36369057
 0.39400725 0.42431304 0.45461908 0.48492535 0.51524176 0.54555871
 0.57586872 0.60617664 0.63648396 0.66679113 0.69709827 0.72740542
 0.75505046 0.76311252 0.7654517  0.76612155 0.76630445 0.7663453
 0.76634468 0.76633194 0.76631565 0.7662983  0.76628061 0.76626281
 0.76624494 0.76622704 0.76620911 0.76619114 0.76617314 0.76615511
 0.76613705 0.76611896 0.76610084 0.76608269 0.76606451 0.7660463
 0.76602807 0.7660098  0.7659915  0.76597318 0.76595483 0.76593645
 0.76591804 0.7658996  0.76587745 0.76587305 0.7658583  0.76584048
 0.76582175 0.76580272 0.76577892 0.76575495 0.76573671 0.76572783
 0.76571032 0.7656917  0.76567304 0.7656544  0.76563577 0.76561713
 0.76559846 0.76557978 0.76556106 0.79586842 0.82618383 0.85649941
 0.8868151  0.91713088 0.94744676 0.97776274 1.00807882 1.03839499
 1.06871126 1.09902762 1.12934407 1.15966062 1.18396198 1.191041

In [5]:
# Show the environment's original_parameters_df (rendered as the cell output)
env.original_parameters_df

Unnamed: 0,mass,damping,Ixx,Iyy,Izz,kp,ki,kd,max_vel,force_x_one
0,1.377353,0,0.004801,0.004755,0.002283,0.1,0.0,0.0,30,1
1,1.163667,0,0.008419,0.00192,0.008361,0.1,0.0,0.0,30,1
2,1.16366,0,0.007545,0.007487,0.001921,0.1,0.0,0.0,30,1
3,0.930287,0,0.00641,0.00138,0.006518,0.1,0.0,0.0,30,1
4,0.678106,0,0.00168,0.001506,0.000826,0.1,0.0,0.0,30,1
5,0.678106,0,0.001938,0.000827,0.001763,0.1,0.0,0.0,30,1
6,0.500657,0,0.000775,0.000585,0.000975,0.1,0.0,0.0,30,1
