In [1]:
import math
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import time as t

In [2]:
import pandas as pd
import sys
from env_pybullet_gen3 import env_pybullet_kin_gen3



In [4]:
# Prefer the first CUDA device when one is available (faster); otherwise use the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
print('Device ', device)

# Build the experiment environment and set the robot's visual_inspection flag to False.
env = env_pybullet_kin_gen3()
env.robot.visual_inspection = False

# Names of the URDF parameter groups that will be tuned.
tuned_parameters = ["mass", "Ixx", "Iyy", "Izz", "damping", "max_vel", "kp"]

# The observation space is printed first; the action space and the original
# action are only printed after the tunable parameters have been registered.
print('observation space:', env.observation_space)  # states (a single constant state, per the original note)
env.update_parameters_to_modify(tuned_parameters)
print('action space:', env.action_space)  # number of continuous parameters selected for tuning
print('original action:', env.action_original())  # original values of those parameters




Device  cpu
hola
../Simulation_Pybullet/models/urdf/JACO3_URDF_V11.urdf
Robot launched
hola
(7, 12)
(7, 17)
observation space: 1
mass okey
Ixx okey
Iyy okey
Izz okey
damping okey
max_vel okey
kp okey
action space: 49
original action: [1.377353, 1.163667, 1.16366, 0.930287, 0.678106, 0.678106, 0.500657, 0.00480078220558528, 0.008418724123621643, 0.007544516197001279, 0.0064096919604697605, 0.0016797846804208455, 0.0019375935324615593, 0.0007750385804833523, 0.004755191268457921, 0.0019202057294098781, 0.007486605057526543, 0.0013804130332615912, 0.0015062421641084327, 0.0008273237988932355, 0.0005849825981943527, 0.0022826303695446856, 0.00836116845951151, 0.0019205500000651847, 0.006517816917001926, 0.0008260694053789821, 0.0017630597813546379, 0.0009751695712112207, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 30, 30, 30, 30, 30, 30, 30, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]


In [4]:
#Cross-Entropy Method (CEM), used to choose the weights

# In this case there is only one action per episode (applying the parameters); taking another step changes nothing because the state does not vary.
# For this reason max_t and gamma are not meaningful here, so they are set to max_t = 1 and gamma = 0
def cem_no_net(n_iterations=300, max_t=1, gamma=0.0, print_every=500*0.1, pop_size=20, elite_frac=0.2, sigma=0.05,sigma_reduction_every_print = 0.65, per_one = True ):
    """Tune the environment parameters with the cross-entropy method (no network).

    Each iteration samples ``pop_size`` candidates around the current best
    weight vector, evaluates every candidate with ``env.step``, keeps the
    top ``elite_frac`` of them and uses their mean as the new best vector.
    The new best vector is evaluated once more, its reward is recorded, and
    the parameters are checkpointed to ``./Parameters_train.xlsx``.

    Uses the module-level ``env`` object (``action_space``,
    ``action_original()``, ``step()``, ``save_parameters()``).

    Params
    ======
        n_iterations (int): maximum number of training iterations
        max_t (int): unused; kept for interface compatibility
        gamma (float): unused; kept for interface compatibility
        print_every (int or float): every ``print_every`` iterations the
            average score is printed and ``sigma`` is decayed; a falsy value
            disables both
        pop_size (int): size of population at each iteration
        elite_frac (float): fraction of top performers used in the update
            (at least one candidate is always kept)
        sigma (float): standard deviation of additive noise
        sigma_reduction_every_print (float): factor ``sigma`` is multiplied by
            every ``print_every`` iterations
        per_one (boolean): if True the weights are relative offsets and the
            action is ``original * (1 + weights)``; otherwise the weights are
            the absolute action values

    Returns
    =======
        list: reward of the mean-elite weight vector for every iteration run

    Note: in per-one mode the perturbation is multiplicative, so any entry
    whose original value is 0 stays 0 for the whole run.
    """
    # Keep at least one elite: with n_elite == 0 the slice [-0:] used below
    # would select the WHOLE population instead of the best candidates.
    n_elite = max(1, int(pop_size * elite_frac))

    # Sliding window of recent scores: 10% of the run, but never empty so that
    # the running mean is always defined.
    score_window = max(1, int(n_iterations * 0.1))
    scores_deque = deque(maxlen=score_window)
    scores = []

    # Fixed seed so that repeated runs draw the same noise.
    np.random.seed(0)

    # Initial weights are small random values (non-zero so they can be updated).
    if per_one:
        best_weight = sigma * np.random.randn(env.action_space)
        original_action = np.array(env.action_original())
    else:
        original_action = None
        best_weight = np.add(sigma * np.random.randn(env.action_space), env.action_original())

    def weights_to_action(weights):
        """Map a weight vector to the action handed to ``env.step``."""
        if per_one:
            return np.add(np.multiply(weights, original_action), original_action)
        return weights

    for i_iteration in range(1, n_iterations + 1):

        # New population: random mutations of the current best weight vector.
        weights_pop = [best_weight + (sigma * np.random.randn(env.action_space)) for _ in range(pop_size)]

        # Evaluate every candidate.
        rewards = np.array([env.step(weights_to_action(weights)) for weights in weights_pop])
        print("rewards" + str(i_iteration))
        print(rewards)

        # argsort is ascending, so the last n_elite indices are the best rewards.
        elite_idxs = rewards.argsort()[-n_elite:]
        elite_weights = [weights_pop[i] for i in elite_idxs]

        # The new best weight vector is the mean of the elite candidates.
        best_weight = np.array(elite_weights).mean(axis=0)

        # Evaluate the new best vector; this is the score reported for the iteration.
        reward = env.step(weights_to_action(best_weight))
        scores_deque.append(reward)
        scores.append(reward)

        # Save the checkpoint.
        env.save_parameters("./Parameters_train.xlsx")

        average_score = np.mean(scores_deque)

        if print_every and i_iteration % print_every == 0:
            sigma = sigma * sigma_reduction_every_print
            print('Episode {}\tAverage Score: {:.2f}'.format(i_iteration, average_score))

        if average_score >= 0.0:
            # '{:d}' only accepts integers; the previous float argument made this
            # branch raise ValueError precisely when the run succeeded.
            solved_after = max(i_iteration - score_window, 0)
            print('\nEnvironment solved in {:d} iterations!\tAverage Score: {:.2f}'.format(solved_after, average_score))
            break
    return scores


In [None]:
# Run the cross-entropy method with its default arguments.
# To lower the compute load, reduce pop_size (the number of candidates tried per iteration).
scores = cem_no_net()

# Plot the score recorded at each iteration (x axis starts at 1).
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(scores)+1), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

rewards1
[-549.76516933 -509.27950921 -662.3293083  -635.79921062 -471.87504557
 -568.51506821 -539.0372517  -550.6221749  -499.60561635 -780.18279331
 -635.18079819 -522.83518223 -487.02913354 -673.54720792 -571.25106706
 -609.8091226  -600.99497514 -613.22867836 -481.35804925 -498.67172434]
rewards2
[-489.17380935 -492.98577264 -460.31049183 -489.88797302 -417.27393431
 -502.11261358 -423.95669219 -515.79296636 -539.76355882 -450.27200823
 -479.48200125 -448.49580772 -558.12287388 -550.81516766 -579.49074659
 -440.32773443 -510.92276915 -500.6533749  -598.98286444 -614.20132749]
rewards3
[-427.53712983 -393.95146983 -468.36832457 -441.03063659 -426.4854198
 -399.06213637 -423.32361799 -426.43973908 -408.75843787 -397.60424465
 -406.25005098 -471.18444484 -460.46999022 -457.86964597 -433.63635738
 -403.39052559 -401.35635274 -462.43425781 -387.72301194 -442.32760647]
rewards4
[-365.88605684 -408.1660785  -420.16149619 -446.49594311 -432.14967975
 -389.17222015 -441.67923751 -481.56232

rewards29
[-399.84738379 -359.29188165 -373.23679199 -358.81591688 -384.99067284
 -366.60006515 -386.79186471 -366.8993796  -396.5415447  -425.35261457
 -366.37482883 -370.29470894 -365.733986   -451.19401692 -393.0952841
 -365.63643943 -394.26626483 -356.06967981 -373.95824197 -398.48643614]
rewards30
[-367.70116505 -386.80551156 -375.70726259 -382.50604884 -411.08305435
 -373.49318419 -376.34664811 -374.76680591 -373.05336526 -357.95592174
 -371.08384195 -362.36814374 -381.29114163 -370.15673973 -359.23023199
 -462.03413432 -383.12050207 -409.54888893 -357.25830716 -391.61791793]
rewards31
[-414.96859154 -384.86548338 -395.95299051 -389.41872709 -368.19012091
 -357.61728942 -364.78879086 -412.31419897 -384.36591279 -368.92323794
 -375.66637213 -385.12584729 -374.6789927  -373.44189508 -369.71482524
 -421.38176429 -368.83638439 -374.14729636 -377.60563393 -389.52268777]
rewards32
[-422.06773201 -364.12003099 -375.94931867 -427.39476932 -402.15624895
 -376.21729657 -356.58597423 -410.6

rewards57
[-377.75949214 -381.27773495 -369.23808537 -365.45082084 -379.69197425
 -358.70935951 -369.67130689 -351.92554885 -418.68450859 -360.1889301
 -430.95520299 -351.79412184 -363.04203969 -384.66042701 -389.42768826
 -372.25831475 -362.34321908 -359.0462879  -352.96945553 -364.66561312]
rewards58
[-376.41769006 -356.2535954  -395.0380662  -370.2448721  -364.66160999
 -377.90890236 -351.1100507  -364.32160825 -365.28843866 -357.90586221
 -350.91705108 -359.60973722 -362.29105988 -361.37976026 -373.26741241
 -357.2339737  -392.93021938 -404.01808325 -355.4509444  -355.82425436]
rewards59
[-360.28276704 -355.12467648 -391.18026204 -435.56236837 -374.21947932
 -372.18150748 -363.95752951 -357.17447415 -366.85050518 -353.41244279
 -362.84335388 -368.37830731 -391.25175424 -357.52289321 -362.05700246
 -371.92525359 -363.14757293 -377.73634277 -352.70044071 -367.64233923]
rewards60
[-361.60107353 -363.65611639 -364.88144796 -369.95833058 -359.83917996
 -373.88648339 -365.64835283 -390.8

rewards85
[-366.2508006  -369.49484331 -364.52888844 -391.5520714  -369.82553414
 -365.14120289 -350.01096102 -367.26052125 -373.89665413 -356.4608933
 -360.67035934 -381.8205806  -354.39275411 -352.32760123 -358.98593018
 -353.77627316 -350.59763269 -373.38992287 -358.98425656 -365.00720308]
rewards86
[-361.25015576 -357.13085768 -366.62043972 -349.09895792 -363.58493735
 -355.31945163 -358.63963042 -362.6923025  -398.88225524 -362.3031215
 -367.07349404 -362.87867066 -355.772311   -348.89954154 -397.83030905
 -363.3650752  -372.55553005 -358.85780572 -358.54045913 -356.22093451]
rewards87
[-349.17598431 -353.26620333 -365.69571216 -400.46630325 -363.73519408
 -377.93538893 -350.05929403 -412.64179066 -367.91090455 -377.16957554
 -362.20739259 -365.51702177 -401.54383047 -369.11686665 -370.61233035
 -353.8478216  -353.43529129 -366.62154334 -366.65877831 -379.014186  ]
rewards88
[-361.53842811 -384.05454559 -351.95464778 -355.34077888 -351.10835358
 -350.22667627 -354.03531587 -365.12

rewards113
[-362.65479285 -365.47905479 -352.1961268  -353.16891793 -358.15725846
 -349.44507916 -363.46404394 -352.40002002 -351.27353534 -349.99223502
 -354.19904869 -353.05275473 -357.77632629 -360.06112155 -377.0656417
 -369.15055836 -359.85488461 -352.148766   -352.5365025  -351.6607994 ]
rewards114
[-361.07149978 -349.71205631 -351.98812295 -352.17847799 -353.36904589
 -359.46203923 -355.99775927 -360.36853231 -354.99984101 -351.55192482
 -356.3196708  -351.95534285 -363.66119004 -364.74587333 -355.93327527
 -354.5989665  -352.76125271 -351.7387384  -358.64659458 -375.22693951]
rewards115
[-352.85411706 -359.31028403 -352.96859579 -356.59612826 -360.5944712
 -356.2919404  -358.04599723 -352.2702784  -357.64116383 -355.09788311
 -375.23954216 -356.43678859 -356.46103871 -356.89211095 -355.68372093
 -364.83548293 -351.39932351 -350.55008209 -357.17230499 -364.63109717]
rewards116
[-353.00216825 -368.58576923 -356.88354728 -359.81597203 -368.27819692
 -375.52245934 -357.54311058 -35

rewards141
[-352.89058743 -359.10357636 -364.26605834 -358.82036717 -359.44759359
 -354.70889981 -352.09677398 -353.68345755 -354.88347835 -350.27885899
 -354.8491118  -351.8506094  -350.61724758 -353.84996159 -369.45609157
 -361.4283024  -352.10534727 -368.21358021 -350.50105763 -360.53648425]
rewards142
[-352.9532065  -357.47894575 -357.76025553 -355.5302502  -356.44707912
 -365.40514403 -358.55710533 -364.18664348 -371.38643736 -366.10236936
 -357.18129948 -365.44100925 -350.34616009 -349.91685897 -381.18798676
 -356.46874954 -355.48755697 -366.91414362 -363.7341657  -356.77880612]
rewards143
[-355.75338589 -353.67863681 -364.25711827 -369.29150115 -350.7706294
 -350.15795111 -359.49038121 -353.46050924 -358.15285968 -354.24480578
 -364.54996847 -354.89882798 -361.85172594 -350.74170883 -369.75255271
 -370.32663285 -350.72532612 -355.03216689 -352.06056465 -373.60188549]
rewards144
[-360.98362361 -353.11389584 -348.42203679 -352.96449001 -362.41759918
 -351.78409799 -361.17262964 -3

rewards169
[-357.88157638 -350.06376629 -359.95970165 -357.08761077 -350.25785043
 -348.56278047 -353.07994637 -353.54569009 -348.68219791 -350.86273757
 -349.37339984 -353.11049356 -351.06436907 -347.65992276 -351.51737389
 -349.51473968 -357.07066466 -350.93384895 -349.5810013  -356.12100137]
rewards170
[-349.0399109  -351.37555238 -349.74505581 -351.54988976 -347.70769309
 -350.45224662 -348.48756633 -352.17446564 -353.81733082 -352.74930022
 -351.34950184 -347.82569853 -354.48719793 -352.39941817 -352.40719991
 -351.61564851 -352.49925703 -358.4186607  -351.46271792 -351.98259833]
rewards171
[-354.31466747 -349.35278647 -348.77514225 -349.92855467 -348.60252412
 -349.58922765 -348.75084023 -349.24966015 -349.7967437  -351.30488989
 -353.68863599 -350.31748595 -349.70377657 -353.9165679  -353.26279405
 -348.47362569 -349.38703799 -348.61493991 -351.21529595 -367.39668742]
rewards172
[-349.05738836 -352.10977263 -354.4376685  -348.65684259 -350.54618569
 -352.38698964 -353.15641071 -

rewards197
[-348.4869827  -358.92219683 -349.01107485 -348.92766303 -349.3522791
 -362.39337037 -358.9571245  -350.34449818 -349.82026376 -348.86473694
 -348.82846376 -350.05920795 -353.50611239 -354.06039424 -348.73593436
 -354.30175023 -352.98996702 -351.80040253 -348.33218527 -353.21174413]
rewards198
[-370.95683519 -349.09167154 -352.97673032 -348.69399591 -351.75880192
 -348.60715201 -350.28329317 -352.42495586 -364.56889746 -350.45999665
 -349.40038195 -352.36132026 -349.93455393 -348.06589809 -349.56447266
 -348.84434089 -348.34902993 -364.51803207 -350.344157   -351.06364453]
rewards199
[-350.2022787  -355.16948184 -354.02280852 -350.54745111 -351.03760707
 -347.9351425  -348.59347004 -354.45203673 -353.21251833 -352.62482309
 -349.3636688  -351.17686383 -354.19598439 -350.01303682 -356.8373241
 -350.26494786 -353.1147069  -353.25345138 -351.74297352 -350.45840113]
rewards200
[-350.55114357 -361.30284488 -354.60007562 -349.91375833 -347.60630894
 -351.10350133 -348.05949119 -34

In [3]:
# Reload the checkpointed parameters from the Excel file and evaluate them once.
# NOTE(review): this cell was originally flagged as not working - confirm before relying on it.

checkpoint_path = "./Parameters_train.xlsx"

# Fresh environment, configured with the same tunable parameter groups as in training.
env = env_pybullet_kin_gen3()
env.update_parameters_to_modify(["mass","Ixx","Iyy","Izz","damping","max_vel","kp"])
env.robot.visual_inspection = False
env.modified_parameters_df = env.create_df_from_Excel(checkpoint_path)

# Short pause before reading the modified parameters back as an action vector.
t.sleep(0.02)
action = np.array(env.action_modified())
print(action)

# Single evaluation step with the loaded parameters.
reward = env.step(action)

print("reward")
print(reward)
    

hola
../Simulation_Pybullet/models/urdf/JACO3_URDF_V11.urdf
Robot launched
hola
(7, 12)
(7, 17)
mass okey
Ixx okey
Iyy okey
Izz okey
damping okey
max_vel okey
kp okey
[7.79638167e-01 1.48334446e+00 1.15518890e+00 9.07947347e-01
 4.47264303e-01 6.52018079e-01 5.78831644e-01 6.84318560e-03
 6.45719553e-03 8.09856424e-03 4.84896446e-03 2.18423029e-03
 2.66481139e-03 6.81450446e-04 5.81072557e-03 1.38699579e-03
 1.14170823e-02 1.45373411e-03 1.75416652e-03 7.50523079e-04
 3.54149444e-04 2.51646632e-03 6.55800279e-03 1.94742661e-03
 8.15637829e-03 9.05975596e-04 1.28661440e-03 1.01790436e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 2.75531405e+01
 2.60981814e+01 3.07277854e+01 3.53306620e+01 2.99576606e+01
 3.32426558e+01 2.93014952e+01 1.30122439e-01 1.17561442e-01
 8.08765193e-02 6.10829992e-02 8.21241719e-02 7.63945303e-02
 1.04560926e-01]
reward
-346.7272372200783


In [4]:
# Write the environment's df_avg table to an Excel workbook.
results_path = "./Train_parameters_result.xlsx"
env.df_avg.to_excel(results_path)