In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
#matplotlib.use("Agg")
from Utils import *
from Robot import Robot
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import random
from random import gauss,uniform
from random import seed
from Networks import Actor, Critic

In [None]:
# Seed every RNG the notebook can draw from so a fresh "Restart & Run All"
# reproduces the same run. Python's `random` drives goal sampling and the
# exploration noise; NumPy and torch are seeded as well because network
# initialisation (and possibly the project helpers — TODO confirm) use them.
SEED=1821
seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# One row per joint of the 3-joint arm.
# NOTE(review): columns are presumably the DH parameters (alpha, a, d) —
# confirm the expected order against Robot.
DH_params=np.array([[np.pi/2.0,0.0,0.4],
                    [0.0,0.4,0.0],
                    [0.0,0.4,0.0],
                   ])
# Start with every joint coordinate at zero (one entry per DH row).
q0=np.zeros(DH_params.shape[0])
robot=Robot(DH_params,q0)
robot.kinematics()
# Default goal; the training loop overwrites it with a random goal each episode.
robot.goal=np.array([0.0,-0.8,0.4])

In [None]:
# ---- Actor / critic networks (online + target), all moved to the GPU ----
P_net = Actor().cuda()
Pt_net = Actor().cuda()
Q_net = Critic().cuda()
Qt_net = Critic().cuda()

# Start each target network as a parameter-for-parameter copy of its online twin.
for target_net, online_net in ((Qt_net, Q_net), (Pt_net, P_net)):
    for target_param, online_param in zip(target_net.parameters(), online_net.parameters()):
        target_param.data.copy_(online_param.data)

# ---- Loss, optimisers and learning-rate schedules ----
Q_loss = torch.nn.MSELoss(reduction='mean').cuda()
Q_optimizer = torch.optim.Adam(Q_net.parameters(), lr=1e-3, weight_decay=1e-2)
P_optimizer = torch.optim.Adam(P_net.parameters(), lr=1e-4)

# StepLR multiplies the learning rate by `gamma` once every `step_size`
# calls to the scheduler's .step().
Q_Scheduler = torch.optim.lr_scheduler.StepLR(Q_optimizer, step_size=5000, gamma=0.1)
P_Scheduler = torch.optim.lr_scheduler.StepLR(P_optimizer, step_size=5000, gamma=0.1)

# ---- Replay buffer ----
batch_size = 32
max_size = 1_000_000
D = Replay_Buffer(batch_size, max_size)

# ---- Hyperparameters ----
tau = 0.01                                              # soft-update rate for the target networks
gamma = torch.tensor([0.99], dtype=torch.float32).cuda()  # discount factor used in the TD target
Episodes = 20_000                                       # upper bound of the episode loop
Warm_Up = 5000                                          # replay-buffer size required before updates start
T = 750                                                 # maximum number of steps per episode
robot.step_size = 0.01
eps = np.finfo(float).eps
best_score = float('-inf')
episode = 0                                             # first episode index; the loop resumes from here
noise_std = 5.0                                         # std of the Gaussian exploration noise

# Success percentage recorded at each periodic evaluation.
Success_Rate = []

In [None]:
def _observation(robot):
    """Return the flat 1-D observation that is fed to the actor and critic.

    Concatenates, in this order, robot.x0[0:6], robot.acc, robot.goal and
    robot.end_effector(). Every piece is raveled first, which is exactly
    what the nested np.append calls it replaces did.
    """
    parts=(robot.x0[0:6],robot.acc,robot.goal,robot.end_effector())
    return np.concatenate([np.ravel(p) for p in parts])


# Main training loop: one rollout per episode with an off-policy
# actor/critic update after every environment step once the replay buffer
# holds more than Warm_Up transitions. Starting from `episode` lets the cell
# resume after an interruption.
for episode in range(episode,Episodes):
    buff=buffer()

    # New episode: random goal, robot reset to the all-zero state.
    robot.goal=np.array([uniform(-0.1,0.1),uniform(-0.5,-0.7),uniform(0.1,0.3)])
    robot.x0=np.array([0.,0.,0.,0.,0.,0.,0.,0.,0.])
    robot.q=robot.x0[3:6]
    robot.kinematics()
    # Pre-draw the Gaussian exploration noise for the three action components.
    s1 = [gauss(0.0, noise_std) for i in range(T)]
    s2 = [gauss(0.0, noise_std) for i in range(T)]
    s3 = [gauss(0.0, noise_std) for i in range(T)]
    for t in range(T):
        # ---- act: noisy policy output, clamped to [-1, 1] ----
        P_net.eval()
        state=torch.FloatTensor(_observation(robot))
        # No autograd graph is needed while collecting experience.
        with torch.no_grad():
            u=P_net(state.unsqueeze(0).cuda())
            u=torch.clamp(u+torch.FloatTensor([s1[t],s2[t],s3[t]]).cuda(),min=-1.0, max=1.0)
        action=u[0].cpu().numpy()
        robot.get_input(action)
        sol,r,d=robot.step()
        buff.update(sol,r)
        next_state=torch.FloatTensor(_observation(robot))
        # Store a private copy of the action so the buffer never aliases the
        # array that was handed to the robot.
        D.store((state.numpy(),action.copy(),r,d,next_state.numpy()))
        if (d==1):
            break
        if (D.size>Warm_Up):
            # ---- learn: one critic step, then one actor step ----
            P_net.train()
            # Batch tensors get their own names so the rollout's state/r/d
            # are not overwritten.
            batch_state,batch_action,batch_reward,batch_done,batch_next=(x.cuda() for x in D.get_batch())

            # TD target from the target networks; no gradient flows through it.
            with torch.no_grad():
                y_t=batch_reward+gamma*(1.0-batch_done)*Qt_net((batch_next,Pt_net(batch_next)))

            # Critic update. It must be applied BEFORE the actor loss is
            # back-propagated: the actor loss also writes gradients into
            # Q_net, and stepping the critic afterwards would mix them into
            # the critic's update.
            Q_optimizer.zero_grad()
            critic_loss=Q_loss(Q_net((batch_state,batch_action)),y_t)
            critic_loss.backward()
            Q_optimizer.step()

            # Actor update: maximise Q(s, pi(s)).
            #+0.001*U.norm(dim=1,keepdim=True)
            P_optimizer.zero_grad()
            actor_loss=(-Q_net((batch_state,P_net(batch_state)))).mean()
            actor_loss.backward()
            P_optimizer.step()

            # Drop the gradients the actor loss left on the critic (and the
            # already-applied actor gradients) so nothing leaks into the
            # next update.
            Q_optimizer.zero_grad()
            P_optimizer.zero_grad()

            # Soft (Polyak) update of both target networks.
            for target_net,online_net in ((Qt_net,Q_net),(Pt_net,P_net)):
                for target_param,online_param in zip(target_net.parameters(),online_net.parameters()):
                    target_param.data.copy_((1.0 - tau)*target_param.data + tau*online_param.data)
    #if ((episode)%100==0 and D.size>Warm_Up):
    #    print(episode)
    #    robot.show(buff)
    #    robot.plot_sol(buff)
    if (((episode+1)%1000)==0 and D.size>Warm_Up):
        # ---- periodic evaluation: noise-free policy on random goals ----
        print("Test for episode =",episode)
        tests_no=100
        success_rate=0.0
        for i in range(tests_no):
            robot.goal=np.array([uniform(-0.1,0.1),uniform(-0.5,-0.7),uniform(0.1,0.3)])
            buff=buffer()
            robot.x0=np.array([0.,0.,0.,0.,0.,0.,0.,0.,0.])
            robot.q=robot.x0[3:6]
            robot.kinematics()
            P_net.eval()
            for t in range(T):
                state=torch.FloatTensor(_observation(robot))
                with torch.no_grad():
                    u=P_net(state.unsqueeze(0).cuda())
                robot.get_input(u[0].cpu().numpy())
                sol,r,d=robot.step()
                buff.update(sol,r)
                # A test counts as a success as soon as robot.Distance()
                # drops below 0.05.
                if (robot.Distance()<0.05):
                    success_rate=success_rate+1.0
                    break
            #robot.show(buff)
            #robot.plot_sol(buff)
            #filename="Animations/Animation_Episode_"+str(episode)+"test_"+str(i)+".mp4"
            #robot.animate(buff,filename)
            #print("Animation saved for episode = ",episode)
        success_rate=100.0*(success_rate/tests_no)
        Success_Rate.append(success_rate)
        filename="States/State_"+str(episode)+".pth"
        # buff.r here belongs to the last evaluation rollout.
        Save_States(P_net,Pt_net,Q_net,Qt_net,P_optimizer,Q_optimizer,episode,batch_size,buff.r,filename)
        print("Episode = {} , Success_Rate = {} ".format(episode,success_rate))
        plt.figure()
        plt.title(r"$Success-Rate$")
        plt.plot(Success_Rate,"-*")
        plt.xlabel(r"$step$")
        plt.show()
    if (D.size>Warm_Up):
        # StepLR counts calls to .step(): with step_size=5000 the schedulers
        # have to be stepped once per training episode for the learning rate
        # to drop every 5000 episodes. Stepping them only every 5000th
        # episode would never reach the first decay within `Episodes`.
        Q_Scheduler.step()
        P_Scheduler.step()
        # Halve the exploration noise every 5000 episodes.
        if ((episode%5000)==0):
            noise_std=0.5*noise_std