In [409]:
import torch as tr
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import random as rn

In [410]:
device = tr.device('cuda' if tr.cuda.is_available() else 'cpu')
# print(device)

In [411]:
# #Force Field

# X,Y = np.linspace(-0.75,0.75,43),np.linspace(-0.75,0.75,43)

# # def U(x,y,U_0=0.4): #potential function, U_0 is the amplitude, x and y are the coordinates, Gradient needs to be calculated manually in forward pass.
# #     if (x**2+y**2)**0.5<=0.5:
# #         return tr.tensor((16*U_0*((x**2+y**2)**0.5-0.25)**2),dtype=tr.half)
# #     else:
# #         return tr.tensor(0,dtype=tr.half)

# def U(x,y,U_0=0.4): #potential function, U_0 is the amplitude, x and y are the coordinates, Gradient needs to be calculated manually in forward pass.
#     if (x**2+y**2)**0.5<=0.5:
#         return 16*U_0*((x**2+y**2)**0.5-0.25)**2
#     else:
#         return 0
# # Z = U(X,Y)
# # F = tr.from_numpy(np.gradient(Z))
# # print(F.shape)

In [412]:
# #Toy Force Field
# x = np.linspace(-0.75, 0.75, 100)
# y = np.linspace(-0.75, 0.75, 100)
# X, Y = np.meshgrid(x, y)

# # Calculate the force field
# force_field_x = -np.abs(Y)
# force_field_y = np.zeros_like(X)

# # Create the meshgrid for force field
# F = tr.tensor(tr.from_numpy(np.stack((force_field_x, force_field_y), axis=2)),dtype=tr.float)

t1=tr.tensor([1,1],dtype=tr.float)
t2=tr.tensor([1],dtype=tr.float)
print(t2.shape)
t3=tr.tensor([1],dtype=tr.float)
T = tr.cat([t1,t2,t3])
print(T)

torch.Size([1])
tensor([1., 1., 1., 1.])


In [413]:
#Agent Class
class Agent(nn.Module):
    def __init__(self, x=tr.tensor([-0.5], dtype=tr.float), y=tr.tensor([0],dtype=tr.float), b=tr.tensor([0],dtype=tr.float)):#initialize agent at location (-0.5,-0.5)
        super(Agent, self).__init__()
        self.x=x #x coordinate
        self.y=y #y coordinate
        self.b=b #bias
        self.input = nn.Linear(5, 64,dtype=tr.float) #input layer
        self.hidden1 = nn.Linear(64, 64,dtype=tr.float) #hidden layer
        self.hidden2 = nn.Linear(64, 32, dtype = tr.float) #hidden layer
        self.output = nn.Linear(32, 4,dtype=tr.float) 
        self.activation = nn.ReLU() #activation function

    def forward(self, x, network = None): #forward pass
        if network is None: network = self
        x1 = self.activation(self.input(x))
        x2 = self.activation(self.hidden1(x1))
        x3 = self.activation(self.hidden2(x2))
        x4 = self.activation(self.output(x3))
        return x4
    

    def move(self,Q,e):
        if tr.rand(size=(1,))<e:
            L = tr.randint(0, 4, (1,))
        else: L = tr.argmax(Q)
        
        dx,dy = 0.,0.
        if L==0: dx = 0.0357 #up
        elif L==1: dx = -0.0357 #down
        elif L==2: dy = 0.0357 #right
        elif L==3: dy = -0.0357 #left
        else: raise Exception('Invalid action')

        if (-0.75<=self.x+dx<=0.75).item() == False:
            dx=0
        if (-0.75<=self.y+dy<=0.75).item() == False:
            dy=0
        self.x+=dx
        self.y+=dy
        # if dx == dy == 0: raise Exception('Agent did not move')
def R(x,y,F):
    dt = 1/(F+1e-8)
    return -0.5*dt/1000-0.5*tr.norm(tr.tensor([x-0.5,y]),p=2)

# def R(x,y):
#     return -tr.norm(tr.tensor([x-0.5,y]),p=2)




In [414]:
# # toy = Agent()
# # print(toy.x,toy.x.shape)
# a=tr.tensor([-0.5], dtype=tr.float)
# print(a.shape)

In [415]:
#Simulation
agent = Agent().to(device)

t_agent = Agent().to(device)
t_agent.load_state_dict(agent.state_dict())

t_agent.eval()

t_update = 100
optimizer = tr.optim.Adam(agent.parameters(), lr=0.001)
dT = 0
g = 0.8
max_steps = 2000
X,Y=[],[]
print(agent.x,agent.y)


while dT<max_steps:
    
    x0,y0 = agent.x.numpy(),agent.y.numpy()
    X.append(agent.x.numpy())
    Y.append(agent.y.numpy())
    dT+=1
    if dT%t_update==0:#update target agent every 100 steps
        t_agent.load_state_dict(agent.state_dict())
    

    s = tr.cat([agent.x, agent.y, -tr.abs(agent.y), tr.tensor(0,dtype=tr.float), agent.b]).to(device) #state
    Q1 = agent.forward(s) #Q value
    e = 1-dT/max_steps #epsilon
    agent.move(Q1,tr.tensor(e,dtype=tr.float))      	    #move agent
    x1,y1 = agent.x.numpy(),agent.y.numpy()
    if np.abs(x0-x1)>0.0357 or np.abs(y1-y0)>0.0357: raise Exception('Agent moved too far')


    s2 = tr.cat([agent.x, agent.y, -tr.abs(agent.y), tr.tensor(0,dtype=tr.float), agent.b]).to(device) #state
    Q2 = agent.forward(s2,network=t_agent) #Q value
    tQ = R(agent.x,agent.y,agent.y)+ g*tr.max(Q2)         #target Q value
    criterion = nn.MSELoss().to(device)     #1/2*(tQ-Q1)**2 #loss
    loss = criterion(tQ,tr.max(Q1))         #loss


    optimizer.zero_grad()                   #zero gradients
    loss.backward()                         #backpropagate
    optimizer.step()                        #update weights
    if (tr.round(agent.x,decimals = 1),tr.round(agent.y,decimals = 1)) == (0.5000,0.0000):
        print('success')
        break
print(agent.x,agent.y,np.var(X),np.var(Y))
# print(tr.round(agent.x,decimals = 1),tr.round(agent.y,decimals = 1))


tensor([-0.5000]) tensor([0.])


RuntimeError: zero-dimensional tensor (at position 3) cannot be concatenated

In [None]:
# X1 = tr.stack(X).numpy()
# Y1 = tr.stack(Y).numpy()

# plt.scatter(X,Y)
# plt.xlim(-0.75,0.75)
# plt.ylim(-0.75,0.75)
# plt.grid()
# plt.show()

