In [1]:
from gym import Env
import gym
import pygame
from gym.spaces import Discrete, Box,Dict
import numpy as np
import random

In [162]:
class AirTraffic():
    def __init__(self,planes:int = 2, grid_size:list = [5,5],radius:int = 2):
        self.radius = radius
        self.planes = planes
        self.GRID_DIM = grid_size
        
        self._action_to_direction = {
            0: np.array([-1, 0]),
            1: np.array([-1, 1]),
            2: np.array([0,  1]),
            3: np.array([-1 , -1]),
            4: np.array([0 , -1]),
            
        }        
    
        self._ACTIONLOOKUP = {
            0: 'Up',
            1: 'front right',
            2: 'right',
            3: 'front left',
            4: 'left'
            }
        self.GRID_DIM = np.array(self.GRID_DIM)
        self.GRID = np.zeros(self.GRID_DIM)
        self.action_space = Discrete(len(self._ACTIONLOOKUP.keys()))
        self.state_space = Discrete(self.GRID_DIM[0]*self.GRID_DIM[1])
        self._ob_space = {}
        for p in range(self.planes):
            self._ob_space[f'plane{p}'] = Box(np.array([0,0]), np.array([self.GRID_DIM[0]-1,self.GRID_DIM[1] - 1]), shape=(2,), dtype=int)
            self._ob_space[f'dest{p}'] = Box(np.array([0,0]), np.array([self.GRID_DIM[0]-1,self.GRID_DIM[1] - 1]), shape=(2,), dtype=int)
        
        self.observation_space = Dict(self._ob_space)
        self._agent_location = np.array([np.array([np.random.randint(self.GRID_DIM[0]),np.random.randint(self.GRID_DIM[1])]) for p in range(self.planes) ])
        self._target_location = np.array([np.array([np.random.randint(self.GRID_DIM[0]),np.random.randint(self.GRID_DIM[1])]) for p in range(self.planes)])
        
    def step(self,action):
        self._prev_agent_location = [None]*self.planes ##### Find the vector of agent from destination to find
        moved_plane = [False]*self.planes

#             for plane,action in enumerate(action): 
        moved_plane = self._move(action)
        done,reward =  self._is_over()
        observation = self._get_obs()
        info = self._get_info()
        return observation, reward, done, info
    def _move(self, action):
        self._prev_agent_location = self._agent_location
        for plane,act in enumerate(action):
            self._agent_location[plane] = np.clip(self._agent_location[plane] + self._action_to_direction[act],[0,0],[self.GRID_DIM[0] - 1, self.GRID_DIM[1] - 1])
        return   self._prev_agent_location== self._agent_location
    def _is_over(self):
        done = (self._agent_location==self._target_location).all(axis = 1)
        reward = self._get_reward()
        return done, reward
    def _get_reward(self):
        reward = []
        # reward will be the minimum of intruder and todestination
        distance, closest_dist = self._get_info()
        for index,plane in enumerate(range(self.planes)):

#             closest_dist = np.min(np.linalg.norm([self._agent_location[plane]]*self.planes-self._agent_location,axis = 1))
            self._intruder = -(self.radius **2 - closest_dist[plane]**2)/(self.radius**2/500)
            self._todestination = 100 - distance[plane]#np.linalg.norm(self._target_location[plane]-self._agent_location[plane])
            print(self._intruder,self._todestination)
            reward.append(np.min([self._intruder,self._todestination]))
        return reward
    def _get_obs(self):
        return {'planes': self._agent_location,'destinations':self._target_location}
    def _get_info(self):
        closest_dist = []
        distance = []
        for index,plane in enumerate(range(self.planes)):
            closest_dist .append(np.min(np.linalg.norm([self._agent_location[plane]]*self.planes-self._agent_location,axis = 1)))
#             self._intruder = -(self.radius **2 - closest_dist**2)/(self.radius**2/500)
            distance .append(np.linalg.norm(self._target_location[plane]-self._agent_location[plane]))
#             print(self._intruder,self._todestination)
        return distance,closest_dist
    def render(self):
        print(self._agent_location)
        rend = self.GRID.copy().astype(dtype = 'U2')
        for plane in range(self.planes):
            rend[self._agent_location[plane][0],self._agent_location[plane][1]] = f'p{plane}'
            rend[self._target_location[plane][0],self._target_location[plane][1]] = f'd{plane}'
#         rend[self._agent_location[:,0],np.array(self._agent_location)[:,1]] = 'p'
#         rend[np.array(self._target_location)[:,0],np.array(self._target_location)[:,1]] = 'd'
        return print(rend)
# if intruser dist < 50rew = intruder if >50 rew = distance dfrom dest 
        
env = AirTraffic()

In [163]:
env.render()

[[3 1]
 [1 0]]
[['0.' '0.' '0.' '0.' '0.']
 ['p1' '0.' '0.' '0.' '0.']
 ['0.' '0.' '0.' '0.' '0.']
 ['0.' 'p0' '0.' '0.' '0.']
 ['0.' 'd1' '0.' '0.' 'd0']]


In [164]:
env.step([0,0])
env.render()

-500.0 96.394448724536
-500.0 95.87689437438235
[[2 1]
 [0 0]]
[['p1' '0.' '0.' '0.' '0.']
 ['0.' '0.' '0.' '0.' '0.']
 ['0.' 'p0' '0.' '0.' '0.']
 ['0.' '0.' '0.' '0.' '0.']
 ['0.' 'd1' '0.' '0.' 'd0']]


In [None]:
env.step([2,])
env.render()

In [43]:
a = [[1,1],[2,2],[3,6]]
b = np.array([[2,3],[2,5],[2,6]])
np.min(np.linalg.norm(a-b,axis=1))

1.0

In [79]:
c = np.zeros([8,8])
c[b[:,0],b[:,1]] = 1
c

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 1., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])

In [114]:
x = [np.array([np.random.randint(4),np.random.randint(5)]) for p in range(2)]
x
y = [np.array([np.random.randint(4),np.random.randint(5)]) for p in range(2)]
y

[array([3, 2]), array([2, 2])]

In [107]:
x+y

[array([0, 1]), array([1, 4]), array([1, 1]), array([2, 2])]

In [120]:
y = np.array(y)
x = np.array(x)
y[0]

array([3, 2])

In [117]:
x

array([[0, 1],
       [0, 1]])

In [158]:
(x==y).all(axis = 1)

array([False, False])