In [1]:
import gym
from gym import Env
from gym.spaces import MultiDiscrete, Box

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

import numpy as np
import os


  from .autonotebook import tqdm as notebook_tqdm


In [86]:
from __future__ import annotations
from typing import List, Dict
import math
import numpy as np
 
class Planet:
    """A body that follows a precomputed trajectory and can be nudged off it.

    The planet keeps a running ``deviation`` that is added to the y-coordinate
    of the ideal trajectory point whenever the next position is computed.
    """

    def __init__(self, name: str, position: np.ndarray, trajectory: List):
        """Create a planet.

        Args:
            name: Identifier; also used to skip the planet itself when
                measuring distances to other planets.
            position: Current position as an array-like ``[x, y]``.
            trajectory: Ideal path indexed by step. Entries are ``[x, y]``
                points; a bare y-value per step is also accepted by
                ``get_next_position``.
        """
        self.name: str = name
        self.trajectory: List = trajectory
        self.planets: List[Planet] = []
        self.position: np.ndarray = position
        # The attribute keeps its original spelling ("postion") so that any
        # code already reading it continues to work.
        self.ideal_postion: np.ndarray = None
        self.next_position: np.ndarray = None
        self.distance_to_other_planets: List[np.ndarray] = []
        self.current_step = 0
        # Accumulated y-offset from the ideal trajectory.
        self.deviation: np.float32 = 0
        self.distance_to_ideal_position: np.float32 = 0

    def set_other_planets(self, other_planets: List) -> None:
        """Register the planets used for distance lookups.

        The list may contain this planet itself; it is skipped by name.
        """
        self.planets = other_planets

    def get_ideal_position(self):
        """Store and return the trajectory entry for the current step."""
        ideal_position = self.trajectory[self.current_step]
        self.ideal_postion = ideal_position
        return ideal_position

    def get_direction(self):
        """Not implemented yet."""
        raise NotImplementedError

    def get_distance_to_other_planets(self) -> Dict[str, float]:
        """Return ``{planet name: Euclidean distance}`` for every other planet."""
        return {
            planet.name: math.dist(self.position, planet.position)
            for planet in self.planets
            if planet.name != self.name
        }

    def get_next_position(self, action) -> np.ndarray:
        """Apply ``action`` to the accumulated deviation and return the next position.

        Side effect: ``self.deviation`` is updated on every call, so calling
        this twice for one step applies the action twice.

        Args:
            action: 1 lowers the deviation by 0.1, 2 raises it by 0.1, any
                other value leaves it unchanged.

        Returns:
            A 1-D array ``[x, y]``. For ``[x, y]`` trajectory entries this is
            the entry's x and its y plus the deviation. For scalar entries the
            step index is used as x, matching the previous behaviour.

        Raises:
            IndexError: if the trajectory has no entry for the next step.
        """
        self.deviation += self.get_deviation(action)
        next_step = self.current_step + 1
        target = np.asarray(self.trajectory[next_step], dtype=float)
        if target.ndim == 0:
            # Scalar trajectory: the entry is the y-value, the step index is x.
            return np.array([next_step, target + self.deviation])
        # Point trajectory: shift only the y-coordinate. Adding the deviation
        # to the whole point would also move x and produce a nested array.
        return np.array([target[0], target[1] + self.deviation])

    def update_current_step(self):
        """Advance the planet to the next trajectory step."""
        self.current_step = self.current_step + 1

    def get_deviation(self, action: int) -> float:
        """Map a discrete action to a change in deviation.

        Returns -0.1 for action 1, 0.1 for action 2 and 0.0 otherwise.
        """
        if action == 1:
            return -0.1
        if action == 2:
            return 0.1
        # The original wrote `deviation==0`, a comparison, which left the
        # variable unbound for the "no change" action.
        return 0.0

    def get_distance_to_ideal_position(self) -> float:
        """Return the distance between the current position and the ideal one.

        Refreshes ``self.ideal_postion`` for the current step first.
        """
        self.get_ideal_position()
        return math.dist(self.position, self.ideal_postion)

    def update_position(self, action):
        """Move the planet to the position produced by ``action``.

        Does not advance ``current_step``; call ``update_current_step`` for that.
        """
        self.position = self.get_next_position(action)
    


### Creating trajectories

In [27]:
def get_point(x: float, h: float, w: float) -> np.ndarray:
    """Return the point ``[x, y]`` on an origin-centred ellipse.

    Args:
        x: x-coordinate of the point.
        h: Semi-axis along y (the height reached at ``x == 0``).
        w: Semi-axis along x; must be non-zero.

    Returns:
        ``[x, h * sqrt(1 - x**2 / w**2)]`` when ``|x| <= |w|``. When ``x`` lies
        outside the ellipse the term under the root is negative; its sign is
        flipped before taking the root and the resulting y is negated.
    """
    root_number = 1 - (x**2 / w**2)
    if root_number < 0:
        # The original `root_number*-1` discarded its result, so the square
        # root of a negative number produced NaN.
        return np.array([x, -h * np.sqrt(-root_number)])
    return np.array([x, h * np.sqrt(root_number)])

In [28]:
# Earth: sample get_point at integer x from -30 to 30 (y >= 0 on that range).
h_1 = 5
w_1 = 30
trajectory_1 = [get_point(x=i, h=h_1, w=w_1) for i in range(-30, 31)]

# Close the orbit by appending every sampled point mirrored through the
# origin. The two end points (x = +/-30) are skipped so they are not repeated.
trajectory_earth = trajectory_1.copy()
trajectory_earth.extend(
    np.array([-el[0], -el[1]]) for el in trajectory_1 if el[0] not in (30, -30)
)

# Venus: same construction, sampled at half-integer x from -15 to 15.
h_2 = 10
w_2 = 15
trajectory_2 = [get_point(x=i / 2, h=h_2, w=w_2) for i in range(-30, 31)]

trajectory_venus = trajectory_2.copy()
trajectory_venus.extend(
    np.array([-el[0], -el[1]]) for el in trajectory_2 if el[0] not in (15, -15)
)




### Adding the planets

In [82]:
# Each planet is created at the first point of its trajectory.
earth = Planet(
    name="earth",
    position=np.array([-30, 0]),
    trajectory=trajectory_earth,
)
venus = Planet(
    name="venus",
    position=np.array([-15, 0]),
    trajectory=trajectory_venus,
)

In [83]:
planets = [earth, venus]

# Every planet receives the full list, itself included.
for planet in planets:
    planet.set_other_planets(planets)


In [65]:
# Sanity check: distances from venus to every other registered planet, keyed by name.
venus.get_distance_to_other_planets()

{'earth': 15.0}

### Setting up env shell

The observation space is a box of dimension (2, 4) - two rows and four columns.<br>
The rows represent planets, and the columns represent:<br>

0. planet position, x-coordinate
1. planet position, y-coordinate
2. distance to nearest planet
3. distance to ideal position

The state of the environment is a (2, 4) np.array matching the observation space described above

In [89]:
class spaceEnv(Env):
    """Gym environment in which every planet is nudged along its trajectory.

    Observation: one row per planet, in the order of ``planets``, with columns
    ``[x, y, distance to nearest other planet, distance to ideal position]``.

    Action: one discrete value per planet, in the same order. The value is
    passed to ``Planet.update_position``.
    """

    def __init__(self, planets: List[Planet]) -> None:
        """Build the spaces and the initial state from ``planets``.

        Args:
            planets: Planets to simulate. Each must already know the others
                (via ``set_other_planets``), otherwise there is no nearest
                planet to measure.

        Raises:
            ValueError: if a planet has no other planet registered.
        """
        super().__init__()
        self.planets = planets
        n_planets = len(planets)
        # One 3-way choice per planet; for two planets this is [3, 3].
        self.action_space = MultiDiscrete([3] * n_planets)
        self.observation_space = Box(low=-40.0, high=40.0, shape=(n_planets, 4), dtype=np.float32)
        # Copies of the starting positions, so reset() can restore them.
        self._initial_positions = [np.array(planet.position) for planet in planets]
        # Derive the starting state from the planets so its row order always
        # matches the rows produced by calculate_state.
        self.state = self._observe()

    def step(self, action):
        """Advance every planet by one step.

        Args:
            action: Sequence with one discrete action per planet.

        Returns:
            ``(state, reward, done, info)``. ``done`` becomes True once any
            planet has no further trajectory point to move to.
        """
        self.state = self.calculate_state(action)
        reward = self.calculate_reward()
        done = any(
            planet.current_step + 1 >= len(planet.trajectory)
            for planet in self.planets
        )
        info = {}
        return self.state, reward, done, info

    def render(self):
        """Rendering is not implemented."""
        pass

    def reset(self):
        """Restore every planet to its starting configuration.

        Returns:
            The initial observation.
        """
        for planet, start in zip(self.planets, self._initial_positions):
            planet.position = start.copy()
            planet.current_step = 0
            planet.deviation = 0
        self.state = self._observe()
        return self.state

    def calculate_reward(self):
        """Return the reward for the current state.

        TODO: the reward has not been designed yet. A constant 0.0 is
        returned so that ``step`` yields a numeric reward.
        """
        return 0.0

    def calculate_state(self, action) -> np.ndarray:
        """Apply one action per planet and return the new observation.

        All planets are moved before any distance is measured, so the
        distances describe the configuration after the step.
        """
        # zip pairs each planet with its own action; enumerate would hand out
        # the planet's index instead.
        for planet_action, planet in zip(action, self.planets):
            # update_position applies the action exactly once.
            planet.update_position(planet_action)
            planet.update_current_step()
        return self._observe()

    def _observe(self) -> np.ndarray:
        """Build the observation array from the planets' current attributes."""
        rows = []
        for planet in self.planets:
            distance_to_nearest_planet = min(planet.get_distance_to_other_planets().values())
            distance_to_ideal_position = planet.get_distance_to_ideal_position()
            rows.append([
                planet.position[0],
                planet.position[1],
                distance_to_nearest_planet,
                distance_to_ideal_position,
            ])
        # Match the dtype declared by observation_space.
        return np.array(rows, dtype=np.float32)
        

In [85]:
# enumerate yields each planet's position in the list, not an action value.
for index, planet in enumerate(planets):
    print(index)
    print(planet.name)

0
earth
1
venus


In [25]:
# Scratch check: a Box space of shape (2, 3) with bounds -40.0 / 40.0, bound to the name `state`.
state = Box(low=-40.0, high=40.0, shape=(2, 3), dtype=np.float32)


array([[  7.8146143,  -2.3182938, -36.068714 ],
       [-31.500095 ,   0.4114863, -13.385839 ]], dtype=float32)

In [88]:
# Scratch check: a nested list of two rows becomes a 2x2 array.
np.array([
    [0,2],
    [0, 4]
])

array([[0, 2],
       [0, 4]])

### Helper functions

In [80]:
# Scratch check: sample one joint action from a two-component MultiDiscrete
# space (three choices each) and read its second component.
acti = MultiDiscrete([3, 3])
tester = acti.sample()
tester[1]

1