In [3]:
import gym
from gym import Env
from gym.spaces import MultiDiscrete, Box

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env

import numpy as np
import os


  from .autonotebook import tqdm as notebook_tqdm


In [350]:
from __future__ import annotations
from typing import List, Dict
import math
import numpy as np
 
class Planet:
    def __init__(self, name: str, starting_position: np.ndarray, trajectory: List):
        self.name: str = name
        self.trajectory: List = trajectory
        self.planets: List[Planet] = []
        self.starting_position: np.ndarray = starting_position
        self.position: np.ndarray = starting_position
        self.ideal_postion: np.ndarray = None
        self.next_position: np.ndarray = None
        self.distance_to_other_planets: List[np.ndarray] = []
        self.current_step = 0
        self.deviation: np.float32 = 0
        self.distance_to_ideal_position: np.float32 = 0
        self.distance_to_nearest_planet: np.float32 = 0
    
    def set_other_planets(self, other_planets: List) -> List[Planet]:
        self.planets = other_planets
        
    def get_ideal_position(self):
        ideal_position = self.trajectory[self.current_step]
        self.ideal_postion = ideal_position

    def get_direction(self):
        raise NotImplementedError

    def get_distance_to_other_planets(self) -> Dict[str, np.float32]:
        dist_dict = {}
        for planet in self.planets:
            if planet.name != self.name:
                distance = math.dist(self.position, planet.position)
                dist_dict[planet.name] = distance
        return dist_dict

    def get_next_position(self, action: int) -> np.ndarray:
        deviation = self.get_deviation(action)
        if self.deviation < 10:
            self.deviation += deviation

        if self.current_step < len(self.trajectory) - 1:
            next_y: np.float32 = self.trajectory[self.current_step+1][1] + self.deviation
            next_position = np.array([self.current_step+1, next_y], dtype=np.float32)
        else:
            next_position = self.starting_position
        return next_position

    def update_current_step(self):
        self.current_step = self.current_step + 1
    
    def get_deviation(self, action: int) -> np.float32:
        if action==1:
            deviation = -0.1
        elif action==2:
            deviation = 0.1
        else:
            deviation = 0
        return deviation

    def get_distance_to_ideal_position(self) -> np.float32:
        self.get_ideal_position()
        distance: np.float32 = math.dist(self.position, self.ideal_postion)
        return distance

    def update_position(self, action: int):
        self.position = self.get_next_position(action)
    


IndexError: list index out of range

### Creating trajectories

In [5]:
def get_point(x: float, a: float, b: float) -> np.array:

    root_number = (1-(x**2/a**2))*b**2
    if root_number < 0:
        root_number*-1
        y = np.sqrt(root_number)
        return np.array([x, -y])
    else:
        y = np.sqrt(root_number)
        return np.array([x, y]) 

In [87]:
a_1 = 32
b_1 = 4.8
trajectory_1 = []
for i in range(-32,33):
    trajectory_1.append(np.round(get_point(x=i, a=a_1, b=b_1), 2))

trajectory_earth = trajectory_1.copy()
for el in trajectory_1:
    if el[0]==32 or el[0]==-32:
        continue
    else:
        trajectory_earth.append(np.array([-el[0], -el[1]]))

a_2 = 16
b_2 = 8.9
trajectory_2 = []
for i in range(-16,17
               ):
    trajectory_2.append(np.round(get_point(x=i, a=a_2, b=b_2), 2))

trajectory_venus = trajectory_2.copy()
for el in trajectory_2:
    if el[0]==16 or el[0]==-16:
        continue
    else:
        trajectory_venus.append(np.array([-el[0], -el[1]]))

#setteing starting point to zero
new_earth_1 = trajectory_earth[32:]
new_earth_2 = trajectory_earth[0:32]
final_trajectory_earth = new_earth_1 + new_earth_2

new_venus_1 = trajectory_venus[16:]
new_venus_2 = trajectory_venus[0:16]
final_trajectory_venus = new_venus_1 + new_venus_2 + new_venus_1 + new_venus_2


### Adding the planets

In [273]:
earth = Planet(name="earth", starting_position=np.array([0, 4.8]), trajectory=final_trajectory_earth)
venus = Planet(name="venus", starting_position=np.array([0, 8.9]), trajectory=final_trajectory_venus)

In [280]:
earth.get_distance_to_other_planets()

{'venus': 4.28000020980835}

### Setting up test env

The observation space is a box of of dimension (2, 4) - two rows and four columns.<br>
The rows represent planets, and the columns represent:<br>

0. planet position, x-coordinate
1. planet position, y-coordinate
2. distance to nearest planet
3. distance to ideal position

The state of the environment is a (2, 4) np.array matching the observation space described above

In [399]:
class spaceEnv(Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 1}
    
    def __init__(self, planets: List[Planet]) -> None:
        self.action_space = MultiDiscrete([3, 3])
        self.observation_space = Box(low=-100.0, high=100.0, shape=(2, 4), dtype=np.float32)
        #set starting state
        self.state: np.ndarray = np.array(
                        [
                    [0.0, 4.8, 4.1, 0.0],
                    [0.0, 8.9 , 4.1, 0.0]          
                ], dtype=np.float32
            )
        self.planets = planets
        self.current_step = 0
        self.window = None
        self.clock = None
    
    def step(self, action: np.ndarray):
        self.state = self.calculate_state(action)
        obs: np.ndarray = self.state
        reward = self.calculate_reward()
        self.current_step+=1
        self.update_planet_steps()
        done = self.is_done()
        info = {
            "venus": {
                "name": self.planets[0].name,
                "position": self.planets[0].position,
                "distance_to_nearest": self.planets[0].distance_to_nearest_planet,
                "deviation": self.planets[0].deviation,
                "current_step": self.planets[0].current_step
            },
    
            "earth": {
                "name": self.planets[1].name,
                "position": self.planets[1].position,
                "distance_to_nearest": self.planets[1].distance_to_nearest_planet,
                "deviation": self.planets[1].deviation,
                "current_step": self.planets[1].current_step
            }, 
            "step no": self.current_step
        }

        return obs, reward, done, info

    def render(self):
        pass
    
    def reset(self):
        self.state: np.ndarray = np.array(
                        [
                    [0.0, 4.8, 4.1, 0.0],
                    [0.0, 8.9 , 4.1, 0.0]            
                ],
                dtype=np.float32
            )
        self.current_step = 0
        for planet in self.planets:
            planet.position = planet.starting_position
            planet.deviation = 0.0
        return self.state

    def calculate_reward(self):
        reward = 0
        for planet in self.planets:
            if np.abs(planet.distance_to_nearest_planet) > 5:
                reward+=0
                #print("distance_greater than five")
            else:
                reward-=planet.distance_to_nearest_planet
                #print("distance less than five")
                #print(f"distanceto_idealposition for {planet} is {planet.distance_to_ideal_position}")
            if np.abs(planet.distance_to_ideal_position) > 0:
                reward-=np.abs(planet.distance_to_ideal_position)
                #print("distance to ideal less than five")
            else:
                reward+=1
            if np.abs(planet.distance_to_nearest_planet) < 0.1:
                print("collision")
                reward-=100
                
        return reward

    def calculate_state(self, action: np.ndarray) -> np.ndarray:
        positions = []
        for action_number, planet in enumerate(self.planets):
            planet.update_position(action[action_number])
            positions.append(planet.position)
            planet.update_current_step()

        distances = []
        for planet in self.planets:
            distance_to_nearest_planet: np.float32 = min(planet.get_distance_to_other_planets().values())      
            planet.distance_to_nearest_planet = distance_to_nearest_planet
            distance_to_ideal_position = planet.get_distance_to_ideal_position()
            distances.append([
                distance_to_nearest_planet,
                distance_to_ideal_position
            ])
        
        state_list = []
        for i in range(len(self.planets)):
            row = [positions[i][0], positions[i][1], distances[i][0], distances[i][1]]
            state_list.append(row)
        
        return np.array(state_list, dtype=np.float32)
    
    def update_planet_steps(self):
        for planet in self.planets:
            if planet.current_step < len(planet.trajectory) - 1:
                planet.current_step = self.current_step
    
    def is_done(self):
        if self.current_step==127:
            done = True
        else:
            done = False
        return done
        
    

### Test env

In [278]:
earth.get_distance_to_other_planets()

{}

In [347]:
for action, planet in enumerate(planets):
    print(action)

0
1


In [400]:
planets = [earth, venus]

earth.set_other_planets(planets)
venus.set_other_planets(planets)

env = spaceEnv(planets=[venus, earth])
env.reset()

array([[0. , 4.8, 4.1, 0. ],
       [0. , 8.9, 4.1, 0. ]], dtype=float32)

In [401]:
check_env(env, warn=True)



In [349]:
type(env.action_space.sample())

numpy.ndarray

In [None]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{} info{}'.format(episode, score, info))
env.close()

In [340]:
venus.trajectory[8]

array([8.  , 7.71])

In [371]:
venus.trajectory[14]

array([14.  ,  4.31])

In [411]:
env.reset()

array([[0. , 4.8, 4.1, 0. ],
       [0. , 8.9, 4.1, 0. ]], dtype=float32)

In [398]:
earth.deviation


-13.999999999999966

In [444]:
env.step([0, 0])

(array([[33.  , -8.88,  7.69, 34.  ],
        [33.  , -1.19,  7.69,  2.  ]], dtype=float32),
 2,
 False,
 {'venus': {'name': 'venus',
   'position': array([33.  , -8.88], dtype=float32),
   'distance_to_nearest': 7.690000057220459,
   'deviation': 0.0,
   'current_step': 33},
  'earth': {'name': 'earth',
   'position': array([33.  , -1.19], dtype=float32),
   'distance_to_nearest': 7.690000057220459,
   'deviation': 0.0,
   'current_step': 33},
  'step no': 33})