DRAFT: Quality of Life Updates #10

Closed · wants to merge 20 commits into from

Changes from all commits
8 changes: 0 additions & 8 deletions Cargo.toml
@@ -17,18 +17,10 @@ rand_pcg = "0.3"
 log = "0.4.11"
 pretty_env_logger = "0.4.0"
 num = "0.3.0"
-find_folder = "0.3.0"
 failure = "0.1.8"
-plotters = "0.3.0"
 derive-new = "0.5"
 cosyne = { version = "0.3.2", optional = true }
-sdl2 = { version = "0.35", features = ["bundled", "gfx"] }
 serde = { version = "1.0", features = ["derive"] }
 derivative = { version = "2.2" }
 nalgebra = "0.31"
-ordered-float = { git = "https://github.com/urmzd/rust-ordered-float.git", features = [
-    "serde",
-    "rand",
-    "std",
-] }
 num-traits = "0.2"
11 changes: 4 additions & 7 deletions examples/cartpole.rs
@@ -1,22 +1,19 @@
-use gym_rs::{
-    core::Env, envs::classical_control::cartpole::CartPoleEnv, utils::renderer::RenderMode,
-};
+use gym_rs::{core::Env, envs::classical_control::cartpole::CartPoleEnv};
 use log::debug;
-use ordered_float::OrderedFloat;
 use rand::{thread_rng, Rng};

 fn main() {
     pretty_env_logger::try_init().unwrap_or(());

-    let mut env = CartPoleEnv::new(RenderMode::Human);
+    let mut env = CartPoleEnv::new();
     env.reset(None, false, None);

     let mut rewards = vec![];

     for _ in 0..15 {
-        let mut current_reward = OrderedFloat(0.);
+        let mut current_reward = 0.;

-        for _ in 0..475 {
+        for _ in 0..500 {
             let action = (&mut thread_rng()).gen_range(0..=1);
             let state_reward = env.step(action);
             current_reward += state_reward.reward;
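The hunk is cut off here, so for reference this is roughly what the full post-PR example would look like end to end. It is a sketch: the `done` check, the per-episode reset, and the `debug!` print below the inner loop are assumed rather than shown in the diff.

```rust
use gym_rs::{core::Env, envs::classical_control::cartpole::CartPoleEnv};
use log::debug;
use rand::{thread_rng, Rng};

fn main() {
    pretty_env_logger::try_init().unwrap_or(());

    let mut env = CartPoleEnv::new();
    env.reset(None, false, None);

    let mut rewards = vec![];

    for _ in 0..15 {
        let mut current_reward = 0.;

        for _ in 0..500 {
            // Sample a random action: 0 (push left) or 1 (push right).
            let action = (&mut thread_rng()).gen_range(0..=1);
            let state_reward = env.step(action);
            current_reward += state_reward.reward;

            // Assumed: end the episode as soon as the environment reports
            // termination, then start a fresh one.
            if state_reward.done {
                break;
            }
        }

        env.reset(None, false, None);
        rewards.push(current_reward);
        debug!("episode reward: {}", current_reward);
    }
}
```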
3 changes: 1 addition & 2 deletions examples/mountain_car.rs
@@ -1,13 +1,12 @@
 use gym_rs::{
     core::{ActionReward, Env},
     envs::classical_control::mountain_car::MountainCarEnv,
-    utils::renderer::RenderMode,
 };
 use rand::{thread_rng, Rng};

 fn main() {
     pretty_env_logger::try_init().unwrap_or(());
-    let mut mc = MountainCarEnv::new(RenderMode::Human);
+    let mut mc = MountainCarEnv::new();
     let _state = mc.reset(None, false, None);

     let mut end: bool = false;
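The rest of this example sits below the fold. A minimal sketch of the random-action loop it presumably runs against the post-PR API; the three-action space (0 = push left, 1 = no push, 2 = push right) and the step counter are assumptions, not part of the diff:

```rust
use gym_rs::{
    core::{ActionReward, Env},
    envs::classical_control::mountain_car::MountainCarEnv,
};
use rand::{thread_rng, Rng};

fn main() {
    pretty_env_logger::try_init().unwrap_or(());
    let mut mc = MountainCarEnv::new();
    let _state = mc.reset(None, false, None);

    let mut end: bool = false;
    let mut steps = 0;

    while !end {
        // Assumed action space: 0 = push left, 1 = no push, 2 = push right.
        let action = (&mut thread_rng()).gen_range(0..3);
        let ActionReward { done, .. } = mc.step(action);
        end = done;
        steps += 1;
    }

    println!("episode finished after {} steps", steps);
}
```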
73 changes: 33 additions & 40 deletions src/core.rs
@@ -1,45 +1,28 @@
-use std::fmt::Debug;
-
-use ordered_float::OrderedFloat;
+use core::fmt;
 use rand_pcg::Pcg64;
-use serde::Serialize;
+use serde::{Serialize, de::DeserializeOwned};

-use crate::{
-    spaces::BoxR,
-    utils::{
-        custom::{structs::Metadata, traits::Sample, types::O64},
-        renderer::{RenderMode, Renders},
-    },
-};
+use crate::{spaces::BoxR, utils::custom::structs::Metadata};

 /// Defines the range of values that can be outputted by a given environment.
 const DEFAULT_REWARD_RANGE: &'static RewardRange = &(RewardRange {
-    lower_bound: OrderedFloat(f64::NEG_INFINITY),
-    upper_bound: OrderedFloat(f64::INFINITY),
+    lower_bound: f64::NEG_INFINITY,
+    upper_bound: f64::INFINITY,
 });

-/// Defines the render mode set by a default environment instances.
-const DEFAULT_RENDER_MODE: &'static RenderMode = &RenderMode::None;
-
 /// Defines a common set of operations available to different environments.
-pub trait Env: Clone + Debug + Serialize + EnvProperties
-where
-    Self::Observation: Sample + Into<Vec<f64>>,
-{
-    /// The type of action supported.
-    type Action;
-
-    /// The type of the observation produced after an action has been applied.
-    type Observation;
-
+pub trait Env: Clone + fmt::Debug + Serialize + EnvProperties {
     /// The type of the metadata object produced by acting on the environment.
     type Info;

     /// The type of the object produced when an environment is reset.
     type ResetInfo;

+    /// Generate an instance.
+    fn new() -> Self;
+
     /// Acts on an environment using the given action, producing a reward.
-    fn step(&mut self, action: Self::Action) -> ActionReward<Self::Observation, Self::Info>;
+    fn step(&mut self, action: usize) -> ActionReward<Self::Observation, Self::Info>;

     /// Resets the environment to an initial random state.
     fn reset(
@@ -49,9 +32,6 @@ where
         options: Option<BoxR<Self::Observation>>,
     ) -> (Self::Observation, Option<Self::ResetInfo>);

-    /// Produces the renders, if any, associated with the given mode.
-    fn render(&mut self, mode: RenderMode) -> Renders;
-
     /// Closes any open resources associated with the internal rendering service.
     fn close(&mut self);
 }
@@ -60,23 +40,27 @@ where
 pub trait EnvProperties
 where
     Self: Sized,
+    Self::Observation: Copy + Serialize + DeserializeOwned + Into<Vec<f64>>
 {
     /// The type of values that can be observed in the action space.
     type ActionSpace;
     /// The type of observations produced
     type ObservationSpace;
+    /// The state value.
+    type Observation;

+    /// The default score when an episode terminates due to expected errors.
+    const DEFAULT_SCORE: f64;
+
+    /// The length of an episode.
+    fn episode_length() -> usize;
+
     /// Provides an object describing additional details about this environment.
     fn metadata(&self) -> &Metadata<Self>;

     /// Provides the random number generator responsible for seeding states.
     fn rand_random(&self) -> &Pcg64;

-    /// Provides the current render mode.
-    fn render_mode(&self) -> &RenderMode {
-        DEFAULT_RENDER_MODE
-    }
-
     /// Provides the range of reward values that can be outputted by this environment.
     fn reward_range(&self) -> &RewardRange {
         DEFAULT_REWARD_RANGE
@@ -87,16 +71,25 @@

     /// Provides the object describing the states that can be observed in this environment.
     fn observation_space(&self) -> &Self::ObservationSpace;
+
+    /// Set state.
+    fn set_observation(&mut self, state: Self::Observation);
+
+    /// Get partial state.
+    fn get_observation_property(&self, idx: usize) -> f64;
+
+    /// Get full state.
+    fn get_observation(&self) -> Self::Observation;
 }

 /// Encapsulates and describes the state update experienced by an environment after acting on an
 /// action.
-#[derive(Clone, Debug, Copy, PartialEq, Eq, Ord, PartialOrd)]
+#[derive(Clone, Debug, Copy, PartialEq, PartialOrd)]
 pub struct ActionReward<T, E> {
     /// The current observable state.
     pub observation: T,
     /// The value of the reward produced.
-    pub reward: O64,
+    pub reward: f64,
     /// Indicates whether the episode has terminated or not.
     pub done: bool,
     /// Indicates whether the episode has terminated early or not.
@@ -106,12 +99,12 @@ pub struct ActionReward<T, E> {
 }

 /// Defines the bounds for the reward value that can be observed.
-#[derive(Clone, Debug, Serialize, PartialEq, Ord, PartialOrd, Eq)]
+#[derive(Clone, Debug, Serialize, PartialEq, PartialOrd)]
 pub struct RewardRange {
     /// The smallest possible reward that can be observed.
-    lower_bound: O64,
+    lower_bound: f64,
     /// The largest possible reward that can be observed.
-    upper_bound: O64,
+    upper_bound: f64,
 }

 /// Implement a default reward range.
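Taken together, the core changes remove the rendering surface (RenderMode, render(), DEFAULT_RENDER_MODE), replace the OrderedFloat wrapper (O64) with plain f64, move Observation onto EnvProperties with Copy + Serialize + DeserializeOwned + Into<Vec<f64>> bounds, and promote construction into the trait via fn new(). A minimal sketch of caller code against the new surface; the generic `rollout` helper is hypothetical and not part of the PR, and it assumes CartPoleEnv's impls satisfy the new bounds:

```rust
use gym_rs::{
    core::{ActionReward, Env, EnvProperties},
    envs::classical_control::cartpole::CartPoleEnv,
};

// Hypothetical helper, not part of the PR: drives any Env with a fixed
// action and accumulates the (now plain f64) reward. It works against the
// trait alone because `new`, `step`, and `reset` all live on Env after
// this change, and `step` now takes a usize action.
fn rollout<E: Env>(env: &mut E, horizon: usize) -> f64 {
    env.reset(None, false, None);

    let mut total = 0.;
    for _ in 0..horizon {
        let ActionReward { reward, done, .. } = env.step(0);
        total += reward;
        if done {
            break;
        }
    }
    total
}

fn main() {
    // Construction no longer takes a RenderMode argument.
    let mut env = CartPoleEnv::new();
    println!("return: {}", rollout(&mut env, 500));

    // The new EnvProperties bounds guarantee observations convert to Vec<f64>.
    let obs: Vec<f64> = env.get_observation().into();
    println!("final state: {:?}", obs);
}
```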