DRAFT: Quality of Life Updates #10

Closed · wants to merge 20 commits into from

Changes from all commits
8 changes: 0 additions & 8 deletions Cargo.toml
@@ -17,18 +17,10 @@ rand_pcg = "0.3"
 log = "0.4.11"
 pretty_env_logger = "0.4.0"
 num = "0.3.0"
-find_folder = "0.3.0"
 failure = "0.1.8"
-plotters = "0.3.0"
 derive-new = "0.5"
 cosyne = { version = "0.3.2", optional = true }
-sdl2 = { version = "0.35", features = ["bundled", "gfx"] }
 serde = { version = "1.0", features = ["derive"] }
 derivative = { version = "2.2" }
 nalgebra = "0.31"
-ordered-float = { git = "https://github.com/urmzd/rust-ordered-float.git", features = [
-    "serde",
-    "rand",
-    "std",
-] }
 num-traits = "0.2"
11 changes: 4 additions & 7 deletions examples/cartpole.rs
@@ -1,22 +1,19 @@
-use gym_rs::{
-    core::Env, envs::classical_control::cartpole::CartPoleEnv, utils::renderer::RenderMode,
-};
+use gym_rs::{core::Env, envs::classical_control::cartpole::CartPoleEnv};
 use log::debug;
-use ordered_float::OrderedFloat;
 use rand::{thread_rng, Rng};

 fn main() {
     pretty_env_logger::try_init().unwrap_or(());

-    let mut env = CartPoleEnv::new(RenderMode::Human);
+    let mut env = CartPoleEnv::new();
     env.reset(None, false, None);

     let mut rewards = vec![];

     for _ in 0..15 {
-        let mut current_reward = OrderedFloat(0.);
+        let mut current_reward = 0.;

-        for _ in 0..475 {
+        for _ in 0..500 {
             let action = (&mut thread_rng()).gen_range(0..=1);
             let state_reward = env.step(action);
             current_reward += state_reward.reward;
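The hunk is cut off here, so for reference this is roughly what the full post-PR example would look like end to end. It is a sketch: the `done` check, the per-episode reset, and the `debug!` print below the inner loop are assumed rather than shown in the diff.

```rust
use gym_rs::{core::Env, envs::classical_control::cartpole::CartPoleEnv};
use log::debug;
use rand::{thread_rng, Rng};

fn main() {
    pretty_env_logger::try_init().unwrap_or(());

    let mut env = CartPoleEnv::new();
    env.reset(None, false, None);

    let mut rewards = vec![];

    for _ in 0..15 {
        let mut current_reward = 0.;

        for _ in 0..500 {
            // Sample a random action: 0 (push left) or 1 (push right).
            let action = (&mut thread_rng()).gen_range(0..=1);
            let state_reward = env.step(action);
            current_reward += state_reward.reward;

            // Assumed: end the episode as soon as the environment reports
            // termination, then start a fresh one.
            if state_reward.done {
                break;
            }
        }

        env.reset(None, false, None);
        rewards.push(current_reward);
        debug!("episode reward: {}", current_reward);
    }
}
```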
3 changes: 1 addition & 2 deletions examples/mountain_car.rs
@@ -1,13 +1,12 @@
 use gym_rs::{
     core::{ActionReward, Env},
     envs::classical_control::mountain_car::MountainCarEnv,
-    utils::renderer::RenderMode,
 };
 use rand::{thread_rng, Rng};

 fn main() {
     pretty_env_logger::try_init().unwrap_or(());
-    let mut mc = MountainCarEnv::new(RenderMode::Human);
+    let mut mc = MountainCarEnv::new();
     let _state = mc.reset(None, false, None);

     let mut end: bool = false;
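The rest of this example sits below the fold. A minimal sketch of the random-action loop it presumably runs against the post-PR API; the three-action space (0 = push left, 1 = no push, 2 = push right) and the step counter are assumptions, not part of the diff:

```rust
use gym_rs::{
    core::{ActionReward, Env},
    envs::classical_control::mountain_car::MountainCarEnv,
};
use rand::{thread_rng, Rng};

fn main() {
    pretty_env_logger::try_init().unwrap_or(());
    let mut mc = MountainCarEnv::new();
    let _state = mc.reset(None, false, None);

    let mut end: bool = false;
    let mut steps = 0;

    while !end {
        // Assumed action space: 0 = push left, 1 = no push, 2 = push right.
        let action = (&mut thread_rng()).gen_range(0..3);
        let ActionReward { done, .. } = mc.step(action);
        end = done;
        steps += 1;
    }

    println!("episode finished after {} steps", steps);
}
```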
73 changes: 33 additions & 40 deletions src/core.rs
@@ -1,45 +1,28 @@
-use std::fmt::Debug;
-
-use ordered_float::OrderedFloat;
+use core::fmt;
 use rand_pcg::Pcg64;
-use serde::Serialize;
+use serde::{Serialize, de::DeserializeOwned};

-use crate::{
-    spaces::BoxR,
-    utils::{
-        custom::{structs::Metadata, traits::Sample, types::O64},
-        renderer::{RenderMode, Renders},
-    },
-};
+use crate::{spaces::BoxR, utils::custom::structs::Metadata};

 /// Defines the range of values that can be outputted by a given environment.
 const DEFAULT_REWARD_RANGE: &'static RewardRange = &(RewardRange {
-    lower_bound: OrderedFloat(f64::NEG_INFINITY),
-    upper_bound: OrderedFloat(f64::INFINITY),
+    lower_bound: f64::NEG_INFINITY,
+    upper_bound: f64::INFINITY,
 });

-/// Defines the render mode set by a default environment instances.
-const DEFAULT_RENDER_MODE: &'static RenderMode = &RenderMode::None;
-
 /// Defines a common set of operations available to different environments.
-pub trait Env: Clone + Debug + Serialize + EnvProperties
-where
-    Self::Observation: Sample + Into<Vec<f64>>,
-{
-    /// The type of action supported.
-    type Action;
-
-    /// The type of the observation produced after an action has been applied.
-    type Observation;
-
+pub trait Env: Clone + fmt::Debug + Serialize + EnvProperties {
     /// The type of the metadata object produced by acting on the environment.
     type Info;

     /// The type of the object produced when an environment is reset.
     type ResetInfo;

+    /// Generate an instance.
+    fn new() -> Self;
+
     /// Acts on an environment using the given action, producing a reward.
-    fn step(&mut self, action: Self::Action) -> ActionReward<Self::Observation, Self::Info>;
+    fn step(&mut self, action: usize) -> ActionReward<Self::Observation, Self::Info>;

     /// Resets the environment to an initial random state.
     fn reset(
@@ -49,9 +32,6 @@ where
         options: Option<BoxR<Self::Observation>>,
     ) -> (Self::Observation, Option<Self::ResetInfo>);

-    /// Produces the renders, if any, associated with the given mode.
-    fn render(&mut self, mode: RenderMode) -> Renders;
-
     /// Closes any open resources associated with the internal rendering service.
     fn close(&mut self);
 }
@@ -60,23 +40,27 @@ where
 pub trait EnvProperties
 where
     Self: Sized,
+    Self::Observation: Copy + Serialize + DeserializeOwned + Into<Vec<f64>>
 {
     /// The type of values that can be observed in the action space.
     type ActionSpace;
     /// The type of observations produced
     type ObservationSpace;
+    /// The state value.
+    type Observation;

+    /// The default score when an episode terminates due to expected errors.
+    const DEFAULT_SCORE: f64;
+
+    /// The length of an episode.
+    fn episode_length() -> usize;
+
     /// Provides an object describing additional details about this environment.
     fn metadata(&self) -> &Metadata<Self>;

     /// Provides the random number generator responsible for seeding states.
     fn rand_random(&self) -> &Pcg64;

-    /// Provides the current render mode.
-    fn render_mode(&self) -> &RenderMode {
-        DEFAULT_RENDER_MODE
-    }
-
     /// Provides the range of reward values that can be outputted by this environment.
     fn reward_range(&self) -> &RewardRange {
         DEFAULT_REWARD_RANGE
@@ -87,16 +71,25 @@

     /// Provides the object describing the states that can be observed in this environment.
     fn observation_space(&self) -> &Self::ObservationSpace;
+
+    /// Set state.
+    fn set_observation(&mut self, state: Self::Observation);
+
+    /// Get partial state.
+    fn get_observation_property(&self, idx: usize) -> f64;
+
+    /// Get full state.
+    fn get_observation(&self) -> Self::Observation;
 }

 /// Encapsulates and describes the state update experienced by an environment after acting on an
 /// action.
-#[derive(Clone, Debug, Copy, PartialEq, Eq, Ord, PartialOrd)]
+#[derive(Clone, Debug, Copy, PartialEq, PartialOrd)]
 pub struct ActionReward<T, E> {
     /// The current observable state.
     pub observation: T,
     /// The value of the reward produced.
-    pub reward: O64,
+    pub reward: f64,
     /// Indicates whether the episode has terminated or not.
     pub done: bool,
     /// Indicates whether the episode has terminated early or not.
@@ -106,12 +99,12 @@ pub struct ActionReward<T, E> {
 }

 /// Defines the bounds for the reward value that can be observed.
-#[derive(Clone, Debug, Serialize, PartialEq, Ord, PartialOrd, Eq)]
+#[derive(Clone, Debug, Serialize, PartialEq, PartialOrd)]
 pub struct RewardRange {
     /// The smallest possible reward that can be observed.
-    lower_bound: O64,
+    lower_bound: f64,
     /// The largest possible reward that can be observed.
-    upper_bound: O64,
+    upper_bound: f64,
 }

 /// Implement a default reward range.
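Taken together, the core changes remove the rendering surface (RenderMode, render(), DEFAULT_RENDER_MODE), replace the OrderedFloat wrapper (O64) with plain f64, move Observation onto EnvProperties with Copy + Serialize + DeserializeOwned + Into<Vec<f64>> bounds, and promote construction into the trait via fn new(). A minimal sketch of caller code against the new surface; the generic `rollout` helper is hypothetical and not part of the PR, and it assumes CartPoleEnv's impls satisfy the new bounds:

```rust
use gym_rs::{
    core::{ActionReward, Env, EnvProperties},
    envs::classical_control::cartpole::CartPoleEnv,
};

// Hypothetical helper, not part of the PR: drives any Env with a fixed
// action and accumulates the (now plain f64) reward. It works against the
// trait alone because `new`, `step`, and `reset` all live on Env after
// this change, and `step` now takes a usize action.
fn rollout<E: Env>(env: &mut E, horizon: usize) -> f64 {
    env.reset(None, false, None);

    let mut total = 0.;
    for _ in 0..horizon {
        let ActionReward { reward, done, .. } = env.step(0);
        total += reward;
        if done {
            break;
        }
    }
    total
}

fn main() {
    // Construction no longer takes a RenderMode argument.
    let mut env = CartPoleEnv::new();
    println!("return: {}", rollout(&mut env, 500));

    // The new EnvProperties bounds guarantee observations convert to Vec<f64>.
    let obs: Vec<f64> = env.get_observation().into();
    println!("final state: {:?}", obs);
}
```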