In [5]:
from gym import spaces
import numpy as np
import random
import math
from scipy.optimize import brentq
from gym.utils import seeding


class LakeDiscrete1():

    def __init__(self):
        self.b = np.array([0.42])
        self.q = np.array([2])
        self.mean = np.array([0.02])
        self.stdev = np.array([0.0017])
        self.delta = np.array([0.98])
        self.alpha = 0.4

        self.steps = 10
        self.Pcrit = brentq(lambda x: x ** self.q / (1 + x ** self.q) - self.b * x, 0.01, 1.5)
        self.natural_inflows = self.get_natural_inflows()

        self.initial_state = np.array([0., 0., 1.])

        self.action_space = spaces.Discrete(11)
        self.observation_space = spaces.Box(low=np.array([0, 0, 0]),
                                            high=np.array([2.3, 0.041, 1]), dtype=np.float)
        self.reward_space = spaces.Box(low=np.array([0, 0]),
                                       high=np.array([0.041, 0.11]), dtype=np.float)

        self.gamma = 1
        self.seed(1)

    def get_natural_inflows(self):
        return np.full(self.steps, self.mean ** 2 / math.sqrt(self.stdev ** 2 + self.mean ** 2))

    def seed(self, seed=None):
        random.seed(seed)
        np.random.seed(seed)
        self.action_space.seed(seed)
        _, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        return self.initial_state

    def simulator(self, state, action, curr_step):
        action = action / 100
        
        prev_p = state[0]
        next_P = \
            (1 - self.b) * prev_p + prev_p ** self.q / (1 + prev_p ** self.q) + \
            action + self.natural_inflows[curr_step - 1]
        reliability = (next_P < self.Pcrit)
        utility = self.alpha * action * np.power(self.delta, (curr_step - 1))

        nextstate = np.array([next_P, utility, reliability])
        reward = utility if reliability else 0

        if curr_step == self.steps:
            absorb = True
        else:
            absorb = False

        return nextstate, reward, absorb


In [4]:
env = LakeDiscrete1()
steps = env.steps
decisions = np.array([10 for i in range(steps)])
state = env.reset()

for t in range(0, steps):
    action = decisions[t]
    nextstate, reward, terminal = env.simulator(state, action, t+1)

    print(reward)

    state = nextstate

[0.04]
[0.0392]
[0.038416]
[0.03764768]
[0.03689473]
[0.03615683]
0
0
0
0


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  high=np.array([2.3, 0.041, 1]), dtype=np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  high=np.array([0.041, 0.11]), dtype=np.float)
