In [1]:
import numpy as np
from gym import Env
from gym.spaces import Discrete, Box
import random
import matplotlib.pyplot as plt

In [2]:
x = Discrete(5)
x.sample()

4

In [3]:
x = Box(low=0, high=1, shape=(4,))
y = x.sample()
print(y)
print(np.max(y))

z = y[0]
print(z)

[0.9870124  0.8652579  0.19092113 0.9144891 ]
0.9870124
0.9870124


In [4]:
import time

def change_variable_every_second(x):
    variable_to_change = 0

    while True:
        print("Current value:", variable_to_change)
        variable_to_change += x
        time.sleep(1)


In [5]:
class DamGateControlEnv(Env):
    def __init__(self, num_levels=16):
        self.num_levels = num_levels
        self.observation_space = Box(low=np.array([0.0]), high=np.array([500.0]))
        self.action_space = Box(low=0, high=0.95, shape=(4,), dtype=float)
        self.water_level = random.uniform(436, 439)
        self.time_second = 3600
        self.inflow_rate = random.uniform(0, 0.25)

    def step(self, action):
        #self.state = np.clip(self.state, 0, self.num_levels - 1)
        self.time_second -= 1

        outflow_rate = np.sum(action) * 0.01
        self.water_level += self.inflow_rate - outflow_rate

        # Reward based on maintaining water level within the desired range
        water_level_penalty = (438 - self.water_level)*10
        action_penalty = -0.1 * np.max(action)  # Penalty for opening more gates

        reward = water_level_penalty + action_penalty

        if self.water_level < 436:
            reward *= 0.1

            # Check if the episode is done
        done = self.water_level > 438.65

        # Add some random noise to the state
        self.water_level += random.uniform(-0.02, 0.02)

        # Update inflow rate for the next step
        self.inflow_rate = random.uniform(0, 0.25)
        
        info = {}
        return self.water_level, reward, done, info

    def render(self):
        #print(f"Current Water Level: {self.state:}")
        print(f"Water Level: {self.water_level}, inflow: {self.inflow_rate}")

    def reset(self):
        self.water_level = random.uniform(436, 439)
        self.inflow_rate = random.uniform(0, 0.25)
        return self.water_level


In [6]:
env = DamGateControlEnv()

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [8]:
import gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import random


class DamEnv(gym.Env):
    """Custom environment for dam spillway control."""

    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(DamEnv, self).__init__()

        # Define action space (opening for each gate)
        self.action_space = gym.spaces.Box(low=0, high=9.5, shape=(4,), dtype=float)

        # Define observation space (water level, inflow rate)
        self.observation_space = gym.spaces.Box(low=435, high=440, dtype=float)

        # Initialize water level and inflow rate
        self.water_level = random.uniform(437, 439)
        self.inflow_rate = random.uniform(0, 0.25)

    def step(self, action):
        """Step the environment based on the given action."""

        # Update the water level based on action and inflow rate
        outflow_rate = np.sum(action) * 0.01
        self.water_level += self.inflow_rate - outflow_rate

        # Reward based on maintaining water level within the desired range
        water_level_penalty = (438 - self.water_level)*10
        action_penalty = -0.1 * np.max(action)  # Penalty for opening more gates

        reward = water_level_penalty + action_penalty

        if self.water_level > 438.65:
            reward -= 100

        

        # Check if water level is outside the desired range
        done = self.water_level < 436

        # Update inflow rate for the next step
        self.inflow_rate = random.uniform(0, 0.25)

        return self.water_level, reward, done, {}

    def reset(self):
        """Reset the environment to the initial state."""

        self.water_level = random.uniform(437, 439)
        self.inflow_rate = random.uniform(0, 0.3)
        return self.water_level

    def render(self, mode='human'):
        """Render the environment (optional)."""

        print(f"Water Level: {self.water_level}, inflow: {self.inflow_rate}")

env = DamEnv()


In [7]:
env.observation_space.sample()

array([240.52075], dtype=float32)

In [8]:
episodes = 10000
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print("Episode:{} Score:{} action{}".format(episode, score, action))

Water Level: 436.4995016507718, inflow: 0.1157993978808452
Water Level: 436.5986858561956, inflow: 0.2008865352911182
Water Level: 436.765489205744, inflow: 0.13934845957862507
Water Level: 436.8712156740783, inflow: 0.12286551575196689
Water Level: 436.9879004980218, inflow: 0.06897720107927935
Water Level: 437.056842798042, inflow: 0.2013866989151366
Water Level: 437.23915355631607, inflow: 0.22102813413431166
Water Level: 437.4453790450161, inflow: 0.07773718982850014
Water Level: 437.5021013129598, inflow: 0.13506084421042552
Water Level: 437.599627006525, inflow: 0.022386212820198675
Water Level: 437.603840729036, inflow: 0.1412435949865257
Water Level: 437.7271939263827, inflow: 0.008845714842706104
Water Level: 437.7229949756086, inflow: 0.24258828885819547
Water Level: 437.9620558654474, inflow: 0.1715371271985865
Water Level: 438.1161457081057, inflow: 0.14665134205029048
Water Level: 438.2421160100416, inflow: 0.22827160379386255
Water Level: 438.4446305264924, inflow: 0.0792

In [9]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [10]:
actions = env.action_space.shape[0]

In [11]:

states = env.observation_space.shape
print(states)

(1,)


In [12]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24, activation = 'relu', input_shape = states))
    model.add(Dense(24, activation = 'relu'))
    model.add(Dense(actions, activation = 'linear'))
    return model

In [21]:
del model

In [22]:
model = build_model(states, actions)

In [23]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 24)                48        
                                                                 
 dense_4 (Dense)             (None, 24)                600       
                                                                 
 dense_5 (Dense)             (None, 4)                 100       
                                                                 
Total params: 748
Trainable params: 748
Non-trainable params: 0
_________________________________________________________________


In [24]:
from rl.agents import DQNAgent

In [25]:
from rl.policy import BoltzmannQPolicy

In [26]:
from rl.memory import SequentialMemory

In [27]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit = 50000, window_length = 1)
    dqn = DQNAgent(model = model, memory=memory, policy=policy, nb_actions = actions, nb_steps_warmup = 10, target_model_update = 1e-2)
    return dqn

In [28]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr = 1e-3), metrics = ["mae"])
dqn.fit(env, nb_steps=50000, visualize = False, verbose = 1)

Training for 100000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,


    6/10000 [..............................] - ETA: 1:44 - reward: 2.4667    



10000 episodes - episode_reward: -10.510 [-112.281, 20.011] - loss: 844.192 - mae: 18.977 - mean_q: -8.406

Interval 2 (10000 steps performed)
10000 episodes - episode_reward: -11.517 [-112.267, 20.207] - loss: 853.548 - mae: 16.356 - mean_q: -10.156

Interval 3 (20000 steps performed)
10000 episodes - episode_reward: -11.077 [-112.150, 20.043] - loss: 859.556 - mae: 16.311 - mean_q: -10.400

Interval 4 (30000 steps performed)
10000 episodes - episode_reward: -11.608 [-112.321, 19.984] - loss: 868.785 - mae: 16.091 - mean_q: -10.514

Interval 5 (40000 steps performed)
10000 episodes - episode_reward: -11.440 [-112.427, 20.032] - loss: 873.469 - mae: 16.156 - mean_q: -10.898

Interval 6 (50000 steps performed)
10000 episodes - episode_reward: -11.534 [-112.337, 20.060] - loss: 879.622 - mae: 16.200 - mean_q: -10.974

Interval 7 (60000 steps performed)
10000 episodes - episode_reward: -11.923 [-112.283, 19.938] - loss: 883.701 - mae: 16.231 - mean_q: -10.981

Interval 8 (70000 steps perf

<keras.callbacks.History at 0x1cfe7933520>

In [29]:
dqn.save_weights('DamGateControlEnv.h5', overwrite=True)

In [None]:
# Load the saved weights
#dqn.load_weights('dqn_weights.h5')

In [47]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import random
from collections import deque

# Define the Deep Q-Network (DQN) model
def build_model(input_shape, num_actions):
    model = keras.Sequential([
        layers.Dense(24, activation='relu', input_shape=input_shape),
        layers.Dense(24, activation='relu'),
        layers.Dense(num_actions, activation='linear')
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    return model

# Define the Deep Q-Learning agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.model = build_model((state_size,), action_size)
        self.target_model = build_model((state_size,), action_size)
        self.target_model.set_weights(self.model.get_weights())
        self.memory = deque(maxlen=1000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration-exploitation trade-off
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 32

    def act(self, state):
        state = np.asarray(state)
        state = state  # Reshape the state for prediction
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])


    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.target_model.predict(next_state.reshape(1, -1))[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)


    def target_train(self):
        self.target_model.set_weights(self.model.get_weights())

# Initialize the DQN agent
state_size = env.observation_space.shape[0] # Update with your state size
action_size = env.action_space.shape[0]  # Update with your action size
agent = DQNAgent(state_size, action_size)

# Training the DQN agent
episodes = 1000
for episode in range(episodes):
    state = env.reset()
    total_reward = 0
    for time in range(500):  # Max episode length
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
        agent.replay()
    agent.target_train()

    # Decay exploration rate
    if agent.epsilon > agent.epsilon_min:
        agent.epsilon *= agent.epsilon_decay

    print("Episode {}: Total Reward: {}".format(episode + 1, total_reward))

# Testing the trained DQN agent
state = env.reset()
total_reward = 0
for time in range(500):
    action = agent.act(state)
    next_state, reward, done, _ = env.step(action)
    total_reward += reward
    state = next_state
    if done:
        break

print("Total Reward during Testing: {}".format(total_reward))


ValueError: Error when checking input: expected dense_78_input to have 2 dimensions, but got array with shape ()

In [30]:
import gym
import numpy as np
from tensorflow.keras.models import load_model

def test_model(model_path, env_name, num_episodes=5):
    # Load the trained model
    model = load_model(model_path)

    for episode in range(num_episodes):
        # Initialize the environment
        env = env_name
        state = env.reset()
        state = np.reshape(state, [1, env.observation_space.shape[0]])

        total_reward = 0

        while True:
            # Choose the best action based on the model's prediction
            action = np.argmax(model.predict(state))

            # Take the chosen action
            next_state, reward, done, _ = env.step(action)

            # Prepare the next state for prediction
            next_state = np.reshape(next_state, [1, env.observation_space.shape[0]])

            # Accumulate the total reward
            total_reward += reward

            # Break the loop if the episode is done
            if done:
                break

            # Update the current state for the next iteration
            state = next_state

        print(f"Episode {episode + 1}, Total Reward: {total_reward}, gate_status: {action}")

if __name__ == "__main__":
    # Provide the path to your trained model (.h5 file)
    model_path = 'DamGateControlEnv.h5'

    # Specify the gym environment name
    env_name = DamGateControlEnv()

    # Number of episodes for testing
    num_episodes = 5

    test_model(model_path, env_name, num_episodes)


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


ValueError: No model config found in the file at <tensorflow.python.platform.gfile.GFile object at 0x000001CFE79017F0>.

In [31]:
total_reward = 0
state = env.reset()

while True:
    action = dqn.forward(state)
    next_state, reward, done, _ = env.step(action)
    total_reward += reward

    if done:
        break

    state = next_state


In [32]:
print(total_reward)

1.8259477214528397


In [33]:
scores = dqn.test(env, nb_episodes =100, visualize = False)
print(np.mean(scores.history('episode reward')))

Testing for 100 episodes ...
Episode 1: reward: 7.855, steps: 1
Episode 2: reward: 7.452, steps: 1
Episode 3: reward: -0.041, steps: 1
Episode 4: reward: 11.388, steps: 1
Episode 5: reward: 8.646, steps: 1
Episode 6: reward: 12.121, steps: 1
Episode 7: reward: 3.401, steps: 1
Episode 8: reward: 17.340, steps: 1
Episode 9: reward: 7.892, steps: 1
Episode 10: reward: 1.639, steps: 1
Episode 11: reward: -111.207, steps: 1
Episode 12: reward: -2.873, steps: 1
Episode 13: reward: 12.428, steps: 1
Episode 14: reward: -0.306, steps: 1
Episode 15: reward: -107.889, steps: 1
Episode 16: reward: -111.518, steps: 1
Episode 17: reward: 9.760, steps: 1
Episode 18: reward: 5.595, steps: 1
Episode 19: reward: 6.502, steps: 1
Episode 20: reward: 17.766, steps: 1
Episode 21: reward: -3.937, steps: 1
Episode 22: reward: -1.995, steps: 1
Episode 23: reward: 3.824, steps: 1
Episode 24: reward: -5.376, steps: 1
Episode 25: reward: -6.399, steps: 1
Episode 26: reward: 16.930, steps: 1
Episode 27: reward: -1

TypeError: 'dict' object is not callable

# Q-learning algorithm

In [None]:
def q_learning(env, num_episodes=1000, alpha=0.1, gamma=0.99, epsilon=0.1):
    q_table = np.zeros((env.observation_space.shape[0], env.action_space.n))
    rewards_history = []
    q_value_history = []  # New list to store Q-values for a specific state-action pair

    for episode in range(num_episodes):
        state = env.reset()
        state = int(state.item()) if hasattr(state, 'item') else int(state)
        done = False
        total_reward = 0

        while not done:
            if np.random.rand() < epsilon:
                action = env.action_space.sample()  # Exploration
            else:
                action = np.argmax(q_table[state])  # Exploitation

            next_state, reward, done, _ = env.step(action)
            next_state = int(next_state.item()) if hasattr(next_state, 'item') else int(next_state)

            # Q-value update
            q_table[state, action] += alpha * (reward + gamma * np.max(q_table[next_state]) - q_table[state, action])

            state = next_state
            total_reward += reward

        rewards_history.append(total_reward)

    # Plot the rewards and Q-values
    plt.figure(figsize=(12, 6))

    # Plot rewards
    plt.subplot(1, 2, 1)
    plt.plot(rewards_history)
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.title('Training Progress')

    # Plot Q-values for the specified state-action pair
    plt.subplot(1, 2, 2)
    plt.plot(q_value_history)
    plt.xlabel('Episode')
    plt.ylabel('Q-value')
    plt.title(f'Q-values for State-Action Pair {plot_state}')

    plt.tight_layout()
    plt.show()

    return q_table, rewards_history

In [None]:
# Function to visualize Q-values for a specific state-action pair
def visualize_q_values(q_table, state_action_pair):
    plt.plot(q_table[state_action_pair[0], state_action_pair[1]])
    plt.xlabel('Episode')
    plt.ylabel('Q-value')
    plt.title(f'Q-values for State-Action Pair {state_action_pair}')
    plt.show()

# Dam Deep Q learning

In [1]:
import gym
from gym import spaces
import pygame
import sys
import numpy as np

class BlueBoxEnv(gym.Env):
    def __init__(self):
        super(BlueBoxEnv, self).__init__()

        # Constants
        self.WIDTH, self.HEIGHT = 400, 400
        self.BLUE = (0, 0, 255)

        # Action and observation spaces
        self.action_space = spaces.Discrete(2)  # 0: No action, 1: Up, 2: Down
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.HEIGHT, self.WIDTH, 3), dtype=np.uint8)

        # Create the screen
        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT))
        pygame.display.set_caption("Blue Box Game")

        # Create the blue box
        self.box_width, self.box_height = 200, 0
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20

        # Set up clock to control the frame rate
        self.clock = pygame.time.Clock()

    def reset(self):
        # Reset the environment
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20
        self.box_height = 0
        return self._get_observation()

    def step(self, action):
        # Update the environment based on the action
        if action == 0:  # Up
            self.box_y -= 5
            self.box_height += 5
        elif action == 1:  # Down
            self.box_y += 0.005
            self.box_height -= 0.005
        elif action == 2:  # Down
            self.box_y += 0.01
            self.box_height -= 0.01
        elif action == 3:  # Down
            self.box_y += 0.015
            self.box_height -= 0.015
        elif action == 4:  # Down
            self.box_y += 0.02
            self.box_height -= 0.02

        # Cap the box height to a reasonable range
        self.box_height = np.clip(self.box_height, 10, self.HEIGHT - 20)

        # Clear the screen
        self.screen.fill((255, 255, 255))

        # Draw the blue box
        pygame.draw.rect(self.screen, self.BLUE, (self.box_x, self.box_y, self.box_width, self.box_height))

        # Update the display
        pygame.display.flip()

        # Cap the frame rate
        self.clock.tick(30)

        # Return the observation, reward, done, and info
        return self._get_observation(), 0.0, False, {}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        pygame.quit()

    def _get_observation(self):
        # Capture the current state of the screen as an observation
        return pygame.surfarray.array3d(pygame.display.get_surface())

# Example of how to use the custom environment
env = BlueBoxEnv()
observation = env.reset()

for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)

    if done:
        observation = env.reset()

env.close()


: 

# Training the agent

In [None]:
env = DamGateControlEnv()

In [None]:
trained_q_table, rewards_history = q_learning(env)

In [None]:
visualize_q_values(trained_q_table, (0, 0))

# Testing the trained agent

In [None]:
state = env.reset()
state = int(state.item()) if hasattr(state, 'item') else int(state)
done = False

In [None]:
while not done:
    action = np.argmax(trained_q_table[state])
    next_state, reward, done, _ = env.step(action)
    next_state = int(next_state.item()) if hasattr(next_state, 'item') else int(next_state)

    print(f"Current State: {state}, Action: {action}, Next State: {next_state}, Reward: {reward}")

    state = next_state

# RL with visualization

In [1]:
import gym
from gym import spaces
import pygame
import sys
import numpy as np
import sys
import random

class BlueBoxEnv(gym.Env):
    def __init__(self):
        pygame.init()
        
        super(BlueBoxEnv, self).__init__()

        self.font = pygame.font.Font(None, 36)
        self.font1 = pygame.font.Font(None, 26)
        self.font2 = pygame.font.Font(None, 20)

        # Constants
        self.WIDTH, self.HEIGHT = 1000, 500
        self.BLUE = (0, 0, 255)
        self.SILVER = (192, 192, 192)
        self.GRAY = (255/2, 255/2, 255/2)
        self.RED = (255,0,0)
        self.GREEN = (100,255,0)


        self.box1_color = self.SILVER
        self.box2_color = self.SILVER
        self.box3_color = self.SILVER
        self.box4_color = self.SILVER
        self.box5_color = self.SILVER
        self.box6_color = self.SILVER
        self.box7_color = self.SILVER
        self.box8_color = self.SILVER

        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT))
        pygame.display.set_caption("Blue Box Game")

        # Create the blue box
        self.box_width, self.box_height = 400, 437.95
        self.box1_width, self.box1_height = 40, 95
        self.box2_width, self.box2_height = 40, 95
        self.box3_width, self.box3_height = 40, 95
        self.box4_width, self.box4_height = 40, 95
        self.box5_width, self.box5_height = 40, 95
        self.box6_width, self.box6_height = 40, 95
        self.box7_width, self.box7_height = 40, 95
        self.box8_width, self.box8_height = 40, 95
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20
        self.box1_x, self.box1_y = (self.WIDTH - self.box1_width-350) // 2, self.HEIGHT - self.box1_height - 300
        self.box2_x, self.box2_y = (self.WIDTH - self.box2_width-250) // 2, self.HEIGHT - self.box2_height - 300
        self.box3_x, self.box3_y = (self.WIDTH - self.box3_width-150) // 2, self.HEIGHT - self.box3_height - 300
        self.box4_x, self.box4_y = (self.WIDTH - self.box4_width-50) // 2, self.HEIGHT - self.box4_height - 300
        self.box5_x, self.box5_y = (self.WIDTH - self.box5_width +50) // 2, self.HEIGHT - self.box5_height - 300
        self.box6_x, self.box6_y = (self.WIDTH - self.box6_width +150) // 2, self.HEIGHT - self.box6_height - 300
        self.box7_x, self.box7_y = (self.WIDTH - self.box7_width +250) // 2, self.HEIGHT - self.box7_height - 300
        self.box8_x, self.box8_y = (self.WIDTH - self.box8_width +350) // 2, self.HEIGHT - self.box8_height - 300


        self.water_fall_start1 = -500
        self.water_fall_start2 = -500
        self.water_fall_start3 = -500
        self.water_fall_start4 = -500
        self.water_fall_start5 = -500
        self.water_fall_start6 = -500
        self.water_fall_start7 = -500
        self.water_fall_start8 = -500

        class Droplet(pygame.sprite.Sprite):
            BLUE = (0, 0, 255)

            # Create the blue box
            box_width, box_height = 400, 437.95
            box1_width, box1_height = 40, 95
            box2_width, box2_height = 40, 95
            box3_width, box3_height = 40, 95
            box4_width, box4_height = 40, 95
            box5_width, box5_height = 40, 95
            box6_width, box6_height = 40, 95
            box7_width, box7_height = 40, 95
            box8_width, box8_height = 40, 95
            ls_width, ls_height = 40, 40
            box_x, box_y = (self.WIDTH - box_width) // 2, self.HEIGHT - box_height - 20
            box1_x, box1_y = (self.WIDTH - box1_width-350) // 2, self.HEIGHT - box1_height - 300
            box2_x, box2_y = (self.WIDTH - box2_width-250) // 2, self.HEIGHT - box2_height - 300
            box3_x, box3_y = (self.WIDTH - box3_width-150) // 2, self.HEIGHT - box3_height - 300
            box4_x, box4_y = (self.WIDTH - box4_width-50) // 2, self.HEIGHT - box4_height - 300
            box5_x, box5_y = (self.WIDTH - box5_width +50) // 2, self.HEIGHT - box5_height - 300
            box6_x, box6_y = (self.WIDTH - box6_width +150) // 2, self.HEIGHT - box6_height - 300
            box7_x, box7_y = (self.WIDTH - box7_width +250) // 2, self.HEIGHT - box7_height - 300
            box8_x, box8_y = (self.WIDTH - box8_width +350) // 2, self.HEIGHT - box8_height - 300

            water_fall_start1 = -500
            water_fall_start2 = -500
            water_fall_start3 = -500
            water_fall_start4 = -500
            water_fall_start5 = -500
            water_fall_start6 = -500
            water_fall_start7 = -500
            water_fall_start8 = -500


            def __init__(self, x, y):
                super().__init__()
                self.image = pygame.Surface((1, 5))
                self.image.fill(self.BLUE)
                self.rect = self.image.get_rect()
                self.rect.center = (x, y)
                self.velocity = random.randint(1, 15)

            def update(self):
                self.rect.y += self.velocity
                if self.rect.y > 1000:
                    self.rect.y = self.box1_y +95
                    x_values = [random.randint(self.water_fall_start1, self.water_fall_start1+self.box1_width),
                                random.randint(self.water_fall_start2, self.water_fall_start2+self.box1_width),
                                random.randint(self.water_fall_start3, self.water_fall_start3+self.box1_width),
                                random.randint(self.water_fall_start4, self.water_fall_start4+self.box1_width),
                                random.randint(self.water_fall_start5, self.water_fall_start5+self.box1_width),
                                    random.randint(self.water_fall_start6, self.water_fall_start6+self.box1_width),
                                    random.randint(self.water_fall_start7, self.water_fall_start7+self.box1_width),
                                    random.randint(self.water_fall_start8, self.water_fall_start8+self.box1_width) ]
                    self.rect.x = random.choice(x_values)

    
        self.all_sprites = pygame.sprite.Group()

        # Create droplets
        for _ in range(20000):
            droplet = Droplet(random.randint(0, 0), random.randint(100, self.HEIGHT))
            self.all_sprites.add(droplet)

        self.clock = pygame.time.Clock()


        # Action and observation spaces
        self.action_space = gym.spaces.Box(low=0, high=9.5, shape=(4,))
        self.observation_space = gym.spaces.Box(low=np.array([435, 0]), high=np.array([440, 5]))

        # Initialize water level and inflow rate
        self.water_level = 438
        self.inflow_rate = np.random.uniform(0, 0.05)

    def reset(self):
        # Reset the environment

        self.box_width, self.box_height = 400, 437.95
        self.box1_width, self.box1_height = 40, 95
        self.box2_width, self.box2_height = 40, 95
        self.box3_width, self.box3_height = 40, 95
        self.box4_width, self.box4_height = 40, 95
        self.box5_width, self.box5_height = 40, 95
        self.box6_width, self.box6_height = 40, 95
        self.box7_width, self.box7_height = 40, 95
        self.box8_width, self.box8_height = 40, 95
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20
        self.box1_x, self.box1_y = (self.WIDTH - self.box1_width-350) // 2, self.HEIGHT - self.box1_height - 300
        self.box2_x, self.box2_y = (self.WIDTH - self.box2_width-250) // 2, self.HEIGHT - self.box2_height - 300
        self.box3_x, self.box3_y = (self.WIDTH - self.box3_width-150) // 2, self.HEIGHT - self.box3_height - 300
        self.box4_x, self.box4_y = (self.WIDTH - self.box4_width-50) // 2, self.HEIGHT - self.box4_height - 300
        self.box5_x, self.box5_y = (self.WIDTH - self.box5_width +50) // 2, self.HEIGHT - self.box5_height - 300
        self.box6_x, self.box6_y = (self.WIDTH - self.box6_width +150) // 2, self.HEIGHT - self.box6_height - 300
        self.box7_x, self.box7_y = (self.WIDTH - self.box7_width +250) // 2, self.HEIGHT - self.box7_height - 300
        self.box8_x, self.box8_y = (self.WIDTH - self.box8_width +350) // 2, self.HEIGHT - self.box8_height - 300


        self.water_fall_start1 = -500
        self.water_fall_start2 = -500
        self.water_fall_start3 = -500
        self.water_fall_start4 = -500
        self.water_fall_start5 = -500
        self.water_fall_start6 = -500
        self.water_fall_start7 = -500
        self.water_fall_start8 = -500

        return self._get_observation()

    def step(self, action):
        
        self.box1_level, self.box2_level, self.box3_level, self.box4_level, self.box5_level, self.box6_level, self.box7_level, self.box8_level = 0,0,0,0,0,0,0,0

        self.water_fall_start1 = -500
        self.water_fall_start2 = -500
        self.water_fall_start3 = -500
        self.water_fall_start4 = -500
        self.water_fall_start5 = -500
        self.water_fall_start6 = -500
        self.water_fall_start7 = -500
        self.water_fall_start8 = -500

        if action[0] > 0:
            self.box4_level, self.box5_level = action[0]*10, action[0]*10
            self.water_fall_start4 = self.box4_x
            self.water_fall_start5 = self.box5_x

            self.box_y -= action[0] *0.001
            self.box_height += action[0] *0.001

        if action[1] > 0:
            self.box3_level, self.box6_level = action[1]*10, action[1]*10
            self.water_fall_start3 = self.box3_x
            self.water_fall_start6 = self.box6_x

            self.box_y -= action[1] *0.001
            self.box_height += action[1] *0.001

        if action[2] > 0:
            self.box2_level, self.box7_level = action[2]*10, action[2]*10
            self.water_fall_start2 = self.box2_x
            self.water_fall_start7 = self.box7_x

            self.box_y -= action[2] *0.001
            self.box_height += action[2] *0.001

        if action[3] > 0:
            self.box1_level, self.box8_level = action[3]*10, action[3]*10
            self.water_fall_start1 = self.box1_x
            self.water_fall_start8 = self.box8_x

            self.box_y -= action[3] *0.001
            self.box_height += action[3] *0.001

        
        outflow_rate = np.sum(action) * 0.001
        self.water_level += self.inflow_rate - outflow_rate


        """

        if action == 5:
            self.box4_level, self.box5_level = 25, 25

            self.box_y += 2.5 * 1
            self.box_height -= 2.5 * 1

        
        if action == 6:
            self.box3_level, self.box6_level = 25, 25

            self.box_y += 2.5 *2
            self.box_height -= 2.5 *2

        if action == 7:
            self.box2_level, self.box7_level = 25, 25

            self.box_y += 2.5 *3
            self.box_height -= 2.5 *3

        if action == 8:
            self.box1_level, self.box8_level = 25, 25

            self.box_y += 2.5 *4
            self.box_height -= 2.5 *4

        if action == 9:
            self.box4_level, self.box5_level = 47, 47

            self.box_y += 4.7 * 1
            self.box_height -= 4.7 * 1

        if action == 10:
            self.box3_level, self.box6_level = 47, 47

            self.box_y += 4.7 *2
            self.box_height -= 4.7 *2


        if action == 11:
            self.box2_level, self.box7_level = 47, 47

            self.box_y += 4.7 *3
            self.box_height -= 4.7 *3

        if action == 12:
            self.box1_level, self.box8_level = 47, 47

            self.box_y += 4.7 *4
            self.box_height -= 4.7 *4

        if action == 13:
            self.box4_level, self.box5_level = 95, 95

            self.box_y += 9.5 * 1
            self.box_height -= 9.5 * 1

        if action == 14:
            self.box3_level, self.box6_level = 95, 95

            self.box_y += 9.5 *2
            self.box_height -= 9.5 *2

        if action == 15:
            self.box2_level, self.box7_level = 95, 95

            self.box_y += 9.5 *3
            self.box_height -= 9.5 *3

        if action == 16:
            self.box1_level, self.box8_level = 95, 95

            self.box_y += 9.5 *4
            self.box_height -= 9.5 *4

        """


        # Cap the box height to a reasonable range
        self.box_height = np.clip(self.box_height, 10, self.HEIGHT - 20)

        water_level_penalty = -abs(self.water_level - 438.325)
        action_penalty = -0.1 * sum(action)  # Penalty for opening more gates

        reward = water_level_penalty + action_penalty

        if self.water_level > 438.65:
            reward = reward * 0.001

        # Check if water level is outside the desired range
        done = self.water_level < 437 or self.water_level > 438.65

        # Update inflow rate for the next step
        self.inflow_rate = np.random.uniform(0, 0.05)

        self.screen.fill((255, 255, 255))

        # Draw the blue box
        pygame.draw.rect(self.screen, self.GRAY, (self.box_x, self.box_y, self.box_width, self.box_height))
        pygame.draw.rect(self.screen, self.box1_color, (self.box1_x, self.box1_y, self.box1_width, self.box1_height))
        height_text1 = self.font1.render(f"{self.box1_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text1, (self.box1_x, self.box1_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box1_x, self.box1_y + 95-self.box1_level, self.box1_width, self.box1_level))
        pygame.draw.rect(self.screen, self.box2_color, (self.box2_x, self.box2_y, self.box2_width, self.box2_height))
        height_text2 = self.font1.render(f"{self.box2_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text2, (self.box2_x, self.box2_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box2_x, self.box1_y + 95-self.box2_level, self.box1_width, self.box2_level))
        pygame.draw.rect(self.screen, self.box3_color, (self.box3_x, self.box3_y, self.box3_width, self.box3_height))
        height_text3 = self.font1.render(f"{self.box3_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text3, (self.box3_x, self.box3_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box3_x, self.box1_y + 95-self.box3_level, self.box1_width, self.box3_level))
        pygame.draw.rect(self.screen, self.box4_color, (self.box4_x, self.box4_y, self.box4_width, self.box4_height))
        height_text4 = self.font1.render(f"{self.box4_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text4, (self.box4_x, self.box4_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box4_x, self.box1_y + 95-self.box4_level, self.box1_width, self.box4_level))
        pygame.draw.rect(self.screen, self.box4_color, (self.box5_x, self.box5_y, self.box5_width, self.box5_height))
        height_text5 = self.font1.render(f"{self.box5_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text5, (self.box5_x, self.box5_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box5_x, self.box1_y + 95-self.box5_level, self.box1_width, self.box5_level))
        pygame.draw.rect(self.screen, self.box4_color, (self.box6_x, self.box6_y, self.box6_width, self.box6_height))
        height_text6 = self.font1.render(f"{self.box6_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text6, (self.box6_x, self.box6_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box6_x, self.box1_y + 95-self.box6_level, self.box1_width, self.box6_level))
        pygame.draw.rect(self.screen, self.box4_color, (self.box7_x, self.box7_y, self.box7_width, self.box7_height))
        height_text7 = self.font1.render(f"{self.box7_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text7, (self.box7_x, self.box7_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box7_x, self.box1_y + 95-self.box7_level, self.box1_width, self.box7_level))
        pygame.draw.rect(self.screen, self.box4_color, (self.box8_x, self.box8_y, self.box8_width, self.box8_height))
        height_text8 = self.font1.render(f"{self.box8_level/10:.1f}", True, (0, 0, 0))
        self.screen.blit(height_text8, (self.box8_x, self.box8_y-20))
        pygame.draw.rect(self.screen, self.BLUE, (self.box8_x, self.box1_y + 95-self.box8_level, self.box1_width, self.box8_level))

        height_text = self.font.render(f"Water Level: {self.box_height:.3f}, inflow: {self.inflow_rate}. control: {action}", True, (0, 0, 0))
        self.screen.blit(height_text, (10, 10))

        self.all_sprites.update()

        self.all_sprites.draw(self.screen)

        # Update the display
        pygame.display.flip()

        # Cap the frame rate
        self.clock.tick(30)



        #return np.array([self.water_level, self.inflow_rate]), reward, done, {}
        # Return the observation, reward, done, and info
        return self._get_observation(), reward, done, {}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        pygame.quit()

    def _get_observation(self):
        # Capture the current state of the screen as an observation
        return pygame.surfarray.array3d(pygame.display.get_surface())


env = BlueBoxEnv()
observation = env.reset()

for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)

    if done:
        observation = env.reset()

env.close()


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [14]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [15]:
actions = env.action_space.shape[0]

In [16]:

states = env.observation_space.shape
print(states)

(2,)


In [17]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24, activation = 'relu', input_shape = states))
    model.add(Dense(24, activation = 'relu'))
    model.add(Dense(actions, activation = 'linear'))
    return model

In [9]:
del model

In [18]:
model = build_model(states, actions)

In [19]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 24)                72        
                                                                 
 dense_7 (Dense)             (None, 24)                600       
                                                                 
 dense_8 (Dense)             (None, 4)                 100       
                                                                 
Total params: 772
Trainable params: 772
Non-trainable params: 0
_________________________________________________________________


In [20]:
from rl.agents import DQNAgent

from rl.policy import BoltzmannQPolicy

from rl.memory import SequentialMemory

def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit = 50000, window_length = 1)
    dqn = DQNAgent(model = model, memory=memory, policy=policy, nb_actions = actions, nb_steps_warmup = 10, target_model_update = 1e-2)
    return dqn

dqn = build_agent(model, actions)
dqn.compile(Adam(lr = 1e-3), metrics = ["mae"])
dqn.fit(env, nb_steps=50000, visualize = False, verbose = 1)



  super().__init__(name, **kwargs)


Training for 50000 steps ...


AttributeError: 'NoneType' object has no attribute 'get_size'

In [None]:
dqn.save_weights('dqn_weights.h5', overwrite=True)

# Load the saved weights
dqn.load_weights('dqn_weights.h5')