In [None]:
import numpy as np
from gym import Env
from gym.spaces import Discrete, Box
import random
import matplotlib.pyplot as plt

In [None]:
x = Discrete(5)
x.sample()

In [None]:
import time

def change_variable_every_second(x):
    variable_to_change = 0

    while True:
        print("Current value:", variable_to_change)
        variable_to_change += x
        time.sleep(1)


In [12]:
class DamGateControlEnv(Env):
    def __init__(self, num_levels=16):
        self.num_levels = num_levels
        self.observation_space = Box(low=np.array([0.0]), high=np.array([500.0]))
        self.action_space = Discrete(num_levels)
        self.state = 438 + random.uniform(-3, 3)
        self.time_second = 3600

    def step(self, action):
        self.state -= action * 0.001
        #self.state = np.clip(self.state, 0, self.num_levels - 1)
        self.time_second -= 1

        reward = 0

        if self.state <= 438.65:
            reward += ((3600 - self.time_second)/3600)*16 - (action * 0.5)
        else:
            reward = -1

            # Check if the episode is done
        done = self.time_second <= 0

        # Add some random noise to the state
        self.state += random.uniform(-0.02, 0.02)
        
        info = {}
        return self.state, reward, done, info

    def render(self):
        print(f"Current Water Level: {self.state:}")

    def reset(self):
        self.state = 438 + random.uniform(-3, 3)
        return self.state


In [31]:
import gym
from gym import spaces
import pygame
import sys
import numpy as np

class BlueBoxEnv(gym.Env):
    def __init__(self):
        super(BlueBoxEnv, self).__init__()

        # Constants
        self.WIDTH, self.HEIGHT = 400, 400
        self.BLUE = (0, 0, 255)

        # Action and observation spaces
        self.action_space = spaces.Discrete(2)  # 0: No action, 1: Up, 2: Down
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.HEIGHT, self.WIDTH, 3), dtype=np.uint8)

        # Create the screen
        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT))
        pygame.display.set_caption("Blue Box Game")

        # Create the blue box
        self.box_width, self.box_height = 200, 0
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20

        # Set up clock to control the frame rate
        self.clock = pygame.time.Clock()

    def reset(self):
        # Reset the environment
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20
        self.box_height = 0
        return self._get_observation()

    def step(self, action):
        # Update the environment based on the action
        if action == 0:  # Up
            self.box_y -= 5
            self.box_height += 5
        elif action == 1:  # Down
            self.box_y += 0.005
            self.box_height -= 0.005
        elif action == 2:  # Down
            self.box_y += 0.01
            self.box_height -= 0.01
        elif action == 3:  # Down
            self.box_y += 0.015
            self.box_height -= 0.015
        elif action == 4:  # Down
            self.box_y += 0.02
            self.box_height -= 0.02

        # Cap the box height to a reasonable range
        self.box_height = np.clip(self.box_height, 10, self.HEIGHT - 20)

        # Clear the screen
        self.screen.fill((255, 255, 255))

        # Draw the blue box
        pygame.draw.rect(self.screen, self.BLUE, (self.box_x, self.box_y, self.box_width, self.box_height))

        # Update the display
        pygame.display.flip()

        # Cap the frame rate
        self.clock.tick(30)

        # Return the observation, reward, done, and info
        return self._get_observation(), 0.0, False, {}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        pygame.quit()

    def _get_observation(self):
        # Capture the current state of the screen as an observation
        return pygame.surfarray.array3d(pygame.display.get_surface())

# Example of how to use the custom environment
env = BlueBoxEnv()
observation = env.reset()

for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)

    if done:
        observation = env.reset()

env.close()


: 

In [13]:
env = DamGateControlEnv()

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [14]:
env.observation_space.sample()

array([404.55298], dtype=float32)

In [15]:
episodes = 10000
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = env.observation_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print("Episode:{} Score:{}".format(episode, score))

Current Water Level: 437.17208053936736
Current Water Level: [436.82623]
Current Water Level: [436.39896]
Current Water Level: [436.22287]
Current Water Level: [436.14944]
Current Water Level: [435.92783]
Current Water Level: [435.8477]
Current Water Level: [435.65027]
Current Water Level: [435.41583]
Current Water Level: [435.2201]
Current Water Level: [434.73495]
Current Water Level: [434.33038]
Current Water Level: [434.1064]
Current Water Level: [434.0365]
Current Water Level: [433.8771]
Current Water Level: [433.79492]
Current Water Level: [433.48752]
Current Water Level: [433.36496]
Current Water Level: [432.8649]
Current Water Level: [432.6896]
Current Water Level: [432.6381]
Current Water Level: [432.45728]
Current Water Level: [432.4074]
Current Water Level: [432.25177]
Current Water Level: [431.74573]
Current Water Level: [431.2885]
Current Water Level: [430.99298]
Current Water Level: [430.85437]
Current Water Level: [430.6498]
Current Water Level: [430.6297]
Current Water L

: 

In [1]:
!pip install tensorflow

^C


In [None]:
!pip install --upgrade pip

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
actions = env.action_space.n

In [None]:
states = env.observation_space.shape

In [None]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24, activation = 'relu', input_shape = states))
    model.add(Dense(24, activation = 'relu'))
    model.add(Dense(actions, activation = 'linear'))
    return model

In [None]:
model = build_model(states, actions)

In [None]:
model.summary()

In [None]:
!pip install tensorflow==2.7.0

In [3]:
!pip install "https://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.15.0-cp311-cp311-win_amd64.whl"

ERROR: tensorflow_cpu-2.15.0-cp311-cp311-win_amd64.whl is not a supported wheel on this platform.


In [1]:
from rl.agents import DQNAgent

Using TensorFlow backend.


AttributeError: partially initialized module 'keras' has no attribute '__version__' (most likely due to a circular import)

In [None]:
from tf_agents.agents.dqn import dqn_agent

In [None]:
from rl.policy import BoltzmannQPolicy

In [None]:
from rl.memory import SequentialMemory

In [None]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit = 50000, window_length = 1)
    dqn = DQNAgent(model = model, memory=memory, policy=policy, nb_actions = actions, nb_steps_warmup = 10, target_model_update = 1e-2)
    return dqn

In [None]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr = 1e-3), metrics = ["mae"])
dqn.fit(env, nb_steps=50000, visualize = False, verbose = 1)

In [None]:
scores = dqn.test(env, nb_episodes =100, visualize = False)
print(np.mean(scores.history('episode reward')))

# Q-learning algorithm

In [None]:
def q_learning(env, num_episodes=1000, alpha=0.1, gamma=0.99, epsilon=0.1):
    q_table = np.zeros((env.observation_space.shape[0], env.action_space.n))
    rewards_history = []
    q_value_history = []  # New list to store Q-values for a specific state-action pair

    for episode in range(num_episodes):
        state = env.reset()
        state = int(state.item()) if hasattr(state, 'item') else int(state)
        done = False
        total_reward = 0

        while not done:
            if np.random.rand() < epsilon:
                action = env.action_space.sample()  # Exploration
            else:
                action = np.argmax(q_table[state])  # Exploitation

            next_state, reward, done, _ = env.step(action)
            next_state = int(next_state.item()) if hasattr(next_state, 'item') else int(next_state)

            # Q-value update
            q_table[state, action] += alpha * (reward + gamma * np.max(q_table[next_state]) - q_table[state, action])

            state = next_state
            total_reward += reward

        rewards_history.append(total_reward)

    # Plot the rewards and Q-values
    plt.figure(figsize=(12, 6))

    # Plot rewards
    plt.subplot(1, 2, 1)
    plt.plot(rewards_history)
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.title('Training Progress')

    # Plot Q-values for the specified state-action pair
    plt.subplot(1, 2, 2)
    plt.plot(q_value_history)
    plt.xlabel('Episode')
    plt.ylabel('Q-value')
    plt.title(f'Q-values for State-Action Pair {plot_state}')

    plt.tight_layout()
    plt.show()

    return q_table, rewards_history

In [None]:
# Function to visualize Q-values for a specific state-action pair
def visualize_q_values(q_table, state_action_pair):
    plt.plot(q_table[state_action_pair[0], state_action_pair[1]])
    plt.xlabel('Episode')
    plt.ylabel('Q-value')
    plt.title(f'Q-values for State-Action Pair {state_action_pair}')
    plt.show()

# Dam Pygame

In [6]:
import pygame
import sys
import random

# Initialize Pygame
pygame.init()

# Constants
WIDTH, HEIGHT = 500, 500
BLUE = (0, 0, 255)
RED = (255, 0, 0)

box1_color = RED
box2_color = RED
box3_color = RED
box4_color = RED

# Create the screen
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Blue Box Game")

# Create the blue box
box_width, box_height = 200, random.uniform(435, 440)
box1_width, box1_height = 20, 20
box2_width, box2_height = 20, 20
box3_width, box3_height = 20, 20
box4_width, box4_height = 20, 20
box_x, box_y = (WIDTH - box_width) // 2, HEIGHT - box_height - 20
box1_x, box1_y = (WIDTH - box1_width-150) // 2, HEIGHT - box1_height - 400
box2_x, box2_y = (WIDTH - box2_width-50) // 2, HEIGHT - box2_height - 400
box3_x, box3_y = (WIDTH - box3_width+50) // 2, HEIGHT - box3_height - 400
box4_x, box4_y = (WIDTH - box4_width +150) // 2, HEIGHT - box4_height - 400

# Set up clock to control the frame rate
clock = pygame.time.Clock()

# Main game loop
while True:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()

    keys = pygame.key.get_pressed()

    # Increase the height of the box when the up arrow key is pressed
    if keys[pygame.K_UP]:
        box_y -= 0.01
        box_height += 0.01

    if keys[pygame.K_DOWN]:
        box_y += 0.01
        box_height -= 0.01

    # Clear the screen
    screen.fill((255, 255, 255))

    # Draw the blue box
    pygame.draw.rect(screen, BLUE, (box_x, box_y, box_width, box_height))
    pygame.draw.rect(screen, box1_color, (box1_x, box1_y, box1_width, box1_height))
    pygame.draw.rect(screen, box2_color, (box2_x, box2_y, box2_width, box2_height))
    pygame.draw.rect(screen, box3_color, (box3_x, box3_y, box3_width, box3_height))
    pygame.draw.rect(screen, box4_color, (box4_x, box4_y, box4_width, box4_height))

    # Update the display
    pygame.display.flip()

    # Cap the frame rate
    clock.tick(30)


SystemExit: 

# Dam Deep Q learning

In [1]:
import gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers


class DamEnv(gym.Env):
    """Custom environment for dam spillway control."""

    metadata = {'render.modes': ['human']}

    def _init_(self):
        super(DamEnv, self)._init_()

        # Define action space (opening for each gate)
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(4,))

        # Define observation space (water level, inflow rate)
        self.observation_space = gym.spaces.Box(low=435, high=440, shape=(2,))

        # Initialize water level and inflow rate
        self.water_level = 438
        self.inflow_rate = np.random.uniform(10, 20)

    def step(self, action):
        """Step the environment based on the given action."""

        # Update the water level based on action and inflow rate
        outflow_rate = sum(action) * 10
        self.water_level += self.inflow_rate - outflow_rate

        # Reward based on maintaining water level within the desired range
        reward = -abs(self.water_level - 438.325)

        # Check if water level is outside the desired range
        done = self.water_level < 438 or self.water_level > 438.65

        # Update inflow rate for next step
        self.inflow_rate = np.random.uniform(10, 20)

        return np.array([self.water_level, self.inflow_rate]), reward, done, {}

    def reset(self):
        """Reset the environment to the initial state."""

        self.water_level = 438
        self.inflow_rate = np.random.uniform(10, 20)
        return np.array([self.water_level, self.inflow_rate])

    def render(self, mode='human'):
        """Render the environment (optional)."""

        print("Water Level:", self.water_level)
        print("Inflow Rate:", self.inflow_rate)


class QNetwork(keras.Model):
    """Deep Q-learning network."""

    def _init_(self, state_size, action_size):
        super(QNetwork, self)._init_()

        # Define network architecture
        self.dense1 = layers.Dense(128, activation="relu", input_shape=(state_size,))
        self.dense2 = layers.Dense(64, activation="relu")
        self.dense3 = layers.Dense(action_size)

    def call(self, inputs):
        """Forward pass through the network."""

        x = self.dense1(inputs)
        x = self.dense2(x)
        return self.dense3(x)


def main():
    # Initialize environment and Q-networks
    env = DamEnv()
    q_network = QNetwork(state_size=2, action_size=4)
    target_q_network = QNetwork(state_size=2, action_size=4)
    target_q_network.set_weights(q_network.get_weights())

    # Hyperparameters
    gamma = 0.99  # Discount factor
    learning_rate = 0.001  # Learning rate
    epsilon = 1.0  # Exploration factor
    epsilon_decay = 0.999  # Epsilon decay rate


    # Training loop
    for episode in range(10000):
        # Reset environment and state
        state = env.reset()

        # Episode loop
        while True:
            # Choose action
            if np.random.random() < epsilon:
                action = env.action_space.sample()
            else:
                q_values = q_network(np.array([state]))
                action = np.argmax(q_values)

            # Take action and observe next state and reward
            next_state, reward, done, _ = env.step(action)

            # Predict Q-value for next state
            next_q_values = target_q_network(np.array([next_state]))

            # Update Q-value
            target = reward + gamma * np.max(next_q_values)
            with tf.GradientTape() as tape:
                q_values = q_network(np.array([state]))
                action_one_hot = tf.one_hot(action, env.action_space.shape[0])
                q_value = tf.reduce_sum(tf.multiply(q_values, action_one_hot), axis=1)
                loss = tf.reduce_mean(tf.square(target - q_value))

            grads = tape.gradient(loss, q_network.trainable_variables)
            optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
            optimizer.apply_gradients(zip(grads, q_network.trainable_variables))

            # Update target Q-network
            weights = []
            for (net_var, target_var) in zip(q_network.get_weights(), target_q_network.get_weights()):
                weights.append(target_var * (1 - 0.001) + 0.001 * net_var)
            target_q_network.set_weights(weights)

            # Update state
            state = next_state

            if done:
                break

        # Decay exploration factor
        epsilon *= epsilon_decay


: 

In [None]:
import gym
from gym import spaces
import pygame
import sys
import numpy as np

class BlueBoxEnv(gym.Env):
    def __init__(self):
        super(BlueBoxEnv, self).__init__()

        # Constants
        self.WIDTH, self.HEIGHT = 400, 400
        self.BLUE = (0, 0, 255)

        # Action and observation spaces
        self.action_space = spaces.Discrete(2)  # 0: No action, 1: Up, 2: Down
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.HEIGHT, self.WIDTH, 3), dtype=np.uint8)

        # Create the screen
        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT))
        pygame.display.set_caption("Blue Box Game")

        # Create the blue box
        self.box_width, self.box_height = 200, 0
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20

        # Set up clock to control the frame rate
        self.clock = pygame.time.Clock()

    def reset(self):
        # Reset the environment
        self.box_x, self.box_y = (self.WIDTH - self.box_width) // 2, self.HEIGHT - self.box_height - 20
        self.box_height = 0
        return self._get_observation()

    def step(self, action):
        # Update the environment based on the action
        if action == 0:  # Up
            self.box_y -= 5
            self.box_height += 5
        elif action == 1:  # Down
            self.box_y += 0.005
            self.box_height -= 0.005
        elif action == 2:  # Down
            self.box_y += 0.01
            self.box_height -= 0.01
        elif action == 3:  # Down
            self.box_y += 0.015
            self.box_height -= 0.015
        elif action == 4:  # Down
            self.box_y += 0.02
            self.box_height -= 0.02

        # Cap the box height to a reasonable range
        self.box_height = np.clip(self.box_height, 10, self.HEIGHT - 20)

        # Clear the screen
        self.screen.fill((255, 255, 255))

        # Draw the blue box
        pygame.draw.rect(self.screen, self.BLUE, (self.box_x, self.box_y, self.box_width, self.box_height))

        # Update the display
        pygame.display.flip()

        # Cap the frame rate
        self.clock.tick(30)

        # Return the observation, reward, done, and info
        return self._get_observation(), 0.0, False, {}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        pygame.quit()

    def _get_observation(self):
        # Capture the current state of the screen as an observation
        return pygame.surfarray.array3d(pygame.display.get_surface())

# Example of how to use the custom environment
env = BlueBoxEnv()
observation = env.reset()

for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)

    if done:
        observation = env.reset()

env.close()


# Training the agent

In [None]:
env = DamGateControlEnv()

In [None]:
trained_q_table, rewards_history = q_learning(env)

In [None]:
visualize_q_values(trained_q_table, (0, 0))

# Testing the trained agent

In [None]:
state = env.reset()
state = int(state.item()) if hasattr(state, 'item') else int(state)
done = False

In [None]:
while not done:
    action = np.argmax(trained_q_table[state])
    next_state, reward, done, _ = env.step(action)
    next_state = int(next_state.item()) if hasattr(next_state, 'item') else int(next_state)

    print(f"Current State: {state}, Action: {action}, Next State: {next_state}, Reward: {reward}")

    state = next_state