In [3]:
pip install gymnasium --timeout=200


Collecting gymnasium
  Using cached gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Using cached Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ---------------------------------------- 0.0/958.1 kB ? eta -:--:--
   ----------

In [None]:
# Import necessary libraries
import gymnasium as gym  # For creating and managing the environment
import numpy as np  # For numerical operations, especially arrays
import tensorflow as tf  # For building and training the neural network model
from tensorflow.keras import layers  # For defining layers in the neural network

# Set up the CartPole-v1 task and read the sizes the network needs from its spaces
env = gym.make("CartPole-v1")  # Shared environment instance used by the training loop
state_shape = env.observation_space.shape  # Shape of one observation; becomes the model's input shape
num_actions = env.action_space.n  # Count of discrete actions; becomes the model's output width

# Function to build a simple Deep Q-Network (DQN) model
def build_model(hidden_units=(24, 24), learning_rate=0.001):
    """Build and compile a small fully connected Q-network.

    The network takes one observation of shape `state_shape` and outputs one
    Q-value per action (`num_actions` linear outputs). Both names are read
    from module scope.

    Args:
        hidden_units: Width of each hidden ReLU layer, in order. The default
            reproduces the original two layers of 24 neurons.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled `tf.keras.Sequential` model using mean squared error loss.
    """
    # tf.keras.Input declares the input the same way on Keras 2 and Keras 3;
    # InputLayer(input_shape=...) is deprecated on Keras 3 in favour of shape=.
    network_layers = [tf.keras.Input(shape=state_shape)]
    # Hidden layers with ReLU activation
    network_layers.extend(layers.Dense(units, activation='relu') for units in hidden_units)
    # Output layer: one linear unit per action, interpreted as that action's Q-value
    network_layers.append(layers.Dense(num_actions, activation='linear'))

    model = tf.keras.Sequential(network_layers)
    # Compile the model with Adam optimizer and mean squared error loss function
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')
    return model  # Return the compiled model

model = build_model()  # Build the module-level DQN model; train_model() reads and fits this instance

# Function to train the model using a simplified Q-Learning loop
def train_model(episodes=1000, gamma=0.95, epsilon=0.1):
    """Train the module-level `model` on the module-level `env` with online Q-learning.

    Every environment step performs a single gradient update on the one
    transition just observed. There is no replay buffer or target network,
    so this is a simplified loop rather than a full DQN.

    Args:
        episodes: Number of episodes to run.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Probability of taking a random (exploratory) action instead
            of the greedy one.

    Returns:
        None. The total reward of each episode is printed when it finishes.
    """
    for episode in range(episodes):  # Loop over the specified number of episodes
        state, _ = env.reset()  # Reset the environment to the initial state
        state = np.reshape(state, [1, state_shape[0]])  # Add a batch dimension for the model
        total_reward = 0  # Initialize total reward for this episode
        done = False  # Flag to indicate if the episode is finished

        while not done:  # Loop until the episode is done
            # One forward pass per step, reused for action selection and as the
            # base of the regression target. predict_on_batch avoids the per-call
            # progress output and overhead of predict() on a single sample.
            q_values = model.predict_on_batch(state)

            # Epsilon-greedy policy: explore with probability epsilon, otherwise act greedily
            if np.random.rand() > epsilon:
                action = int(np.argmax(q_values[0]))
            else:
                action = env.action_space.sample()

            # Step in the environment using the selected action
            next_state, reward, terminated, truncated, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_shape[0]])  # Add a batch dimension

            # Update total reward received so far in this episode
            total_reward += reward

            # Bellman target. Only a true terminal state drops the bootstrap term;
            # a time-limit truncation ends the episode but the state still has value.
            if terminated:
                target = reward
            else:
                target = reward + gamma * np.max(model.predict_on_batch(next_state)[0])

            # Copy the current Q-values and overwrite only the taken action's entry,
            # so the loss is zero for the actions that were not taken
            target_q = np.array(q_values)
            target_q[0][action] = target

            # Single gradient update on this transition
            model.train_on_batch(state, target_q)

            # The episode ends on either termination or truncation
            done = terminated or truncated

            # Move to the next state
            state = next_state

        # Print the total reward for the episode
        print(f"Episode {episode + 1}, Total Reward: {total_reward}")

# Start training the model with the default of 1000 episodes; each episode's total reward is printed as it finishes
train_model()  # Call the training function






Episode 1, Total Reward: 112.0


Episode 2, Total Reward: 49.0
Episode 3, Total Reward: 11.0
Episode 4, Total Reward: 8.0
Episode 5, Total Reward: 8.0
Episode 6, Total Reward: 9.0
Episode 7, Total Reward: 11.0


Episode 8, Total Reward: 10.0
Episode 9, Total Reward: 11.0
Episode 10, Total Reward: 10.0
Episode 11, Total Reward: 10.0
Episode 12, Total Reward: 12.0


Episode 13, Total Reward: 9.0
Episode 14, Total Reward: 9.0
Episode 15, Total Reward: 13.0
Episode 16, Total Reward: 10.0
Episode 17, Total Reward: 10.0


Episode 18, Total Reward: 10.0
Episode 19, Total Reward: 15.0
Episode 20, Total Reward: 9.0
Episode 21, Total Reward: 9.0
Episode 22, Total Reward: 10.0


Episode 23, Total Reward: 10.0
Episode 24, Total Reward: 10.0
Episode 25, Total Reward: 10.0
Episode 26, Total Reward: 10.0
Episode 27, Total Reward: 12.0


Episode 28, Total Reward: 11.0
Episode 29, Total Reward: 8.0
Episode 30, Total Reward: 9.0
Episode 31, Total Reward: 11.0
Episode 32, Total Reward: 8.0
Episode 33, Total Reward: 10.0


Episode 34, Total Reward: 12.0
Episode 35, Total Reward: 12.0
Episode 36, Total Reward: 11.0
Episode 37, Total Reward: 9.0
Episode 38, Total Reward: 10.0


Episode 39, Total Reward: 11.0
Episode 40, Total Reward: 9.0
Episode 41, Total Reward: 9.0
Episode 42, Total Reward: 10.0
Episode 43, Total Reward: 10.0
Episode 44, Total Reward: 9.0


Episode 45, Total Reward: 11.0
Episode 46, Total Reward: 8.0
Episode 47, Total Reward: 10.0
Episode 48, Total Reward: 8.0
Episode 49, Total Reward: 10.0


Episode 50, Total Reward: 11.0
Episode 51, Total Reward: 10.0
Episode 52, Total Reward: 11.0
Episode 53, Total Reward: 10.0
Episode 54, Total Reward: 12.0


Episode 55, Total Reward: 8.0
Episode 56, Total Reward: 8.0
Episode 57, Total Reward: 10.0
Episode 58, Total Reward: 12.0
Episode 59, Total Reward: 10.0
Episode 60, Total Reward: 10.0


Episode 61, Total Reward: 9.0
Episode 62, Total Reward: 10.0
Episode 63, Total Reward: 9.0
Episode 64, Total Reward: 12.0
Episode 65, Total Reward: 8.0


Episode 66, Total Reward: 10.0
Episode 67, Total Reward: 10.0
Episode 68, Total Reward: 10.0
Episode 69, Total Reward: 11.0
Episode 70, Total Reward: 12.0


Episode 71, Total Reward: 11.0
Episode 72, Total Reward: 11.0
Episode 73, Total Reward: 9.0
Episode 74, Total Reward: 8.0
Episode 75, Total Reward: 13.0


Episode 76, Total Reward: 8.0
Episode 77, Total Reward: 12.0
Episode 78, Total Reward: 10.0
Episode 79, Total Reward: 10.0
Episode 80, Total Reward: 11.0


Episode 81, Total Reward: 10.0
Episode 82, Total Reward: 11.0
Episode 83, Total Reward: 10.0
Episode 84, Total Reward: 10.0
Episode 85, Total Reward: 10.0
Episode 86, Total Reward: 8.0


Episode 87, Total Reward: 10.0
Episode 88, Total Reward: 12.0
Episode 89, Total Reward: 8.0
Episode 90, Total Reward: 8.0
Episode 91, Total Reward: 10.0


Episode 92, Total Reward: 9.0
Episode 93, Total Reward: 12.0
Episode 94, Total Reward: 8.0
Episode 95, Total Reward: 10.0
Episode 96, Total Reward: 11.0


Episode 97, Total Reward: 11.0
Episode 98, Total Reward: 8.0
Episode 99, Total Reward: 9.0
Episode 100, Total Reward: 9.0
Episode 101, Total Reward: 9.0
Episode 102, Total Reward: 10.0


Episode 103, Total Reward: 8.0
Episode 104, Total Reward: 9.0
Episode 105, Total Reward: 10.0
Episode 106, Total Reward: 9.0
Episode 107, Total Reward: 9.0
Episode 108, Total Reward: 10.0


Episode 109, Total Reward: 10.0
Episode 110, Total Reward: 10.0
Episode 111, Total Reward: 10.0
Episode 112, Total Reward: 10.0
Episode 113, Total Reward: 10.0


Episode 114, Total Reward: 9.0
Episode 115, Total Reward: 9.0
Episode 116, Total Reward: 9.0
Episode 117, Total Reward: 9.0
Episode 118, Total Reward: 10.0
Episode 119, Total Reward: 8.0


Episode 120, Total Reward: 11.0
Episode 121, Total Reward: 11.0
Episode 122, Total Reward: 9.0
Episode 123, Total Reward: 11.0
Episode 124, Total Reward: 9.0


Episode 125, Total Reward: 10.0
Episode 126, Total Reward: 9.0
Episode 127, Total Reward: 8.0
Episode 128, Total Reward: 10.0
Episode 129, Total Reward: 11.0
Episode 130, Total Reward: 9.0


Episode 131, Total Reward: 11.0
Episode 132, Total Reward: 12.0
Episode 133, Total Reward: 10.0
Episode 134, Total Reward: 10.0
Episode 135, Total Reward: 11.0


Episode 136, Total Reward: 11.0
Episode 137, Total Reward: 11.0
Episode 138, Total Reward: 8.0
Episode 139, Total Reward: 11.0
Episode 140, Total Reward: 10.0


Episode 141, Total Reward: 10.0
Episode 142, Total Reward: 10.0
Episode 143, Total Reward: 10.0
Episode 144, Total Reward: 10.0
Episode 145, Total Reward: 10.0


Episode 146, Total Reward: 10.0
Episode 147, Total Reward: 14.0
Episode 148, Total Reward: 10.0
Episode 149, Total Reward: 11.0
Episode 150, Total Reward: 12.0


Episode 151, Total Reward: 12.0
Episode 152, Total Reward: 12.0
Episode 153, Total Reward: 10.0
Episode 154, Total Reward: 14.0


Episode 155, Total Reward: 14.0
Episode 156, Total Reward: 15.0
Episode 157, Total Reward: 13.0
Episode 158, Total Reward: 17.0


Episode 159, Total Reward: 15.0
Episode 160, Total Reward: 21.0


Episode 161, Total Reward: 31.0
Episode 162, Total Reward: 16.0


Episode 163, Total Reward: 40.0
Episode 164, Total Reward: 11.0
Episode 165, Total Reward: 10.0
Episode 166, Total Reward: 11.0


Episode 167, Total Reward: 9.0
Episode 168, Total Reward: 18.0
Episode 169, Total Reward: 15.0
Episode 170, Total Reward: 10.0


Episode 171, Total Reward: 9.0
Episode 172, Total Reward: 11.0
Episode 173, Total Reward: 10.0
Episode 174, Total Reward: 8.0
Episode 175, Total Reward: 9.0
Episode 176, Total Reward: 9.0


Episode 177, Total Reward: 10.0
Episode 178, Total Reward: 8.0
Episode 179, Total Reward: 10.0
Episode 180, Total Reward: 8.0
Episode 181, Total Reward: 11.0
Episode 182, Total Reward: 9.0


Episode 183, Total Reward: 13.0
Episode 184, Total Reward: 14.0


Episode 185, Total Reward: 34.0
Episode 186, Total Reward: 43.0


Episode 187, Total Reward: 52.0


Episode 188, Total Reward: 13.0
Episode 189, Total Reward: 9.0
Episode 190, Total Reward: 10.0
Episode 191, Total Reward: 8.0
Episode 192, Total Reward: 10.0


Episode 193, Total Reward: 10.0
Episode 194, Total Reward: 10.0
Episode 195, Total Reward: 11.0
Episode 196, Total Reward: 12.0
Episode 197, Total Reward: 13.0


Episode 198, Total Reward: 15.0
Episode 199, Total Reward: 11.0
Episode 200, Total Reward: 13.0
Episode 201, Total Reward: 12.0


Episode 202, Total Reward: 10.0
Episode 203, Total Reward: 14.0
Episode 204, Total Reward: 14.0
Episode 205, Total Reward: 13.0


Episode 206, Total Reward: 15.0
Episode 207, Total Reward: 10.0
Episode 208, Total Reward: 12.0
Episode 209, Total Reward: 14.0
Episode 210, Total Reward: 12.0


Episode 211, Total Reward: 13.0
Episode 212, Total Reward: 14.0
Episode 213, Total Reward: 16.0
Episode 214, Total Reward: 12.0


Episode 215, Total Reward: 15.0
Episode 216, Total Reward: 15.0
Episode 217, Total Reward: 13.0


Episode 218, Total Reward: 13.0
Episode 219, Total Reward: 14.0
Episode 220, Total Reward: 11.0
Episode 221, Total Reward: 13.0


Episode 222, Total Reward: 15.0
Episode 223, Total Reward: 13.0
Episode 224, Total Reward: 14.0
Episode 225, Total Reward: 14.0


Episode 226, Total Reward: 13.0
Episode 227, Total Reward: 11.0
Episode 228, Total Reward: 10.0
Episode 229, Total Reward: 10.0
Episode 230, Total Reward: 12.0


Episode 231, Total Reward: 10.0
Episode 232, Total Reward: 11.0
Episode 233, Total Reward: 12.0
Episode 234, Total Reward: 10.0
Episode 235, Total Reward: 9.0


Episode 236, Total Reward: 9.0
Episode 237, Total Reward: 16.0
Episode 238, Total Reward: 11.0
Episode 239, Total Reward: 9.0
Episode 240, Total Reward: 12.0


Episode 241, Total Reward: 11.0
Episode 242, Total Reward: 15.0
Episode 243, Total Reward: 14.0
Episode 244, Total Reward: 17.0


Episode 245, Total Reward: 16.0
Episode 246, Total Reward: 15.0
Episode 247, Total Reward: 13.0


Episode 248, Total Reward: 16.0
Episode 249, Total Reward: 13.0
Episode 250, Total Reward: 16.0
Episode 251, Total Reward: 17.0


Episode 252, Total Reward: 32.0


Episode 253, Total Reward: 39.0
Episode 254, Total Reward: 41.0




Episode 255, Total Reward: 75.0
Episode 256, Total Reward: 14.0
Episode 257, Total Reward: 14.0


Episode 258, Total Reward: 30.0
Episode 259, Total Reward: 27.0


Episode 260, Total Reward: 19.0
Episode 261, Total Reward: 8.0
Episode 262, Total Reward: 12.0
Episode 263, Total Reward: 10.0


Episode 264, Total Reward: 9.0
Episode 265, Total Reward: 10.0
Episode 266, Total Reward: 13.0
Episode 267, Total Reward: 8.0
Episode 268, Total Reward: 14.0


Episode 269, Total Reward: 10.0
Episode 270, Total Reward: 10.0
Episode 271, Total Reward: 9.0
Episode 272, Total Reward: 12.0
Episode 273, Total Reward: 9.0


Episode 274, Total Reward: 11.0
Episode 275, Total Reward: 17.0
Episode 276, Total Reward: 17.0


Episode 277, Total Reward: 67.0


Episode 278, Total Reward: 10.0
Episode 279, Total Reward: 10.0
Episode 280, Total Reward: 11.0
Episode 281, Total Reward: 10.0
Episode 282, Total Reward: 9.0


Episode 283, Total Reward: 11.0
Episode 284, Total Reward: 11.0
Episode 285, Total Reward: 9.0
Episode 286, Total Reward: 9.0
Episode 287, Total Reward: 9.0
Episode 288, Total Reward: 11.0


Episode 289, Total Reward: 11.0
Episode 290, Total Reward: 11.0
Episode 291, Total Reward: 10.0
Episode 292, Total Reward: 9.0
Episode 293, Total Reward: 9.0


Episode 294, Total Reward: 12.0
Episode 295, Total Reward: 11.0
Episode 296, Total Reward: 12.0
Episode 297, Total Reward: 10.0
Episode 298, Total Reward: 10.0


Episode 299, Total Reward: 11.0
Episode 300, Total Reward: 12.0
Episode 301, Total Reward: 12.0
