In [1]:
import gymnasium as gym
from gymnasium import envs

# List all registered environments (including Atari)
all_envs = envs.registry.keys()
atari_envs = [env_id for env_id in all_envs if "ALE/" in env_id]

# Print first 10 Atari games
print(list(atari_envs)[:10])  # Should include 'ALE/Frogger-v5'

[]


In [2]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
import matplotlib.pyplot as plt
import cv2
import time
import warnings
import ale_py
import gymnasium as gym
import multiprocessing as mp
from datetime import datetime
import numpy as np
import os
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [3]:
'''
import gymnasium as gym

# Modern way to create Atari environment
env = gym.make("ALE/Frogger-v5", render_mode="human")  # Note: ALE/ prefix required


observation, info = env.reset()
for _ in range(100):
    action = env.action_space.sample()  # Random action
    observation, reward, terminated, truncated, info = env.step(action)
        
    if terminated or truncated:
        observation, info = env.reset()
'''

'\nimport gymnasium as gym\n\n# Modern way to create Atari environment\nenv = gym.make("ALE/Frogger-v5", render_mode="human")  # Note: ALE/ prefix required\n\n\nobservation, info = env.reset()\nfor _ in range(100):\n    action = env.action_space.sample()  # Random action\n    observation, reward, terminated, truncated, info = env.step(action)\n        \n    if terminated or truncated:\n        observation, info = env.reset()\n'

In [4]:
# Cell 2: Define the environment runner function
def run_frogger_instance(instance_id, num_episodes=5, render_mode=None):
    """
    Runs a single instance of the Frogger environment
    
    Args:
        instance_id (int): Identifier for this process
        num_episodes (int): Number of episodes to run
        render_mode (str): None, "human", or "rgb_array"
    """
    env = gym.make("ALE/Frogger-v5", render_mode=render_mode)
    
    for episode in range(num_episodes):
        obs, info = env.reset()
        done = False
        total_reward = 0
        
        while not done:
            # Replace this with your actual policy/controller
            action = env.action_space.sample()  # Random actions for demo
            
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            total_reward += reward
            
            # Add your custom processing here
            
        print(f"Instance {instance_id} | Episode {episode+1}/{num_episodes} | Reward: {total_reward}")
    
    env.close()

In [5]:
# Cell 3: Main execution block (must be in its own cell)
if __name__ == "__main__":  # Critical for multiprocessing in Jupyter
    # Configuration
    NUM_INSTANCES = 4       # Number of parallel instances
    EPISODES_PER_INSTANCE = 3
    RENDER_MODE = None      # Set to "human" if you want visualization
    
    print(f"Starting {NUM_INSTANCES} parallel Frogger instances at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    start_time = datetime.now()

Starting 4 parallel Frogger instances at 2025-04-09 10:04:09


In [6]:
   # Create and start processes
processes = []
for i in range(NUM_INSTANCES):
        # Only render the first instance if RENDER_MODE is "human"
    current_render_mode = RENDER_MODE if i == 0 else None
        
    p = mp.Process(
        target=run_frogger_instance,
        args=(i, EPISODES_PER_INSTANCE, current_render_mode)
        )
    p.start()
    processes.append(p)
    
    # Wait for all processes to complete
    for p in processes:
        p.join()
    
    duration = (datetime.now() - start_time).total_seconds()
    print(f"All instances completed in {duration:.2f} seconds")

All instances completed in 0.08 seconds
All instances completed in 0.15 seconds
All instances completed in 0.21 seconds
All instances completed in 0.28 seconds


In [7]:
import gymnasium as gym
import multiprocessing as mp
import numpy as np

def run_env(env_name, num_episodes, process_id):
    env = gym.make(env_name, render_mode="human")  # or "rgb_array" for no rendering
    for episode in range(num_episodes):
        obs, info = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()  # Random policy - replace with your agent
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            # Add your learning/processing here
        print(f"Process {process_id} finished episode {episode}")
    env.close()

if __name__ == "__main__":
    env_name = "ALE/Frogger-v5"  # Or the correct Frogger environment name
    num_processes = 4  # Number of parallel instances
    num_episodes = 10  # Episodes per instance
    
    processes = []
    for i in range(num_processes):
        p = mp.Process(target=run_env, args=(env_name, num_episodes, i))
        p.start()
        processes.append(p)
    
    for p in processes:
        p.join()

In [None]:
import numpy as np
import gymnasium as gym
from keras.models import Sequential, clone_model
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
import time
import matplotlib.pyplot as plt
from collections import deque
import random
import math

# --- Hyperparameters ---
EPISODES = 1000                # Total training episodes
BATCH_SIZE = 32                # Batch size for training
MEMORY_SIZE = 100000           # Larger replay memory (for PER)
GAMMA = 0.99                   # Higher discount factor
EPSILON_START = 1.0            # Starting exploration rate
EPSILON_MIN = 0.01             # Min exploration rate
EPSILON_DECAY = 0.995          # Decay rate
LEARNING_RATE = 0.00025        # Lower learning rate (better stability)
UPDATE_TARGET_FREQ = 1000      # Double DQN target network update
FRAME_STACK = 4                # Number of stacked frames (temporal info)
SAVE_FREQ = 50                 # Save weights every N episodes

# --- Prioritized Experience Replay (PER) ---
class PrioritizedReplayBuffer:
    def __init__(self, max_size):
        self.max_size = max_size
        self.buffer = deque(maxlen=max_size)
        self.priorities = deque(maxlen=max_size)
    
    def add(self, state, action, reward, next_state, done):
        max_priority = max(self.priorities) if self.buffer else 1.0
        self.buffer.append((state, action, reward, next_state, done))
        self.priorities.append(max_priority)
    
    def sample(self, batch_size, alpha=0.6):
        priorities = np.array(self.priorities)
        probs = priorities ** alpha
        probs /= probs.sum()
        
        indices = np.random.choice(len(self.buffer), batch_size, p=probs)
        samples = [self.buffer[i] for i in indices]
        weights = (len(self.buffer) * probs[indices]) ** (-0.4)
        weights /= weights.max()
        
        states = np.array([x[0] for x in samples])
        actions = np.array([x[1] for x in samples])
        rewards = np.array([x[2] for x in samples])
        next_states = np.array([x[3] for x in samples])
        dones = np.array([x[4] for x in samples])
        
        return states, actions, rewards, next_states, dones, indices, weights
    
    def update_priorities(self, indices, errors, offset=0.01):
        for i, error in zip(indices, errors):
            self.priorities[i] = abs(error) + offset

# --- Environment Setup ---
env = gym.make("ALE/Frogger-v5", render_mode="rgb_array")
state_shape = (84, 84, FRAME_STACK)  # Stacked frames
action_size = env.action_space.n

# --- Model Definition ---
def build_model():
    model = Sequential([
        Conv2D(32, (8, 8), strides=4, activation="relu", input_shape=state_shape),
        Conv2D(64, (4, 4), strides=2, activation="relu"),
        Conv2D(64, (3, 3), strides=1, activation="relu"),
        Flatten(),
        Dense(512, activation="relu"),
        Dense(action_size, activation="linear")
    ])
    model.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))
    return model

# --- Frame Stacking ---
class FrameStacker:
    def __init__(self):
        self.frames = deque(maxlen=FRAME_STACK)
    
    def reset(self, state):
        for _ in range(FRAME_STACK):
            self.frames.append(state)
        return np.stack(self.frames, axis=-1)
    
    def append(self, state):
        self.frames.append(state)
        return np.stack(self.frames, axis=-1)

# --- Preprocessing ---
def preprocess_state(state):
    state = np.mean(state, axis=2)  # Grayscale
    state = state[34:194, :]        # Crop
    state = state[::2, ::2]         # Downsample to 80x80
    # Add padding to make it 84x84 if needed
    state = np.pad(state, ((2,2),(2,2)), mode='constant')
    return state / 255.0

# --- Training Functions ---
def train_dqn():
    model = build_model()
    target_model = clone_model(model)  # Double DQN
    target_model.set_weights(model.get_weights())
    
    memory = PrioritizedReplayBuffer(MEMORY_SIZE)  # PER
    frame_stacker = FrameStacker()
    epsilon = EPSILON_START
    rewards_history = []
    start_time = time.time()
    global_step = 0

    for episode in range(1, EPISODES + 1):
        state, _ = env.reset()
        state = preprocess_state(state)
        state = frame_stacker.reset(state)
        total_reward = 0
        done = False

        while not done:
            global_step += 1

            # Epsilon-greedy action
            if np.random.rand() <= epsilon:
                action = env.action_space.sample()
            else:
                q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
                action = np.argmax(q_values[0])

            next_state, reward, done, _, _ = env.step(action)
            next_state = preprocess_state(next_state)
            next_state = frame_stacker.append(next_state)
            memory.add(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

            # Train on replay memory (PER)
            if global_step % 4 == 0 and len(memory.buffer) >= BATCH_SIZE:
                states, actions, rewards, next_states, dones, indices, weights = memory.sample(BATCH_SIZE)
                
                # Double DQN: Use target model for next Q-values
                next_q_values = target_model.predict(next_states, verbose=0)
                best_actions = np.argmax(model.predict(next_states, verbose=0), axis=1)
                target_q = rewards + GAMMA * next_q_values[np.arange(BATCH_SIZE), best_actions] * (1 - dones)
                
                # Compute TD errors and update priorities
                current_q = model.predict(states, verbose=0)
                td_errors = target_q - current_q[np.arange(BATCH_SIZE), actions]
                memory.update_priorities(indices, td_errors)
                
                # Train with importance-sampling weights
                target = current_q.copy()
                target[np.arange(BATCH_SIZE), actions] = target_q
                model.fit(states, target, sample_weight=weights, epochs=1, verbose=0)

            # Update target network (Double DQN)
            if global_step % UPDATE_TARGET_FREQ == 0:
                target_model.set_weights(model.get_weights())

        # Decay epsilon
        if epsilon > EPSILON_MIN:
            epsilon *= EPSILON_DECAY

        rewards_history.append(total_reward)

        # Print progress
        elapsed_time = (time.time() - start_time) / 60  # in minutes
        remaining_time = (elapsed_time / episode) * (EPISODES - episode)
        print(f"Episode: {episode}/{EPISODES}, Reward: {total_reward}, Epsilon: {epsilon:.2f}, Time Elapsed: {elapsed_time:.2f} mins, Remaining: {remaining_time:.2f} mins")

        # Save weights periodically
        if episode % SAVE_FREQ == 0:
            model.save_weights(f"frogger_weights_ep{episode}.weights.h5")
            print(f"Saved weights at episode {episode}")

    # Plot rewards
    plt.plot(rewards_history)
    plt.xlabel("Episode")
    plt.ylabel("Reward")
    plt.title("Frogger DQN Training (PER + Double DQN + Frame Stack)")
    plt.savefig("frogger_training_enhanced.png")
    plt.show()

    return model

# --- Run Training ---
if __name__ == "__main__":
    model = train_dqn()
    model.save_weights("frogger_final_weights_enhanced.weights.h5")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode: 1/1000, Reward: 13.0, Epsilon: 0.99, Time Elapsed: 0.24 mins, Remaining: 241.80 mins
Episode: 2/1000, Reward: 10.0, Epsilon: 0.99, Time Elapsed: 0.46 mins, Remaining: 228.18 mins
Episode: 3/1000, Reward: 8.0, Epsilon: 0.99, Time Elapsed: 0.64 mins, Remaining: 213.20 mins
Episode: 4/1000, Reward: 11.0, Epsilon: 0.98, Time Elapsed: 0.89 mins, Remaining: 220.39 mins
Episode: 5/1000, Reward: 8.0, Epsilon: 0.98, Time Elapsed: 1.06 mins, Remaining: 210.27 mins
Episode: 6/1000, Reward: 12.0, Epsilon: 0.97, Time Elapsed: 1.29 mins, Remaining: 214.23 mins
Episode: 7/1000, Reward: 7.0, Epsilon: 0.97, Time Elapsed: 1.51 mins, Remaining: 213.96 mins
Episode: 8/1000, Reward: 8.0, Epsilon: 0.96, Time Elapsed: 1.70 mins, Remaining: 211.25 mins
Episode: 9/1000, Reward: 8.0, Epsilon: 0.96, Time Elapsed: 1.88 mins, Remaining: 207.37 mins
Episode: 10/1000, Reward: 10.0, Epsilon: 0.95, Time Elapsed: 2.09 mins, Remaining: 206.66 mins
Episode: 11/1000, Reward: 6.0, Epsilon: 0.95, Time Elapsed: 2.30