# In [None]:
import gymnasium as gym
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
import time

# Function to create the CNN model for action decision based on "Evan" architecture
def create_evan_model(input_shape, action_space):
    """Build and compile the "Evan" convolutional Q-value network.

    The network is four ReLU convolutions followed by a flatten, a 512-unit
    ReLU dense layer, and a linear output layer with one unit per action.

    Args:
        input_shape: Shape of a single input frame, passed to the first
            convolution as its ``input_shape``.
        action_space: Number of output units (one value per action).

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer and
        mean-squared-error loss.
    """
    # Convolutional feature extractor; only the first layer declares the input shape.
    feature_layers = [
        Conv2D(64, (8, 8), strides=(4, 4), activation='relu', input_shape=input_shape),
        Conv2D(128, (5, 5), strides=(2, 2), activation='relu'),
        Conv2D(256, (3, 3), strides=(2, 2), activation='relu'),
        Conv2D(256, (3, 3), strides=(1, 1), activation='relu'),
    ]
    # Dense head: linear activation on the last layer so outputs are unbounded values.
    head_layers = [
        Flatten(),
        Dense(512, activation='relu'),
        Dense(action_space, activation='linear'),
    ]

    network = Sequential(feature_layers + head_layers)
    network.compile(optimizer='adam', loss='mse')
    return network

# Function to preprocess the game state (image)
def preprocess_state(state):
    """Convert a raw game state into a float32 array scaled by 1/255.

    Args:
        state: Either the frame itself or a tuple/list whose first element
            is the frame (the main loop passes the result of ``env.reset()``
            straight in, so the wrapper case is unwrapped here).

    Returns:
        A ``numpy.ndarray`` of dtype float32 holding the frame divided by 255.
    """
    # Any tuple or list is treated as a wrapper; only its first element is kept.
    frame = state[0] if isinstance(state, (tuple, list)) else state
    pixels = np.array(frame, dtype=np.float32)
    return pixels / 255.0

# Function to choose an action based on the model's prediction
def choose_action(state, model):
    """Pick the action with the highest predicted value for ``state``.

    Args:
        state: Raw game state; it is normalised with ``preprocess_state``
            before being fed to the model.
        model: Object exposing a Keras-style ``predict(batch, verbose=...)``
            method that returns one row of action values per input.

    Returns:
        The index of the largest value in the model's output for this state
        (as returned by ``np.argmax``).
    """
    processed_state = preprocess_state(state)
    # Add a leading batch axis of size 1 without rebuilding the array from a list.
    batch = processed_state[np.newaxis, ...]
    # verbose=0: this runs once per environment step, so the default progress
    # bar would otherwise be printed on every single frame.
    q_values = model.predict(batch, verbose=0)
    return np.argmax(q_values[0])

# Initialize the game environment with 'human' render mode for visual output
env = gym.make(
    'ALE/Frogger-v5',
    render_mode='human',
)
print("Action space:", env.action_space)

# Build the "Evan" network: 210x160x3 input frames, one output per available action
model = create_evan_model(
    input_shape=(210, 160, 3),
    action_space=env.action_space.n,
)

# Main game loop with 'human' render mode for visual output

# Play episodes back-to-back until the user interrupts (Ctrl+C). The try/finally
# guarantees the environment is closed; after a bare infinite loop the close
# call would never be reached.
try:
    while True:
        # Gymnasium's reset() returns (observation, info); only the frame is needed.
        state, _ = env.reset()
        done = False
        while not done:
            action = choose_action(state, model)  # Greedy action from the current model

            # Gymnasium's step() returns a 5-tuple. An episode is over when it is
            # either terminated (game ended) or truncated (e.g. a time limit), so
            # both flags must be checked.
            next_state, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated

            # No explicit env.render() call: 'human' mode renders automatically.

            state = next_state  # Update state for the next step
            time.sleep(0.1)  # Delay to see the changes visually

        # Here you could add logic to track game stats, like the number of games played, etc.
        print("Game over. Starting a new game.")
except KeyboardInterrupt:
    print("Interrupted by user. Shutting down.")
finally:
    # Clean up and close the environment
    env.close()