In [None]:
import gymnasium as gym
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense

# Function to create the model for action decision
def create_model(input_shape, action_space):
    """Build and compile the convolutional network used to score actions.

    Args:
        input_shape: Shape of a single observation fed to the network
            (this notebook passes the game frame shape, e.g. (210, 160, 3)).
        action_space: Number of units in the final linear layer, i.e. one
            output value per available action.

    Returns:
        A compiled Keras ``Sequential`` model (Adam optimizer, MSE loss) whose
        output has ``action_space`` linear values per input.
    """
    model = Sequential([
        # Declare the input explicitly: recent Keras versions warn when
        # `input_shape=` is passed to a layer inside a Sequential model.
        tf.keras.Input(shape=input_shape),
        Conv2D(32, (8, 8), strides=(4, 4), activation='relu'),
        Conv2D(64, (4, 4), strides=(2, 2), activation='relu'),
        Conv2D(64, (3, 3), activation='relu'),
        Flatten(),
        Dense(512, activation='relu'),
        # Linear head: raw, unbounded scores, one per action
        Dense(action_space, activation='linear')
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Preprocess the game state (image)
def preprocess_state(state):
    """Turn a raw game state into a float32 array scaled by 1/255.

    Args:
        state: Either the frame itself, or a tuple/list whose first element
            is the frame (only that first element is used).

    Returns:
        ``numpy.ndarray`` of dtype float32 holding the frame values divided
        by 255.0.
    """
    # Unwrap tuple/list containers; anything else is taken to be the frame
    frame = state[0] if isinstance(state, (tuple, list)) else state
    pixels = np.array(frame, dtype=np.float32)
    return pixels / 255.0

# Function to choose an action based on the model's prediction
def choose_action(state, model):
    """Pick the action whose predicted value is highest for ``state``.

    Args:
        state: Raw game state; it is normalized with ``preprocess_state``
            before being passed to the model.
        model: Keras-style model exposing ``predict(batch, verbose=...)`` and
            returning one row of action values per batch item.

    Returns:
        int: Index of the largest value in the first (only) prediction row.
    """
    processed_state = preprocess_state(state)
    # Add a leading batch axis of size 1. verbose=0 silences the per-call
    # progress bar that Keras would otherwise print on every game step.
    q_values = model.predict(processed_state[np.newaxis, ...], verbose=0)
    print("Q-values:", q_values)  # Diagnostic print to check Q-values
    # Return a builtin int rather than a numpy integer for the environment API
    return int(np.argmax(q_values[0]))

# Initialize the game environment. With render_mode='human' Gymnasium draws the
# frames itself during reset()/step(), so no explicit env.render() call is made.
env = gym.make('ALE/Frogger-v5', render_mode='human')
print("Action space:", env.action_space)  # Diagnostic print to check the action space

FRAME_DELAY_S = 0.1  # Pause between steps so the rendered game is watchable

try:
    # Size the network from the environment's own spaces instead of hard-coded values
    model = create_model(env.observation_space.shape, env.action_space.n)
    # NOTE(review): no training step is visible in this cell, so the actions
    # presumably come from freshly initialized weights -- confirm.

    # Gymnasium's reset() returns an (observation, info) pair
    state, info = env.reset()
    # Summarize the frame instead of dumping the whole pixel array into the output
    print("Initial state shape:", state.shape, "dtype:", state.dtype)

    # Main game loop with diagnostics
    done = False
    while not done:
        action = choose_action(state, model)  # Determine action
        # step() returns (observation, reward, terminated, truncated, info)
        state, reward, terminated, truncated, info = env.step(action)
        # The episode is over when it terminates OR is truncated (e.g. time limit)
        done = terminated or truncated
        print(f"Action: {action}, Reward: {reward}, Done: {done}")
        time.sleep(FRAME_DELAY_S)
finally:
    # Always release the emulator/window, even if the loop raises or is interrupted
    env.close()

Action space: Discrete(5)
Initial state structure: (array([[[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0]],

       ...,

       [[ 82, 126,  45],
        [ 82, 126,  45],
        [ 82, 126,  45],
        ...,
        [ 82, 126,  45],
        [ 82, 126,  45],
        [ 82, 126,  45]],

       [[ 82, 126,  45],
        [ 82, 126,  45],
        [ 82, 126,  45],
        ...,
        [ 82, 126,  45],
        [ 82, 126,  45],
        [ 82, 126,  45]],

       [[ 82, 126,  45],
        [ 82, 126,  45],
        [ 82, 126,  45],
        ...,
        [ 82, 126,  45

  super().__init__(


Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.00566363 -0.02436418  0.04528935  0.06514134 -0.12771839]]
Action: 3, Reward: 0.0, Done: False


  logger.warn(


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Q-values: [[-0.00195008 -0.01877591  0.03811887  0.07504021 -0.12121636]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[ 0.00594883 -0.01964147  0.03163203  0.06006731 -0.12158228]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.00103304 -0.0161205   0.02327538  0.05260275 -0.11171053]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Q-values: [[-0.00813079 -0.02014377  0.03185378  0.04169306 -0.09810597]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Q-values: [[-0.00155167 -0.02345909  0.02736447  0.05392828 -0.11104579]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Q-values: [[ 0.009

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[ 0.00290551 -0.00184897  0.01538562  0.03560789 -0.10723241]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Q-values: [[ 0.01756489 -0.00203822  0.00884849  0.04165262 -0.10004733]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.00017743 -0.01035388  0.01654074  0.05338454 -0.09580827]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.00894316 -0.00897847  0.01742614  0.05519159 -0.0993351 ]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.0124628  -0.00977386  0.01171905  0.05127037 -0.08856256]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.010

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Q-values: [[-0.00936639 -0.03591148  0.01636945  0.04311868 -0.07125338]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Q-values: [[-0.01367607 -0.0338601   0.00912753  0.04570779 -0.05656177]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.00658096 -0.03499736 -0.00013607  0.04951295 -0.04072612]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Q-values: [[-0.00137635 -0.05615886  0.0052919   0.06755468 -0.04649574]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Q-values: [[ 0.00057694 -0.05231778  0.00430711  0.06046739 -0.05544931]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Q-values: [[-0.011

Action: 2, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.02639429 -0.03958399  0.06379063  0.06613366 -0.10650428]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Q-values: [[-0.01619909 -0.03941223  0.05716631  0.05692637 -0.11428652]]
Action: 2, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.01616541 -0.04336653  0.04426084  0.06059588 -0.11765296]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Q-values: [[-0.01300195 -0.03964664  0.03694164  0.05411936 -0.12057298]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Q-values: [[-0.03290452 -0.04784878  0.05210476  0.05225537 -0.13705924]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.03017374 -0.03566835  0.02504923  0.03030545 -0.10617117]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.01276271 -0.03678106  0.02418291  0.02855729 -0.10207888]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Q-values: [[-0.01399972 -0.03949107  0.02640443  0.01870663 -0.11019723]]
Action: 2, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Q-values: [[-0.03267106 -0.04581779  0.02672711  0.02316777 -0.1236624 ]]
Action: 2, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.02652814 -0.04229465  0.02105612  0.0325752  -0.12355537]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Q-values: [[-0.020

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.05211238 -0.00782227  0.02478365  0.04548677 -0.11368716]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Q-values: [[-0.04450098 -0.01401204  0.01356869  0.04659828 -0.10390547]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Q-values: [[-0.05828083 -0.01891856  0.04326491  0.04922752 -0.09617906]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Q-values: [[-0.04510085 -0.01855183  0.04135088  0.05386375 -0.10900298]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Q-values: [[-0.03699296 -0.01571641  0.04429808  0.05528779 -0.10387511]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.036

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.01960081 -0.04945289  0.01732429  0.02426122 -0.06065727]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.01872168 -0.06579304  0.01747612  0.04122498 -0.0767067 ]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Q-values: [[-0.00420648 -0.05897391  0.01432025  0.04473829 -0.07414097]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[ 0.00809812 -0.06041131  0.02740577  0.06883793 -0.08973167]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[ 0.00742397 -0.05298915  0.02717696  0.06565958 -0.09099099]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Q-values: [[ 0.005

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.01036963 -0.01143873  0.03401668  0.06669116 -0.12035362]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Q-values: [[-0.01101804 -0.01732089  0.02648433  0.06602897 -0.11349797]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Q-values: [[-0.01059387 -0.02324309  0.03448033  0.06175756 -0.11405525]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Q-values: [[-0.02372007 -0.03383854  0.04739194  0.05720638 -0.11561532]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Q-values: [[-0.00463368 -0.02358609  0.0412106   0.0698937  -0.1261618 ]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.000

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Q-values: [[ 0.00056665 -0.00382145  0.01989278  0.03919128 -0.11426433]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Q-values: [[ 0.00820562 -0.00110902  0.02528267  0.04200635 -0.13030283]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Q-values: [[ 0.00395565 -0.00366042  0.03009597  0.03772477 -0.12800606]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Q-values: [[ 1.73990540e-02 -1.10452165e-04  1.47129865e-02  4.76060957e-02
  -1.20292075e-01]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Q-values: [[ 0.02879672  0.00100833  0.01452821  0.05850476 -0.11407263]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/s

Action: 2, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Q-values: [[-0.005536   -0.03697737  0.02636585  0.0258829  -0.07620198]]
Action: 2, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Q-values: [[ 0.00796432 -0.04198764  0.02676984  0.03371816 -0.08590186]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Q-values: [[-0.00409253 -0.02533261  0.0298711   0.04195699 -0.06748486]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Q-values: [[-0.00424072 -0.02470005  0.01280132  0.0321748  -0.05856949]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Q-values: [[-0.01216108 -0.03499588  0.01217274  0.02685433 -0.06419464]]
Action: 3, Reward: 0.0, Done: False
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [None]:
# NOTE: this cell reuses `env` and `action` left over from the previous cell,
# which finishes by calling env.close(); re-create the environment (and pick an
# action) before running this cell on its own.
outputs = env.step(action)  # Get all outputs from the environment step
# Gymnasium's step() returns (observation, reward, terminated, truncated, info)
state, reward, terminated, truncated, info = outputs
done = terminated or truncated  # The episode ends on termination or truncation