In [33]:
import os
import gym
from tqdm import tqdm
import numpy as np

# Custom MSE loss function
@tf.keras.utils.register_keras_serializable()
def custom_mse(y_true, y_pred):
    return mean_squared_error(y_true, y_pred)

# Create CNN model
def create_cnn(input_shape, num_actions):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(64, (8, 8), strides=(4, 4), activation='relu'),
        layers.Conv2D(128, (4, 4), strides=(2, 2), activation='relu'),
        layers.Conv2D(256, (3, 3), strides=(2, 2), activation='relu'),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(num_actions, activation='linear')
    ])
    model.compile(optimizer=optimizers.Adam(learning_rate=0.00025), loss=custom_mse)
    return model

# Training function
def train_model(model, episodes, save_interval=10):
    save_dir = "model_weights"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    with tqdm(total=episodes, desc="Episode", unit='episode') as pbar:
        for e in range(episodes):
            state = env.reset()
            state = np.array(state)
            done = False
            total_reward = 0

            while not done:
                if np.random.rand() <= 0.1:
                    action = env.action_space.sample()
                else:
                    q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
                    action = np.argmax(q_values[0])

                next_state, reward, terminated, truncated, _ = env.step(action)
                next_state = np.array(next_state)
                total_reward += reward
                if terminated or truncated:
                    done = True
                state = next_state

            pbar.update(1)
            pbar.set_description(f"Episode: {e+1}, Reward: {total_reward}")
            episode_rewards.append(total_reward)

            if (e + 1) % save_interval == 0:
                model_path = os.path.join(save_dir, f'model_episode_{e + 1}.h5')
                model.save(model_path)

# Loading and testing function
def test_models(weights_folder, episodes_per_model=50):
    weight_files = [f for f in os.listdir(weights_folder) if f.endswith('.h5')]
    for weight_file in weight_files:
        model_path = os.path.join(weights_folder, weight_file)
        model = load_model(model_path, custom_objects={'custom_mse': custom_mse})
        print(f"Testing model: {weight_file}")

        total_rewards = []
        total_times = []

        for _ in range(episodes_per_model):
            start_time = time.time()
            state = env.reset()
            state = np.array(state)
            done = False
            total_reward = 0

            while not done:
                q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
                action = np.argmax(q_values[0])
                next_state, reward, terminated, truncated, _ = env.step(action)
                next_state = np.array(next_state)
                total_reward += reward
                if terminated or truncated:
                    done = True
                state = next_state
            episode_time = time.time() - start_time
            total_rewards.append(total_reward)
            total_times.append(episode_time)

        average_reward = np.mean(total_rewards)
        average_time = np.mean(total_times)
        print(f"Model {weight_file} - Average Reward: {average_reward}, Average Time: {average_time} seconds per episode")

env = gym.make('ALE/Frogger-v5', render_mode='rgb_array')
input_shape = env.observation_space.shape
num_actions = env.action_space.n
model = create_cnn(input_shape, num_actions)
train_model(model, 500)  # Train for 500 episodes
env.close()


NameError: name 'models' is not defined