In [166]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import load_model
import os
import gymnasium as gym
import pandas as pd

In [167]:
mountain_env = gym.make("MountainCar-v0")


@tf.keras.utils.register_keras_serializable()
class DQN(tf.keras.models.Model):
    def __init__(self, hidden_layers, output_layer):
        super(DQN, self).__init__()
        self.hidden_layers = hidden_layers
        self.output_layer = output_layer

    def call(self, inputs):
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x)
        return self.output_layer(x)

    def get_config(self):
        config = {
            "hidden_layers": self.hidden_layers,
            "output_layer": self.output_layer,
        }
        return config

    @classmethod
    def from_config(cls, config):
        hidden_layers = []
        for layer in config["hidden_layers"]:
            hidden_layers.append(tf.keras.layers.Dense(**layer["config"]))
        output_layer = tf.keras.layers.Dense(**config["output_layer"]["config"])
        return cls(hidden_layers, output_layer)


def reshape_reward(reward, next_state, current_state, speed_weight):
    return reward + speed_weight * (np.abs(next_state[1]) - np.abs(current_state[1]))


def evaluate_mountain_model(model, speed_weight, evaluation_episodes=100):
    rewards, reshaped_rewards = [], []
    for _ in range(evaluation_episodes):
        state, _ = mountain_env.reset()
        done, truncated = False, False
        accumulated_reward, accumulated_reshaped_reward = 0, 0
        while not done and not truncated:
            state_in = tf.expand_dims(state, axis=0)
            action = tf.argmax(model(state_in)[0]).numpy()
            next_state, reward, done, truncated, _ = mountain_env.step(action)
            accumulated_reward += reward
            accumulated_reshaped_reward += reshape_reward(
                reward, next_state, state, speed_weight
            )
            state = next_state
        rewards.append(accumulated_reward)
        reshaped_rewards.append(accumulated_reshaped_reward)
    return rewards, reshaped_rewards

In [168]:
mountain_models_path = "mountain/models"
mountain_metrics_path = "mountain/metrics"
mountain_hyperparams_path = "mountain/hyperparams"


def load_model_data(model_name_with_extension):
    model_name = model_name_with_extension.split(".")[0]
    metrics = np.load(
        os.path.join(mountain_metrics_path, f"{model_name}_metrics.npy"),
        allow_pickle=True,
    )
    hyperparams = np.load(
        os.path.join(mountain_hyperparams_path, f"{model_name}_hyperparams.npy"),
        allow_pickle=True,
    ).item()
    speed_weight = hyperparams.get("speed_weight", 10)
    model = load_model(
        os.path.join(mountain_models_path, f"{model_name_with_extension}")
    )

    return model_name, metrics, hyperparams, speed_weight, model

In [169]:
mountain_models_paths = os.listdir(mountain_models_path)

data = pd.DataFrame()

for model_name_with_extension in mountain_models_paths:
    model_name, metrics, hyperparams, speed_weight, model = load_model_data(
        model_name_with_extension
    )
    print(model_name)
    evaluate_rewards, evaluate_reshaped_rewards = evaluate_mountain_model(
        model, speed_weight, 10
    )
    units_per_layer = [layer.units for layer in model.hidden_layers]
    activation_per_layer = [layer.activation.__name__ for layer in model.hidden_layers]
    data = data._append(
        {
            "model": model_name,
            "evaluate_max_reward": np.max(evaluate_rewards),
            "evaluate_rewards_mean": np.mean(evaluate_rewards),
            "evaluate_rewards_std": np.std(evaluate_rewards),
            "hidden_layers": len(model.hidden_layers),
            "units_per_layer": units_per_layer,
            "activation_per_layer": activation_per_layer,
            "output_layer_activation": model.output_layer.activation.__name__,
            **hyperparams,
        },
        ignore_index=True,
    )

data.to_csv("mountain/results.csv")

2024-06-15_13-41-53_dqn_mountaincar
[-138.0, -145.0, -147.0, -140.0, -110.0, -138.0, -140.0, -140.0, -144.0, -111.0]
2024-06-15_16-46-16_dqn_mountaincar
[-111.0, -111.0, -142.0, -114.0, -124.0, -117.0, -121.0, -142.0, -142.0, -116.0]
2024-06-15_12-43-26_dqn_mountaincar
[-147.0, -147.0, -200.0, -168.0, -143.0, -143.0, -169.0, -169.0, -146.0, -147.0]
2024-06-15_13-02-52_dqn_mountaincar
[-141.0, -144.0, -149.0, -144.0, -140.0, -112.0, -139.0, -140.0, -137.0, -141.0]
2024-06-15_12-35-51_dqn_mountaincar
[-141.0, -85.0, -145.0, -152.0, -90.0, -113.0, -113.0, -112.0, -141.0, -84.0]
2024-06-15_12-43-21_dqn_mountaincar
[-113.0, -148.0, -83.0, -109.0, -109.0, -153.0, -147.0, -148.0, -112.0, -109.0]
2024-06-15_10-54-10_dqn_mountaincar
[-145.0, -144.0, -140.0, -108.0, -109.0, -109.0, -112.0, -112.0, -141.0, -109.0]
2024-06-15_11-57-24_dqn_mountaincar
[-119.0, -160.0, -118.0, -118.0, -151.0, -117.0, -153.0, -160.0, -153.0, -172.0]
2024-06-15_13-39-22_dqn_mountaincar
[-149.0, -154.0, -154.0, -148.0,