In [None]:
%pip install gym-super-mario-bros
%pip install -r requirements.txt 
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
%pip install 'stable-baselines4[extra]'
%pip install optuna
%pip install openpyxl

In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.monitor import Monitor
from gym.wrappers import GrayScaleObservation
import torch

In [None]:
from nes_py.wrappers import JoypadSpace
import gym
from stable_baselines3 import PPO
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
# Build the Super Mario Bros environment and wrap it with the SIMPLE_MOVEMENT joypad mapping.
env = JoypadSpace(gym.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)
# Grey scale
env = GrayScaleObservation(env, keep_dim=True)
# Vectorize the single wrapped env, then stack 4 frames (channels last).
# The lambda is called inside the DummyVecEnv constructor, i.e. before `env`
# is rebound to the stacked vectorized environment.
env = VecFrameStack(DummyVecEnv([lambda: env]), 4, channels_order='last')

In [None]:
def evaluate_model(model, env, num_episodes=5, deterministic=False):
    """
    Evaluate the performance of the model on a given environment.

    The environment is driven through a vectorized-style interface:
    ``env.step`` must return per-environment sequences, and only the entry at
    index 0 is used, both for the reward and for the end-of-episode flag.

    Parameters:
    - model: The trained model to evaluate. Must expose
      ``predict(obs, deterministic=...)`` returning a tuple whose first item
      is the action.
    - env: The environment on which to evaluate the model.
    - num_episodes: Number of episodes to run for the evaluation (must be > 0).
    - deterministic: Forwarded to ``model.predict``. Defaults to False, which
      matches calling ``model.predict(obs)`` on a stable-baselines3 model.

    Returns:
    - avg_reward: The average reward obtained over the evaluation episodes.

    Raises:
    - ValueError: If ``num_episodes`` is not positive (there would be nothing
      to average over).
    """
    if num_episodes <= 0:
        raise ValueError(
            f"num_episodes must be a positive integer, got {num_episodes!r}"
        )

    total_rewards = 0.0
    for ep in range(num_episodes):
        obs = env.reset()
        rewards = 0.0
        done = False
        while not done:
            action = model.predict(obs, deterministic=deterministic)[0]
            obs, reward, dones, _info = env.step(action)
            rewards += float(reward[0])
            # Read the flag of env 0 explicitly, consistent with reward[0];
            # testing the whole `dones` container for truthiness is ambiguous
            # for more than one env and wrong for list-like containers.
            done = bool(dones[0])
        total_rewards += rewards
        print(f"Episode {ep+1}: {rewards=:.2f}")
    return total_rewards / num_episodes


In [None]:
import optuna

def objective(trial):
    """
    Optuna objective: train a PPO agent with sampled hyperparameters and score it.

    Samples the hyperparameters from ``trial``, builds a ``PPO`` model with the
    "CnnPolicy" on the notebook-level ``env``, trains it for 45_000 timesteps
    (TensorBoard logs go to "logs" under the run name ``trial_<number>``), and
    evaluates it for 3 episodes on the same ``env``.

    Parameters:
    - trial: The Optuna trial used to sample the hyperparameters.

    Returns:
    - The average episode reward reported by ``evaluate_model``.
    """
    # `suggest_float` replaces the deprecated `suggest_uniform` /
    # `suggest_loguniform`; names, bounds and sampling order are unchanged,
    # so `trial.params` keeps the same keys.
    lr = trial.suggest_float('lr', 1e-5, 8e-5)
    # NOTE(review): batch_size can be larger than n_steps (e.g. 2048 vs 512) —
    # confirm how PPO treats a mini-batch larger than the collected rollout.
    batch_size = trial.suggest_categorical('batch_size', [512, 1024, 2048])
    n_epochs = trial.suggest_int('n_epochs', 4, 10)
    gae_lambda = trial.suggest_float('gae_lambda', 0.9, 0.95)
    clip_range = trial.suggest_float('clip_range', 0.26, 0.3)
    n_steps = trial.suggest_categorical('n_steps', [512, 1024])
    gamma = trial.suggest_float('gamma', 0.99, 0.999, log=True)
    ent_coef = trial.suggest_float('ent_coef', 0.00000001, 0.1, log=True)
    hidden_layer_sizes = trial.suggest_categorical('hidden_layer_sizes', [64, 128, 256, 512])
    # Two hidden layers of the same sampled width, with ReLU activations.
    policy_kwargs = dict(activation_fn=torch.nn.ReLU, net_arch=[hidden_layer_sizes, hidden_layer_sizes])

    # Initialize and train model with suggested hyperparameters
    model = PPO("CnnPolicy",
                env,
                verbose=1,
                policy_kwargs=policy_kwargs,
                gae_lambda=gae_lambda,
                learning_rate=lr, n_steps=n_steps,
                batch_size=batch_size, n_epochs=n_epochs,
                clip_range=clip_range,
                ent_coef=ent_coef, gamma=gamma,
                tensorboard_log="logs",
                seed=43)

    model.learn(total_timesteps=45_000, tb_log_name="trial_{}".format(trial.number))

    # Evaluate the model
    rewards = evaluate_model(model, env, num_episodes=3)
    return rewards

# Create a study whose objective value is maximized ...
study = optuna.create_study(
    direction='maximize',
)
# ... and run `objective` for 15 trials.
study.optimize(func=objective, n_trials=15)


In [None]:
import json

best_params = study.best_params  # Get the best hyperparameters
print("Best hyperparameters:", best_params)

# Optionally, save best hyperparameters to a file.
# Write mode rather than append: appending a second JSON object on a re-run
# would leave the file with concatenated objects, which is not valid JSON.
with open("best_hyperparameters2.json", "w") as f:
    json.dump(best_params, f)


In [None]:
# `study.direction` is a StudyDirection enum member, so it has to be compared
# with the enum (comparing it with the string 'maximize' is always False and
# silently sorts in ascending order).
best_first = study.direction == optuna.study.StudyDirection.MAXIMIZE

# Trials without an objective value (value is None) cannot be ordered against
# floats; keep them, but list them after the ranked trials.
all_trials = study.trials
ranked_trials = sorted(
    (trial for trial in all_trials if trial.value is not None),
    key=lambda trial: trial.value,
    reverse=best_first,
)
unranked_trials = [trial for trial in all_trials if trial.value is None]
sorted_trials = ranked_trials + unranked_trials

# Append one line per trial (best first) to the text log.
with open("Trials4.txt", "a") as f:
    for trial in sorted_trials:
        f.write(str(trial))
        f.write("\n")


In [None]:
import pandas as pd

# Spreadsheet column name -> key in `trial.params`, in output column order.
param_columns = {
    "Learning Rate (lr)": "lr",
    "Batch Size": "batch_size",
    "Epochs (n_epochs)": "n_epochs",
    "Clip Range": "clip_range",
    "Steps (n_steps)": "n_steps",
    "Gamma": "gamma",
    "ent_coef": "ent_coef",
    "gae_lambda": "gae_lambda",
    "hidden_layer_sizes": "hidden_layer_sizes",
}

all_trials = study.trials  # Assuming study is your Optuna study variable

# One list per column; the trial metadata columns come first.
trial_data = {
    "Trial Number": [trial.number for trial in all_trials],
    # Store the state's name (a plain string) rather than the enum object.
    "State": [trial.state.name for trial in all_trials],
    "Values": [trial.values for trial in all_trials],
    "Datetime Start": [trial.datetime_start for trial in all_trials],
    "Datetime Complete": [trial.datetime_complete for trial in all_trials],
}
for column_name, param_key in param_columns.items():
    # `.get` leaves the cell empty instead of raising KeyError when a trial
    # does not have that parameter recorded.
    trial_data[column_name] = [trial.params.get(param_key) for trial in all_trials]

df_trials = pd.DataFrame(trial_data)

excel_path = "optuna_trials3.xlsx"
df_trials.to_excel(excel_path, index=False)

print(f"Excel file saved at: {excel_path}")
