In [None]:
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import pandas as pd
import sys
import os
# Derive the directory to import project modules from: take the parent of the
# current working directory and swap every "notebooks" substring in that path
# for "scripts".
current_working_dir = os.getcwd()
project_path = os.path.abspath(
    os.path.join(current_working_dir, os.pardir)
).replace("notebooks", "scripts")
print(project_path)

# Register the directory only once, so re-running the cell does not pile up
# duplicate sys.path entries.
if sys.path.count(project_path) == 0:
    sys.path.append(project_path)

# NOTE(review): `environment` is presumably a module inside project_path;
# confirm against the repository layout.
from environment import TradingEnv

print("Imported TradingEnv successfully.")


# Function to optimize PPO hyperparameters
def optimize_ppo(trial, data, total_timesteps=5000):
    """Optuna objective: train PPO with sampled hyperparameters and score it.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Dataset handed to ``TradingEnv``. The evaluation rollout runs
            for ``len(data) // 2`` steps.
        total_timesteps: Training budget forwarded to ``PPO.learn``. Defaults
            to 5000, the value that was previously hard-coded.

    Returns:
        float: Sum of the rewards collected during the evaluation rollout
        (higher is better).
    """
    # Sample the PPO hyperparameters. ``suggest_float`` replaces the
    # deprecated ``suggest_uniform`` / ``suggest_loguniform`` helpers;
    # ``log=True`` keeps the log-scaled sampling of the original ranges.
    n_steps = trial.suggest_categorical('n_steps', [512, 1024, 2048])
    gamma = trial.suggest_float('gamma', 0.95, 0.999, log=True)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    clip_range = trial.suggest_float('clip_range', 0.1, 0.3)
    gae_lambda = trial.suggest_float('gae_lambda', 0.9, 0.95)
    ent_coef = trial.suggest_float('ent_coef', 1e-8, 1e-2, log=True)
    vf_coef = trial.suggest_float('vf_coef', 0.1, 0.9)

    # Create the trading environment (a single env wrapped as a VecEnv).
    env = DummyVecEnv([lambda: TradingEnv(data)])

    try:
        # Train PPO model with trial-specific hyperparameters
        model = PPO(
            "MlpPolicy",
            env,
            learning_rate=learning_rate,
            n_steps=n_steps,
            gamma=gamma,
            clip_range=clip_range,
            gae_lambda=gae_lambda,
            ent_coef=ent_coef,
            vf_coef=vf_coef,
            verbose=0,
        )

        # Train the model for a short duration to evaluate its performance
        model.learn(total_timesteps=total_timesteps)

        # Evaluate the model by accumulating reward over half the dataset.
        obs = env.reset()
        total_reward = 0.0
        for _ in range(len(data) // 2):
            action, _states = model.predict(obs, deterministic=True)
            # A VecEnv resets a finished sub-environment on its own and
            # returns the fresh observation, so no manual reset is needed.
            obs, rewards, _dones, _infos = env.step(action)
            # VecEnv rewards are arrays with one entry per environment; take
            # the single env's reward so the objective is a plain float.
            total_reward += float(rewards[0])
    finally:
        # Release the environment even if training or evaluation fails.
        env.close()

    # Return the performance metric (higher is better)
    return total_reward


# Define dataset and load data

index = current_working_dir.find("trading_bot_rl_ppo")

# Cut the working directory right after "trading_bot_rl_ppo"
# (presumably the project root).
if index != -1:
    base_path = current_working_dir[:index + len("trading_bot_rl_ppo")]
else:
    base_path = current_working_dir  # marker not found: keep the path unchanged
# Build the relative path from its components so the separator matches the
# host OS; a literal with backslashes only resolves on Windows.
path_data = os.path.join(
    "data",
    "processed",
    "du_lieu_phan_tich",
    "2015",
    "du_lieu_vang_phan_tich_2015_01.csv",
)
data_path = os.path.join(base_path, path_data)
data = pd.read_csv(data_path)

# Optimize hyperparameters using Optuna
print("Starting hyperparameter optimization...")
study = optuna.create_study(direction="maximize")
study.optimize(lambda trial: optimize_ppo(trial, data), n_trials=5)

# Print the best hyperparameters found
print("Best hyperparameters:")
print(study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2024-12-10 14:15:01,677] A new study created in memory with name: no-name-14f6cfd3-c64b-45ba-a185-8392d1617187


Starting hyperparameter optimization...


  gamma = trial.suggest_loguniform('gamma', 0.95, 0.999)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
  clip_range = trial.suggest_uniform('clip_range', 0.1, 0.3)
  gae_lambda = trial.suggest_uniform('gae_lambda', 0.9, 0.95)
  ent_coef = trial.suggest_loguniform('ent_coef', 1e-8, 1e-2)
  vf_coef = trial.suggest_uniform('vf_coef', 0.1, 0.9)
[I 2024-12-10 14:15:44,507] Trial 0 finished with value: 0.0 and parameters: {'n_steps': 2048, 'gamma': 0.9797733615331293, 'learning_rate': 2.9479096437114144e-05, 'clip_range': 0.1436051657123369, 'gae_lambda': 0.9079628517578694, 'ent_coef': 8.722763836081246e-05, 'vf_coef': 0.20723400259777877}. Best is trial 0 with value: 0.0.
  gamma = trial.suggest_loguniform('gamma', 0.95, 0.999)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
  clip_range = trial.suggest_uniform('clip_range', 0.1, 0.3)
  gae_lambda = trial.suggest_uniform('gae_lambda', 0.9, 0.95)
  ent_coef = trial.suggest_loguniform('ent_co

Best hyperparameters:
{'n_steps': 2048, 'gamma': 0.9797733615331293, 'learning_rate': 2.9479096437114144e-05, 'clip_range': 0.1436051657123369, 'gae_lambda': 0.9079628517578694, 'ent_coef': 8.722763836081246e-05, 'vf_coef': 0.20723400259777877}
