In [1]:
# !pip install torch numpy matplotlib
# !pip install nest-asyncio
# !pip install gymnasium
# !pip install gymnasium[accept-rom-license]
# !pip install gym

# System library installation: run the commands below in a terminal, not in the notebook (prefix with sudo if you are not root)
# apt-get update
# apt-get install -y libgl1-mesa-glx

In [2]:
import asyncio
import nest_asyncio
import os
import json

# 환경 관련 import
from env.battle_env import YakemonEnv

# 모델 관련 import

# 유틸리티 관련 import
from utils.visualization import plot_training_results

# 에이전트 관련 import
from agent.dddqn_agent import DDDQNAgent


# 컨텍스트 관련 import
from context.battle_store import store
from context.duration_store import duration_store

In [3]:
# Bind the shared context stores imported above to module-level names
battle_store, duration_store = store, duration_store

# Hyperparameter configuration (single place to tune a run)
HYPERPARAMS = dict(
    # Optimisation / exploration settings passed to the agent constructor
    learning_rate=0.0001,
    gamma=0.99,
    epsilon_start=1.0,
    epsilon_end=0.02,
    epsilon_decay=0.995,
    batch_size=128,
    memory_size=50000,
    target_update=20,
    # Run-length settings
    num_episodes=1000,
    save_interval=50,
    test_episodes=300,
    # Output dimension of get_state_vector
    state_dim=1165,
    # 4 moves + 2 switches
    action_dim=6,
    # Whether to resume from the best-performing saved model
    load_best_model=True,
    # Whether to resume from the most recently saved model
    load_last_model=False,
)

In [4]:
from training_dqn import test_agent
from training_dqn import train_agent
# Standard-library import used to timestamp the output directories
from datetime import datetime

In [5]:
# 메인 실행 코드
from utils.visualization import capture_output


if __name__ == "__main__":
    # Driver for one full run: build the environment and agent, train, plot the
    # training curves, evaluate the trained agent, and persist the test summary
    # as both JSON and a human-readable text file under results_dir.

    # Allow nested event loops so asyncio.run() can be used inside Jupyter
    nest_asyncio.apply()

    # Timestamped output directories so repeated runs never overwrite each other
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_dir = os.path.join('results', f'training_{timestamp}')
    models_dir = os.path.join('models', f'training_{timestamp}')

    # Environment setup
    env = YakemonEnv()  # actual game environment
    state_dim = HYPERPARAMS["state_dim"]
    action_dim = HYPERPARAMS["action_dim"]

    # Create the DDDQN agent from the shared hyperparameter table
    ddqn_agent = DDDQNAgent(
        state_dim=state_dim,
        action_dim=action_dim,
        learning_rate=HYPERPARAMS["learning_rate"],
        gamma=HYPERPARAMS["gamma"],
        epsilon_start=HYPERPARAMS["epsilon_start"],
        epsilon_end=HYPERPARAMS["epsilon_end"],
        epsilon_decay=HYPERPARAMS["epsilon_decay"],
        target_update=HYPERPARAMS["target_update"],
        memory_size=HYPERPARAMS["memory_size"],
        batch_size=HYPERPARAMS["batch_size"]
    )

    print("Starting DDDQN training...")
    print(f"Results will be saved in: {results_dir}")
    print(f"Models will be saved in: {models_dir}")
    print("\nHyperparameters:")
    for key, value in HYPERPARAMS.items():
        print(f"  {key}: {value}")
    print("\n" + "="*50 + "\n")

    # Train the agent; everything printed during training is captured so the
    # log lines can be handed to the plotting helper afterwards.
    with capture_output() as output:
        ddqn_rewards, ddqn_losses, ddqn_victories = asyncio.run(train_agent(
            env=env,
            agent=ddqn_agent,
            num_episodes=HYPERPARAMS["num_episodes"],
            save_path=models_dir,
            agent_name='ddqn',
            HYPERPARAMS=HYPERPARAMS
        ))

    # Visualise the training results
    log_lines = output.getvalue().splitlines()
    plot_training_results(
        rewards_history=ddqn_rewards,
        losses_history=ddqn_losses,
        agent_name='DDDQN',
        save_path=results_dir,
        victories_history=ddqn_victories,  # include the victory record
        log_lines=log_lines
    )

    print("\nTraining completed!")
    print(f"Results saved in: {results_dir}")
    print(f"Models saved in: {models_dir}")

    # Evaluate the trained agent
    print("\nStarting test phase...")
    test_results = asyncio.run(test_agent(
        env=env,
        agent=ddqn_agent,
        num_episodes=HYPERPARAMS["test_episodes"],
        HYPERPARAMS=HYPERPARAMS
    ))

    # Collect the test statistics by position in the returned sequence
    test_stats = {
        'avg_reward': test_results[0],
        'std_reward': test_results[1],
        'avg_steps': test_results[2],
        'victories': test_results[3],
        'win_rate': test_results[4]
    }

    # Nothing in this cell creates results_dir, and whether the plotting helper
    # does is not visible from here. Create it right before writing so the
    # summary is never lost to a missing directory after a long run.
    os.makedirs(results_dir, exist_ok=True)

    def _to_builtin(value):
        """Fallback for json.dump: unwrap scalar wrappers exposing .item().

        Only called for values the json module cannot serialise on its own
        (presumably numpy scalars from test_agent -- TODO confirm its return
        types); plain Python numbers never reach this function.
        """
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    # Explicit UTF-8 so the output does not depend on the platform's default
    # encoding (the text report below contains the non-ASCII character '±').
    with open(os.path.join(results_dir, 'test_results.json'), 'w', encoding='utf-8') as f:
        json.dump(test_stats, f, indent=4, default=_to_builtin)

    with open(os.path.join(results_dir, 'test_results.txt'), 'w', encoding='utf-8') as f:
        f.write("Test Results\n")
        f.write("=" * 50 + "\n\n")
        f.write(f"Average Reward: {test_stats['avg_reward']:.4f} ± {test_stats['std_reward']:.4f}\n")
        f.write(f"Average Steps: {test_stats['avg_steps']:.2f}\n")
        f.write(f"Victories: {test_stats['victories']}/{HYPERPARAMS['test_episodes']} (Win Rate: {test_stats['win_rate']:.1f}%)\n")

    print("\nTest completed!")
    print(f"Test results saved in: {results_dir}")

reset_pp: {'에너지볼': 10, '광합성': 5, '씨뿌리기': 10, '기가드레인': 15}
reset_pp: {'용의파동': 15, '폭풍': 10, '오버히트': 5, '원시의힘': 5}
reset_pp: {'냉동빔': 10, '아쿠아제트': 20, '하이드로펌프': 5, '악의파동': 15}
reset_pp: {'치근거리기': 10, '볼트체인지': 20, '10만볼트': 15, '파도타기': 15}
reset_pp: {'대지의힘': 10, '냉동펀치': 15, '엄청난힘': 5, '번개펀치': 15}
reset_pp: {'깨물어부수기': 15, '니트로차지': 20, '치근거리기': 10, '플레어드라이브': 15}
battle_store: reset_all 호출
duration_store: reset_all 호출
Starting DDDQN training...
Results will be saved in: results/training_20250530_180902
Models will be saved in: models/training_20250530_180902

Hyperparameters:
  learning_rate: 0.0001
  gamma: 0.99
  epsilon_start: 1.0
  epsilon_end: 0.02
  epsilon_decay: 0.995
  batch_size: 128
  memory_size: 50000
  target_update: 20
  num_episodes: 1000
  save_interval: 50
  test_episodes: 300
  state_dim: 1165
  action_dim: 6
  load_best_model: False
  load_last_model: False


reset_pp: {'용의춤': 20, '지진': 10, '개척하기': 20, '엄청난힘': 5}
reset_pp: {'로킥': 20, '개척하기': 20, '기습': 5, '속이기': 20}
reset_p

KeyboardInterrupt: 

In [6]:
#%reset