In [1]:
#!pip install torch numpy matplotlib

In [2]:

import os
import numpy as np
# import torch
# import torch.nn as nn
# import torch.optim as optim
import matplotlib.pyplot as plt
# from datetime import datetime
import json
import random
# from collections import deque
import asyncio

# 환경 관련 import
from env.battle_env import YakemonEnv

# 모델 관련 import

# 유틸리티 관련 import

# 에이전트 관련 import
from agent.dddqn_agent import DDDQNAgent

# RL 관련 import

# 데이터 관련 import

# 컨텍스트 관련 import
from context.battle_store import store
from context.duration_store import duration_store

In [3]:
# Global state
# Alias for the shared battle store object imported from context.battle_store.
battle_store = store
# `duration_store` is already bound by its import from context.duration_store;
# the previous `duration_store = duration_store` self-assignment was a no-op
# and has been dropped.

# Hyperparameter settings read by the training / evaluation cell.
HYPERPARAMS = {
    "learning_rate": 0.001,
    "gamma": 0.99,
    "epsilon_start": 1.0,
    "epsilon_end": 0.01,
    "epsilon_decay": 0.995,
    "batch_size": 64,
    "memory_size": 10000,
    "target_update": 10,
    "num_episodes": 1000,
    "save_interval": 100,
    "test_episodes": 100,
    "state_dim": 126,  # output dimension of get_state_vector
    "action_dim": 6,   # 4 moves + 2 switches
}

In [4]:
from training import test_agent

In [5]:
from training import train_agent

In [75]:
# 메인 실행 코드
if __name__ == "__main__":
    # 환경 초기화
    env = YakemonEnv()  # 실제 게임 환경
    state_dim = HYPERPARAMS["state_dim"]
    action_dim = HYPERPARAMS["action_dim"]
    
    # DDDQN 에이전트 생성
    ddqn_agent = DDDQNAgent(
        state_dim=state_dim,
        action_dim=action_dim,
        learning_rate=HYPERPARAMS["learning_rate"],
        gamma=HYPERPARAMS["gamma"],
        epsilon_start=HYPERPARAMS["epsilon_start"],
        epsilon_end=HYPERPARAMS["epsilon_end"],
        epsilon_decay=HYPERPARAMS["epsilon_decay"]
    )
    
    # DDDQN 에이전트 학습
    ddqn_rewards, ddqn_losses = await train_agent(
        env=env,
        agent=ddqn_agent,
        num_episodes=HYPERPARAMS["num_episodes"],
        agent_name='ddqn'
    )
    
    # # 학습 결과 시각화
    # plot_training_results(
    #     rewards_history=ddqn_rewards,
    #     losses_history=ddqn_losses,
    #     agent_name='DDDQN'
    # )
    
    # 학습된 에이전트 테스트
    await test_agent(
        env=env,
        agent=ddqn_agent,
        num_episodes=HYPERPARAMS["test_episodes"]
    )

[Episode 1] My Team (BattlePokemon):
{'base': <p_models.pokemon_info.PokemonInfo object at 0x000002094927A2A0>, 'current_hp': 170, 'pp': {'불꽃펀치': 15, '플레어드라이브': 15, '도깨비불': 15, '지진': 10}, 'rank': {'attack': 0, 'sp_attack': 0, 'defense': 0, 'sp_defense': 0, 'speed': 0, 'accuracy': 0, 'dodge': 0, 'critical': 0}, 'status': [], 'position': None, 'is_active': False, 'locked_move': None, 'locked_move_turn': None, 'is_protecting': False, 'used_move': None, 'had_missed': False, 'had_rank_up': False, 'is_charging': False, 'charging_move': None, 'received_damage': None, 'is_first_turn': False, 'cannot_move': False, 'form_num': 0, 'form_condition': None, 'un_usable_move': None, 'lost_type': False, 'temp_type': None, 'substitute': None}
{'base': <p_models.pokemon_info.PokemonInfo object at 0x0000020949278920>, 'current_hp': 147, 'pp': {'악의파동': 15, '유턴': 20, '더스트슈트': 5, '하이드로펌프': 5}, 'rank': {'attack': 0, 'sp_attack': 0, 'defense': 0, 'sp_defense': 0, 'speed': 0, 'accuracy': 0, 'dodge': 0, 'critica

TypeError: apply_move_effect_after_multi_damage() takes from 4 to 5 positional arguments but 6 were given

In [8]:
# Visualisation helper
def plot_training_results(
    rewards_history: list,
    losses_history: list,
    agent_name: str,
    save_path: str = 'results'
) -> None:
    """
    Plot, save and display the reward and loss curves of a training run.

    Two separate figures are produced, one per history. Each history is
    plotted against its index (x-axis labelled 'Episode'), written to
    ``<save_path>/<agent_name>_rewards.png`` and
    ``<save_path>/<agent_name>_losses.png`` respectively, and then shown.

    Args:
        rewards_history: Sequence of reward values to plot.
        losses_history: Sequence of loss values to plot.
        agent_name: Label used in the figure titles and output file names.
        save_path: Directory the PNG files are written to; created if missing.

    Returns:
        None.
    """
    os.makedirs(save_path, exist_ok=True)

    # (data, title suffix, y-axis label, file-name suffix) for each figure.
    curves = (
        (rewards_history, 'Training Rewards', 'Average Reward', 'rewards'),
        (losses_history, 'Training Losses', 'Average Loss', 'losses'),
    )

    for history, title, ylabel, suffix in curves:
        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            ax.plot(history)
            ax.set_title(f'{agent_name} {title}')
            ax.set_xlabel('Episode')
            ax.set_ylabel(ylabel)
            fig.savefig(os.path.join(save_path, f'{agent_name}_{suffix}.png'))
            plt.show()
        finally:
            # Release the figure explicitly so repeated calls (or backends
            # where show() does not close figures) do not accumulate them.
            plt.close(fig)