In [None]:
# --- 1. Environment setup (on Colab) ---
# Notebook shell escape: runs pip to install the `torch` package.
!pip install torch

In [None]:
# --- 2. Copy the required files ---
# (This assumes the yakemon_rl_project/ folder has already been uploaded.)

In [None]:
# --- 3. Library imports ---
from env.battle_env import YakemonEnv
from agent.dddqn_agent import DDDQNAgent
from utils.replay_buffer import ReplayBuffer

In [None]:
# --- 4. Environment / agent initialization ---
env = YakemonEnv()
# State dimension = length of the first axis of whatever env.get_state() returns.
state_dim = env.get_state().shape[0]
action_dim = 6  # (4 basic moves + 2 switches)

# The agent is constructed from the state and action dimensions.
agent = DDDQNAgent(state_dim, action_dim)
# NOTE(review): 10000 is presumably the buffer's maximum capacity — confirm
# against ReplayBuffer's constructor.
buffer = ReplayBuffer(10000)

In [None]:
# --- 5. Training hyperparameters ---
# episodes:   number of training episodes the loop runs.
# batch_size: number of transitions sampled per agent update; updates only
#             start once the replay buffer holds at least this many.
episodes, batch_size = 500, 32

In [None]:
# --- 6. Training loop ---
# reward_list collects one total reward per episode, in episode order.
reward_list = []

for episode in range(episodes):
    # Exploration rate: starts at 0.1, drops by 0.01 for every 100 episodes,
    # and is clamped so it never goes below 0.01.
    epsilon = max(0.01, 0.1 - 0.01*(episode/100))
    state = env.reset()
    total_reward = 0

    # Run one episode until the environment reports it is finished.
    while True:
        action = agent.select_action(state, epsilon)
        next_state, reward, done, _ = env.step(action)

        total_reward += reward
        buffer.push((state, action, reward, next_state, done))

        # Train only once enough transitions exist to fill a batch.
        if len(buffer.buffer) >= batch_size:
            agent.update(buffer.sample(batch_size))

        state = next_state
        if done:
            break

    reward_list.append(total_reward)

    # Progress report every 10th episode (including episode 0).
    if not episode % 10:
        print(f"Episode {episode} - Total Reward: {total_reward:.2f}")

In [None]:
# --- 7. Result visualization (optional) ---
# Line plot of the per-episode total reward collected in reward_list.
import matplotlib.pyplot as plt

plt.plot(reward_list)
# Label the current axes in one call instead of separate pyplot calls.
plt.gca().set(
    xlabel='Episode',
    ylabel='Total Reward',
    title='Yakemon DDDQN Training',
)
plt.show()