In [None]:
from ConnectFourBoard import ConnectFourBoard
from QLearningAgent import DQNAgent
import numpy as np
import random
import torch
import matplotlib.pyplot as plt

In [None]:
def _evaluate_against_random(agent, eval_games):
    """Play ``eval_games`` games against a uniformly random opponent.

    The agent moves whenever ``agent.env.current_player == 1``; otherwise a
    move is drawn with ``random.choice`` from
    ``agent.env.get_available_actions()``.

    Exploration is switched off for the duration of the evaluation by
    setting ``agent.epsilon`` to 0.0, and the previous value is restored
    afterwards (even if a game raises), so training continues unaffected.

    Returns:
        float: percentage (0-100) of games in which ``agent.env.winner == 1``.
    """
    saved_epsilon = agent.epsilon
    # Measure the learned policy itself, not the policy plus exploration
    # noise. Presumably select_action is epsilon-greedy on agent.epsilon;
    # if it is not, this assignment is a harmless no-op.
    agent.epsilon = 0.0
    wins = 0
    try:
        for _ in range(eval_games):
            state, _ = agent.env.reset()
            done = False
            while not done:
                if agent.env.current_player == 1:
                    action = agent.select_action(state)
                else:
                    action = random.choice(agent.env.get_available_actions())
                state, _, done, _, _ = agent.env.step(action)
            if agent.env.winner == 1:
                wins += 1
    finally:
        agent.epsilon = saved_epsilon
    return (wins / eval_games) * 100


def _plot_win_percentages(eval_episodes, win_percentages):
    """Draw the evaluation win-percentage curve and show the figure."""
    _, ax = plt.subplots()
    ax.plot(eval_episodes, win_percentages)
    ax.set_xlabel("Training Episodes")
    ax.set_ylabel("Win % vs Random Baseline")
    ax.set_title("DQN Agent Evaluation Performance")
    ax.grid(True)
    plt.show()


def train_agent(agent, episodes=5000, eval_every=1000, eval_games=100):
    """Train ``agent`` by self-play and periodically evaluate it.

    In each episode the agent chooses the moves for both sides. Only the
    moves made while ``agent.env.current_player == 1`` are stored as
    transitions and followed by ``agent.update()``. After every episode
    epsilon is decayed (never below ``agent.epsilon_end``), and every
    ``agent.target_update_freq`` episodes the target network is overwritten
    with the online network's weights.

    Every ``eval_every`` episodes the agent plays ``eval_games`` games
    against a random opponent with exploration disabled. The resulting win
    percentages are printed, plotted at the end, and returned.

    Args:
        agent: object exposing ``env``, ``select_action``,
            ``store_transition``, ``update``, ``epsilon``, ``epsilon_end``,
            ``epsilon_decay``, ``target_update_freq``, ``q_net`` and
            ``target_net``.
        episodes: number of self-play training episodes.
        eval_every: evaluate after every this many episodes.
        eval_games: number of games per evaluation.

    Returns:
        tuple: ``(rewards, win_percentages)``. ``rewards`` holds the value
        of ``agent.env.winner`` at the end of each training episode (the
        game outcome, not an accumulated reward); ``win_percentages`` holds
        one entry per evaluation.
    """
    rewards = []
    win_percentages = []

    for ep in range(1, episodes + 1):
        state, _ = agent.env.reset()
        done = False

        while not done:
            if agent.env.current_player == 1:
                # Learner's turn: act, record the transition, train.
                action = agent.select_action(state)
                next_state, reward, done, _, _ = agent.env.step(action)
                agent.store_transition(state, action, reward, next_state, done)
                agent.update()
                state = next_state
            else:
                # Self-play: the same policy moves for the opponent, but
                # nothing is stored for this move.
                # NOTE(review): when this move ends the game, the learner
                # never sees a terminal transition for it, so a loss may go
                # unpenalised -- confirm against the env's reward scheme.
                action = agent.select_action(state)
                state, _, done, _, _ = agent.env.step(action)

        # Read the outcome before any evaluation below resets the env.
        rewards.append(agent.env.winner)

        agent.epsilon = max(agent.epsilon_end,
                            agent.epsilon * agent.epsilon_decay)

        if ep % agent.target_update_freq == 0:
            agent.target_net.load_state_dict(agent.q_net.state_dict())

        # Evaluate against random baseline
        if ep % eval_every == 0:
            win_percentage = _evaluate_against_random(agent, eval_games)
            win_percentages.append(win_percentage)
            print(
                f"Episode {ep}: Agent won {win_percentage:.2f}% against random baseline")

    _plot_win_percentages(
        range(eval_every, episodes + 1, eval_every), win_percentages)

    return rewards, win_percentages

100%|██████████| 10/10 [00:00<00:00, 11.25it/s]


In [None]:
# Seed the RNGs imported by this notebook so that a Restart & Run All
# reproduces the same training run. random.choice is used by the baseline
# opponent; NumPy and torch are seeded on the assumption that the agent or
# environment draws from them -- TODO confirm.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run the agent on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
env = ConnectFourBoard()
agent = DQNAgent(env, device=device)

: 

In [None]:
# Run 10,000 training episodes, pausing every 1,000 to play 100 evaluation
# games against the random baseline.
rewards, win_percentages = train_agent(
    agent,
    episodes=10_000,
    eval_every=1_000,
    eval_games=100,
)