# RL Agent Implementation

This notebook loads the trained Hangman HMM, initializes a Q-learning agent, and runs a single action-selection and Q-value-update step on a test word as a smoke test.


In [1]:
import pickle
import random
import sys

import numpy as np

# Make the project sources importable before the local imports below.
sys.path.append('../src')

from rl_agent import QLearningAgent, HMGPriorAgent
from hmm_model import HangmanHMM
from hangman_env import HangmanEnv
from utils import encode_state

# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune is collected here so the rest of the
# notebook contains no magic values.
SEED = 42
HMM_MODEL_PATH = '../models/hmm_model.pkl'
# Passed through to QLearningAgent unchanged; see rl_agent for their semantics.
AGENT_PARAMS = {
    'learning_rate': 0.1,
    'discount_factor': 0.95,
    'epsilon': 1.0,
    'epsilon_decay': 0.995,
    'epsilon_min': 0.01,
}

# Seed the global RNGs so that Restart & Run All reproduces the same outputs.
# NOTE(review): assumes QLearningAgent draws from the global `random` /
# `numpy.random` generators -- confirm in rl_agent; if it owns a private
# generator, that one has to be seeded instead.
random.seed(SEED)
np.random.seed(SEED)

# Load HMM model
# NOTE(review): the .pkl extension suggests pickle-based deserialization --
# only load model files that come from a trusted source.
hmm = HangmanHMM()
hmm.load(HMM_MODEL_PATH)
print("HMM model loaded")

# Initialize RL agent
agent = QLearningAgent(**AGENT_PARAMS)
print(f"RL agent initialized with epsilon={agent.epsilon}")


Model loaded from ../models/hmm_model.pkl
HMM model loaded
RL agent initialized with epsilon=1.0


In [2]:
# Test agent action selection
print("Testing agent action selection:")

# Number of lives the test environment is created with.
MAX_LIVES = 6


def featurize(env, hmm, word_length):
    """Build the agent's inputs from the environment's current position.

    The same three-step pipeline is needed both before and after a step, so
    it lives in one place instead of being copy-pasted.

    Args:
        env: Environment exposing ``get_masked_word_list()``,
            ``guessed_letters`` and ``lives``.
        hmm: Model exposing ``predict_letter_probabilities()``.
        word_length: Length of the hidden word.

    Returns:
        Tuple ``(masked_list, hmm_probs, state_features)``: the masked word
        reported by ``env``, the HMM output for it, and the result of
        ``encode_state`` on both.
    """
    masked = env.get_masked_word_list()
    probs = hmm.predict_letter_probabilities(masked, env.guessed_letters, word_length)
    features = encode_state(masked, env.guessed_letters, probs,
                            env.lives, word_length)
    return masked, probs, features


# Create a simple test state
test_word = "test"
word_length = len(test_word)
env = HangmanEnv(test_word, max_lives=MAX_LIVES)
state = env.reset()

# Features of the position before any guess has been made.
masked_list, hmm_probs, state_features = featurize(env, hmm, word_length)

action = agent.select_action(state_features, hmm_probs, env.guessed_letters)
print(f"Selected action: {action}")
print(f"Current epsilon: {agent.get_epsilon():.4f}")

# Update Q-value (example)
next_state, reward, done, info = env.step(action)
# Re-featurize after the step so the update sees the post-guess position.
next_masked, next_hmm_probs, next_state_features = featurize(env, hmm, word_length)

agent.update_q_value(state_features, action, reward, next_state_features,
                     next_hmm_probs, env.guessed_letters, done, current_hmm_probs=hmm_probs)

print(f"Reward received: {reward:.2f}")
print(f"Q-table size: {len(agent.q_table)} entries")
print("✓ Agent test completed")


Testing agent action selection:
Selected action: d
Current epsilon: 1.0000
Reward received: -1.00
Q-table size: 2 entries
✓ Agent test completed
