# 🚀 AGGRESSIVE PERFORMANCE IMPROVEMENTS

**Goal:** Get 40-50%+ win rate and positive final score

**Strategy:**
1. Fix HMM to give MUCH better predictions
2. Add frequency-based smart guessing
3. If still bad, implement DQN

**Current:** 19% win rate, -55K score ❌
**Target:** 45%+ win rate, +50K score ✅

In [None]:
import sys
sys.path.append('../src')

import numpy as np
import pickle
import random
from collections import Counter, defaultdict
from tqdm import tqdm

from hmm_model import HangmanHMM
from hangman_env import HangmanEnv
from utils import calculate_final_score

# Load test words: one per line, whitespace-trimmed and lower-cased,
# skipping lines that are blank after trimming.
test_path = '../Data/test.txt'
with open(test_path, 'r', encoding='utf-8') as f:
    test_words = [entry.lower() for entry in map(str.strip, f) if entry]
# Keep only alphabetic characters, then drop words that end up empty.
test_words = [
    letters_only
    for letters_only in (
        ''.join(filter(str.isalpha, word.lower())) for word in test_words
    )
    if letters_only
]

print(f"Loaded {len(test_words)} test words")

# Load corpus for analysis.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at an artifact produced by a trusted preprocessing run.
with open('../results/preprocessed_data.pkl', 'rb') as f:
    preprocessed_data = pickle.load(f)
corpus_words = preprocessed_data['words']
print(f"Loaded {len(corpus_words)} corpus words")

## Step 1: Build MUCH SMARTER HMM-based Agent

Key improvements:
- Use FULL word matching (not just patterns)
- Filter corpus by word length
- Use actual letter frequencies from matching words
- Pattern-based filtering

In [None]:
class SmartHangmanAgent:
    """Hangman guesser that filters a word list against the revealed pattern.

    For every guess the agent keeps only the corpus words that are still
    consistent with the masked word and the letters guessed so far, then
    proposes the unguessed letter that occurs most often in those words.
    When no corpus word is consistent it falls back to corpus-wide letter
    frequencies.

    Attributes:
        corpus_words: The word collection passed to the constructor.
        alphabet: The 26 lowercase ASCII letters used for fallback guesses.
        words_by_length: Mapping of word length -> list of corpus words.
        global_freq: Mapping of each alphabet letter -> its share of all
            characters in the corpus (all zeros for an empty corpus).
    """

    def __init__(self, corpus_words):
        """Index the corpus by word length and compute fallback frequencies.

        Args:
            corpus_words: Iterable of words (strings) to learn from.
        """
        self.corpus_words = corpus_words
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'

        # Group corpus by length for faster lookup, and tally characters for
        # the global fallback in the same pass.
        self.words_by_length = defaultdict(list)
        letter_counts = Counter()
        for word in corpus_words:
            self.words_by_length[len(word)].append(word)
            letter_counts.update(word)

        # Overall letter frequency (fallback). An empty corpus would otherwise
        # divide by zero; with no observations every letter gets frequency 0.
        total = sum(letter_counts.values())
        if total:
            self.global_freq = {
                char: letter_counts[char] / total for char in self.alphabet
            }
        else:
            self.global_freq = {char: 0.0 for char in self.alphabet}

    def get_matching_words(self, masked_word, guessed_letters):
        """Get all corpus words matching the current pattern.

        Args:
            masked_word: Sequence with one entry per position; a revealed
                position holds its letter, an unrevealed one holds None.
            guessed_letters: Collection of letters guessed so far.

        Returns:
            List of same-length corpus words consistent with the pattern, in
            corpus order.
        """
        candidates = self.words_by_length.get(len(masked_word), [])
        # Build the set once so each per-character membership test is O(1).
        guessed = set(guessed_letters)
        return [
            word for word in candidates
            if self._matches_pattern(word, masked_word, guessed)
        ]

    def _matches_pattern(self, word, masked_word, guessed_letters):
        """Check if word matches the masked pattern.

        A word matches when it has the pattern's length, agrees with every
        revealed position, and has no already-guessed letter sitting in an
        unrevealed position (such a letter would have been revealed or is
        known to be absent).
        """
        if len(word) != len(masked_word):
            return False

        for w_char, m_char in zip(word, masked_word):
            if m_char is not None:
                # Position has been revealed
                if w_char != m_char:
                    return False
            elif w_char in guessed_letters:
                # Position is blank, so it cannot hold a guessed letter
                return False

        return True

    def _fallback_letter(self, guessed):
        """Return the globally most frequent unguessed letter, or None."""
        available = [c for c in self.alphabet if c not in guessed]
        if not available:
            return None
        # max() keeps the first maximum, so ties resolve in alphabet order.
        return max(available, key=self.global_freq.__getitem__)

    def predict_letter(self, masked_word, guessed_letters):
        """Predict best letter to guess.

        Args:
            masked_word: Sequence with None at unrevealed positions.
            guessed_letters: Collection of letters guessed so far.

        Returns:
            The unguessed character occurring most often across the matching
            corpus words; if there are no matches (or they contribute no
            unguessed character) the most frequent unguessed alphabet letter
            by global frequency; None when every alphabet letter has already
            been guessed.
        """
        guessed = set(guessed_letters)
        matches = self.get_matching_words(masked_word, guessed)

        if matches:
            # Count letter occurrences in matching words, ignoring letters
            # that were already tried.
            letter_counts = Counter(
                char for word in matches for char in word
                if char not in guessed
            )
            if letter_counts:
                return letter_counts.most_common(1)[0][0]

        # No usable match information - fall back to global frequency.
        return self._fallback_letter(guessed)

print("‚úì SmartHangmanAgent defined")

In [None]:
# Test the smart agent.
# Build it once from the corpus: the constructor indexes words by length and
# computes the global letter frequencies used as the fallback guess order.
smart_agent = SmartHangmanAgent(corpus_words)

def play_game_smart(agent, word):
    """Run one Hangman game for `word`, letting `agent` choose every guess.

    The environment is created with 6 lives. Each turn the agent is shown the
    masked word and the letters guessed so far; if it returns None, the first
    unguessed letter of the alphabet is played instead, and the game is
    abandoned when no letter is left.

    Returns:
        Dict with 'won' (taken from env.won), 'wrong_guesses' and
        'repeated_guesses' (taken from env.get_stats()).
    """
    env = HangmanEnv(word, max_lives=6)
    env.reset()

    while not env.done:
        pattern = env.get_masked_word_list()
        guess = agent.predict_letter(pattern, env.guessed_letters)

        if guess is None:
            # Agent declined to answer: play the first untried letter.
            untried = [
                letter for letter in 'abcdefghijklmnopqrstuvwxyz'
                if letter not in env.guessed_letters
            ]
            if not untried:
                break
            guess = untried[0]

        _, _, finished, _ = env.step(guess)
        if finished:
            break

    summary = env.get_stats()
    outcome = {}
    outcome['won'] = env.won
    outcome['wrong_guesses'] = summary['wrong_count']
    outcome['repeated_guesses'] = summary['repeated_count']
    return outcome

print("\nüöÄ TESTING SMART AGENT ON FULL TEST SET (2000 words)")
print("="*60)

# Play one game per test word (at most the first 2000) and keep each outcome.
smart_results = []
for word in tqdm(test_words[:2000], desc="Smart Agent"):
    result = play_game_smart(smart_agent, word)
    smart_results.append(result)

# Calculate metrics: wins, win rate, and the guess totals passed to the scorer.
smart_wins = len([game for game in smart_results if game['won']])
smart_rate = smart_wins / len(smart_results)
smart_wrong = sum(game['wrong_guesses'] for game in smart_results)
smart_repeated = sum(game['repeated_guesses'] for game in smart_results)
smart_score = calculate_final_score(smart_rate, smart_wrong, smart_repeated, len(smart_results))

# Emit the report as a single write; the text matches line-by-line printing.
summary_lines = [
    "\n" + "="*60,
    "SMART AGENT RESULTS",
    "="*60,
    f"Win Rate: {smart_rate:.4f} ({smart_rate*100:.2f}%)",
    f"Total Wrong: {smart_wrong}",
    f"Avg Wrong/Game: {smart_wrong/len(smart_results):.2f}",
    f"Total Repeated: {smart_repeated}",
    f"FINAL SCORE: {smart_score:.2f}",
    "="*60,
]
print("\n".join(summary_lines))

## Step 2: If Smart Agent Works, Enhance with RL

If smart agent gets 40%+, we can use it as the new baseline and add RL on top.

In [None]:
# Persist the smart agent only when its win rate clears the 35% bar;
# otherwise just report that DQN is the next thing to try.
if smart_rate > 0.35:
    print(f"\n‚úÖ SMART AGENT IS EXCELLENT! ({smart_rate*100:.1f}% win rate)")
    print("Saving as new baseline...")

    # Bundle the agent with the metrics it achieved on the test run.
    smart_agent_data = dict(
        agent=smart_agent,
        win_rate=smart_rate,
        score=smart_score,
    )

    with open('../models/smart_agent.pkl', 'wb') as f:
        pickle.dump(smart_agent_data, f)

    print("‚úì Saved to ../models/smart_agent.pkl")
    print("\nThis is now your best model!")
else:
    print(f"\n‚ö†Ô∏è Smart agent got {smart_rate*100:.1f}% - need to try DQN...")

## Step 3: If Needed - Implement Deep Q-Network (DQN)

Only run this if smart agent still doesn't achieve 40%+ win rate.

In [None]:
# DQN implementation - only if needed
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

class DQNetwork(nn.Module):
    """Feed-forward Q-value estimator for Hangman.

    Maps a state vector of size `state_dim` to `action_dim` Q-values through
    two `hidden_dim`-wide ReLU layers (each followed by 20% dropout) and a
    128-unit ReLU layer.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=256):
        super().__init__()
        # Layer order fixes the Sequential indices, which are the parameter
        # names in the state dict (network.0, network.3, network.6, network.8).
        layers = [
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, 128),
            nn.ReLU(),
            nn.Linear(128, action_dim),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the Q-values produced by the layer stack for input `x`."""
        q_values = self.network(x)
        return q_values

class DQNAgent:
    """DQN Agent for Hangman.

    Holds an online Q-network, a target network initialised with the same
    weights, an Adam optimizer, a bounded replay memory and the
    epsilon-greedy exploration settings.

    Args:
        state_dim: Length of the state vector fed to the networks. Defaults
            to 78, the length of the vector built by `encode_state`.
        action_dim: Number of network outputs (one per letter).
        learning_rate: Adam learning rate for the online network.
    """

    def __init__(self, state_dim=26*3, action_dim=26, learning_rate=0.001):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Q-networks
        self.q_network = DQNetwork(state_dim, action_dim).to(self.device)
        self.target_network = DQNetwork(state_dim, action_dim).to(self.device)
        self.target_network.load_state_dict(self.q_network.state_dict())
        # The target network is not handed to the optimizer; keep its dropout
        # layers disabled so its outputs are deterministic.
        self.target_network.eval()

        self.optimizer = optim.Adam(self.q_network.parameters(), lr=learning_rate)
        self.memory = deque(maxlen=50000)

        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.batch_size = 64
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'

    def encode_state(self, masked_word, guessed_letters, smart_probs):
        """Encode state as vector.

        Args:
            masked_word: Current masked word. Not consumed yet (see note in
                the body).
            guessed_letters: Iterable of single characters guessed so far.
            smart_probs: Mapping of letter -> score; letters missing from the
                mapping count as 0.0.

        Returns:
            1-D float32 tensor of length 78 on `self.device`, laid out as
            [26 word slots | 26 guessed flags | 26 smart scores].
        """
        # NOTE(review): the masked-word slot is reserved but never populated,
        # so it is always zero. Confirm the intended encoding before training.
        word_encoding = np.zeros(26)

        # Guessed letters (26-dim binary); characters outside a-z are ignored
        guessed_encoding = np.zeros(26)
        for char in guessed_letters:
            idx = ord(char) - ord('a')
            if 0 <= idx < 26:
                guessed_encoding[idx] = 1.0

        # Smart agent probabilities (26-dim)
        smart_encoding = np.array([smart_probs.get(c, 0.0) for c in self.alphabet])

        # Combine: 26 + 26 + 26 = 78 dim
        state = np.concatenate([word_encoding, guessed_encoding, smart_encoding])
        return torch.as_tensor(state, dtype=torch.float32, device=self.device)

    def select_action(self, state, available_actions):
        """Epsilon-greedy action selection.

        Args:
            state: 1-D state tensor as produced by `encode_state`.
            available_actions: Sequence of letters that may still be guessed.

        Returns:
            A letter from `available_actions`: a uniformly random one with
            probability `self.epsilon`, otherwise the one with the highest
            Q-value. None when no (valid a-z) action is available.
        """
        if not available_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(available_actions)

        # no_grad() does not switch dropout off, so evaluate in eval mode and
        # restore whatever mode the network was in afterwards.
        was_training = self.q_network.training
        self.q_network.eval()
        try:
            with torch.no_grad():
                q_values = self.q_network(state.unsqueeze(0))
        finally:
            self.q_network.train(was_training)

        # Mask unavailable actions with -inf so argmax can only pick a letter
        # that is still available.
        mask = torch.full((26,), float('-inf'), device=self.device)
        any_valid = False
        for action in available_actions:
            idx = ord(action) - ord('a')
            if 0 <= idx < 26:
                mask[idx] = q_values[0, idx]
                any_valid = True

        if not any_valid:
            return None

        best_idx = torch.argmax(mask).item()
        return chr(ord('a') + best_idx)

print("‚úì DQN implementation ready (if needed)")

In [None]:
# Decision point: use smart agent or train DQN
divider = "="*60
print(divider)
print("DECISION POINT")
print(divider)

# Choose the report for the win-rate tier: >=40% is final, >=30% is a
# candidate for RL on top, anything lower means DQN is needed.
if smart_rate >= 0.40:
    verdict = [
        f"\nüéâ SMART AGENT IS EXCELLENT: {smart_rate*100:.1f}% win rate!",
        f"   Final Score: {smart_score:.2f}",
        "\n‚úÖ USE THIS AS YOUR FINAL MODEL!",
        "\nNo need for DQN - smart word matching is the key!",
    ]
elif smart_rate >= 0.30:
    verdict = [
        f"\n‚úì Smart agent is decent: {smart_rate*100:.1f}% win rate",
        f"  Final Score: {smart_score:.2f}",
        "\n‚Üí Could try adding RL on top to boost to 40%+",
    ]
else:
    verdict = [
        f"\n‚ö†Ô∏è Smart agent: {smart_rate*100:.1f}% win rate",
        f"  Final Score: {smart_score:.2f}",
        "\n‚Üí Need to implement and train DQN...",
        "   (This will take 1-2 hours)",
    ]

for message in verdict:
    print(message)

print("\n" + divider)