### Mount Drive & Import Libraries

In [15]:
import os
import json
import string
import random
import numpy as np
import pprint
from google.colab import drive
from collections import defaultdict
import re  # We need regex for the HMM oracle

# --- Mount Drive ---
drive.mount('/content/drive')

# --- Configuration ---
BASE_PATH = '/content/drive/My Drive/ml-hackathon'
DATA_PATH = os.path.join(BASE_PATH, 'data')
MODEL_PATH = os.path.join(BASE_PATH, 'models')

# --- Input Files ---
# We need ALL our files for the final run
CORPUS_JSON_PATH = os.path.join(DATA_PATH, 'corpus_by_length.json')
TEST_JSON_PATH = os.path.join(DATA_PATH, 'test_by_length.json')
HMM_MODEL_PATH = os.path.join(MODEL_PATH, 'hmm_probabilities.json')
Q_TABLE_PATH = os.path.join(MODEL_PATH, 'q_table.json')

print("--- Evaluation Notebook ---")
print(f"Base path set to: {BASE_PATH}")
print("Loading all data and trained models...")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
--- Evaluation Notebook ---
Base path set to: /content/drive/My Drive/ml-hackathon
Loading all data and trained models...


### Load All Data & Models

In [17]:
def load_json_file(file_path):
    """Loads a JSON file from the given path."""
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
        print(f"Successfully loaded: {file_path}")
        return data
    except FileNotFoundError:
        print(f"ERROR: File not found at {file_path}")
    except Exception as e:
        print(f"An error occurred loading {file_path}: {e}")
    return None

# --- Load All Data ---
corpus_by_length = load_json_file(CORPUS_JSON_PATH)
test_by_length = load_json_file(TEST_JSON_PATH)
hmm_models = load_json_file(HMM_MODEL_PATH)
q_table = load_json_file(Q_TABLE_PATH) # <-- Loading our trained brain

# --- Data Correction ---
if corpus_by_length:
    corpus_by_length = {int(k): v for k, v in corpus_by_length.items()}
if test_by_length:
    test_by_length = {int(k): v for k, v in test_by_length.items()}
if hmm_models:
    hmm_models = {int(k): v for k, v in hmm_models.items()}

if corpus_by_length and test_by_length and hmm_models and q_table:
    print("\nAll data and models loaded successfully.")
else:
    print("\nError: Failed to load one or more essential files.")

# --- Constants ---
ALPHABET = list(string.ascii_uppercase)
LIVES_ALLOWED = 6

Successfully loaded: /content/drive/My Drive/ml-hackathon/data/corpus_by_length.json
Successfully loaded: /content/drive/My Drive/ml-hackathon/data/test_by_length.json
Successfully loaded: /content/drive/My Drive/ml-hackathon/models/hmm_probabilities.json
Successfully loaded: /content/drive/My Drive/ml-hackathon/models/q_table.json

All data and models loaded successfully.


### Re-define the Hangman Environment

In [18]:
class HangmanEnvironment:
    """
    This class implements the Hangman game environment.
    We add a 'set_word' method for evaluation.
    """

    def __init__(self, word_list, lives=6):
        # word_list is not needed if we set the word manually
        self.word_list = word_list
        self.total_lives = lives
        self.secret_word = ""
        self.masked_word = []
        self.lives_left = 0
        self.guessed_letters = set()
        self.game_over = True
        self.word_length = 0

    def reset_with_word(self, secret_word):
        """
        Starts a new game with a SPECIFIC word for evaluation.
        """
        self.secret_word = secret_word.upper()
        self.word_length = len(self.secret_word)
        self.masked_word = ["_"] * self.word_length
        self.lives_left = self.total_lives
        self.guessed_letters = set()
        self.game_over = False
        return self._get_current_state()

    def _get_current_state(self):
        """Returns the current state."""
        return {
            "masked_word": "".join(self.masked_word),
            "word_length": self.word_length,
            "lives_left": self.lives_left,
            "guessed_letters": sorted(list(self.guessed_letters)),
            "game_over": self.game_over
        }

    def step(self, action_letter):
        """Guesses a letter and returns the new state, reward, and game_over status."""
        if self.game_over:
            return self._get_current_state(), 0, True, {"error": "Game is already over."}

        action_letter = action_letter.upper()

        info = {"guess_type": ""}
        is_win = False
        is_loss = False

        # Case 1: Repeated guess
        if action_letter in self.guessed_letters:
            info["guess_type"] = "repeated"

        # Case 2: Wrong guess
        elif action_letter not in self.secret_word:
            self.lives_left -= 1
            self.guessed_letters.add(action_letter)
            info["guess_type"] = "wrong"

        # Case 3: Correct guess
        else:
            self.guessed_letters.add(action_letter)
            info["guess_type"] = "correct"
            new_masked_word = list(self.masked_word)
            for i, char in enumerate(self.secret_word):
                if char == action_letter:
                    new_masked_word[i] = action_letter
            self.masked_word = new_masked_word

        # --- Check for Game Over ---
        if "_" not in self.masked_word:
            self.game_over = True
            is_win = True
        elif self.lives_left <= 0:
            self.game_over = True
            is_loss = True

        # During evaluation, we don't need rewards, just info
        return self._get_current_state(), 0, self.game_over, info

### Re-define the "HMM Oracle"

In [19]:
import re

def get_letter_probabilities(state, word_length):
    """
    This is our "HMM Oracle."
    It calculates the probability of each un-guessed letter.
    It returns a list of (letter, probability) tuples.
    """

    masked_word = state["masked_word"]
    guessed_letters = set(state["guessed_letters"])

    model = hmm_models[word_length]
    word_list = corpus_by_length[word_length]

    pattern = ""
    for char in masked_word:
        if char == "_":
            pattern += f"[^{''.join(guessed_letters)}]"
        else:
            pattern += char

    try:
        regex = re.compile(f"^{pattern}$")
        candidate_words = [word for word in word_list if regex.match(word)]
    except:
        candidate_words = []

    letter_probs = defaultdict(float)

    if not candidate_words:
        prob_model = model['unigram']
        for char in ALPHABET:
            if char not in guessed_letters:
                letter_probs[char] = prob_model.get(char, 1e-6)
    else:
        blank_indices = [i for i, char in enumerate(masked_word) if char == "_"]
        total_blank_letters = 0

        for word in candidate_words:
            for i in blank_indices:
                letter_at_blank = word[i]
                if letter_at_blank not in guessed_letters:
                    letter_probs[letter_at_blank] += 1
                    total_blank_letters += 1

        if total_blank_letters > 0:
            for char in letter_probs:
                letter_probs[char] /= total_blank_letters

        if not letter_probs:
            prob_model = model['unigram']
            for char in ALPHABET:
                if char not in guessed_letters:
                    letter_probs[char] = prob_model.get(char, 1e-6)

    sorted_probs = sorted(letter_probs.items(), key=lambda item: item[1], reverse=True)
    return sorted_probs

### Re-define the Q-Learning Agent (for Evaluation)

In [20]:
class QLearningAgent:
    """
    This is the RL agent "brain" for EVALUATION.
    Epsilon is 0. Uses the new, smarter state.
    """

    def __init__(self, q_table):
        self.actions = [0, 1, 2]
        self.q_table = q_table
        print("Agent loaded with pre-trained Q-table. Ready for evaluation.")

    def _get_state_key(self, state, hmm_prob_info):
        """
        This is our STATE ABSTRACTION function.
        """
        lives = state["lives_left"]
        unique_letters_in_mask = len(set(c for c in state["masked_word"] if c != '_'))

        if not hmm_prob_info:
            best_prob = 0.0
        else:
            best_prob = hmm_prob_info[0][1]

        prob_bin = int(best_prob * 10)

        return f"L:{lives}_U:{unique_letters_in_mask}_P:{prob_bin}"

    def choose_action(self, state, hmm_prob_info):
        """
        Chooses the BEST action from the Q-table. No epsilon.
        """
        state_key = self._get_state_key(state, hmm_prob_info)
        q_values = self.q_table.get(state_key, {})

        if not q_values:
            return 0 # Default to best guess

        # Q-table keys are "0", "1", "2". We cast to int.
        return int(max(q_values, key=lambda k: q_values.get(k, 0.0)))

### The Final Evaluation

In [21]:
print("--- Starting Final Evaluation ---")
print(f"Loading test set and trained agent...")

all_test_words = [word for words in test_by_length.values() for word in words]
num_test_games = len(all_test_words)

eval_env = HangmanEnvironment(word_list=all_test_words, lives=LIVES_ALLOWED)
agent = QLearningAgent(q_table=q_table)

total_wins = 0
total_wrong_guesses = 0
total_repeated_guesses = 0

print(f"Running agent against {num_test_games} games from the test set...")

for i, word in enumerate(all_test_words):
    if (i + 1) % 200 == 0:
        print(f"  ... playing game {i+1}/{num_test_games}")

    state = eval_env.reset_with_word(word)
    word_length = state["word_length"]
    game_over = False

    game_wrong_guesses = 0
    game_repeated_guesses = 0

    while not game_over:
        # 1. Get HMM info
        hmm_prob_info = get_letter_probabilities(state, word_length)

        # 2. Agent chooses action
        action = agent.choose_action(state, hmm_prob_info)

        # 3. Translate action to letter
        guess = None
        suggested_letters = [letter for letter, prob in hmm_prob_info]

        if action == 2 and len(suggested_letters) >= 3:
            guess = suggested_letters[2]
        elif (action == 1 or action == 2) and len(suggested_letters) >= 2:
            guess = suggested_letters[1]
        elif len(suggested_letters) >= 1:
            guess = suggested_letters[0]
        else:
            # Failsafe: No valid letters from HMM
            available_letters = [l for l in ALPHABET if l not in state["guessed_letters"]]
            if available_letters:
                guess = random.choice(available_letters)
            else:
                break # Game is stuck

        # 4. Take the step
        next_state, _, game_over, info = eval_env.step(guess)

        # 5. Record stats
        if info["guess_type"] == "wrong":
            game_wrong_guesses += 1
        elif info["guess_type"] == "repeated":
            game_repeated_guesses += 1

        state = next_state

    # --- End of Game ---
    if "_" not in state["masked_word"]:
        total_wins += 1

    # --- DEFINITIVE ACCUMULATION LOGIC ---
    total_wrong_guesses += game_wrong_guesses
    total_repeated_guesses += game_repeated_guesses

print("...Evaluation Complete.")

# --- Calculate Final Scores ---
success_rate = total_wins / num_test_games

score_from_wins = total_wins
score_from_wrong = total_wrong_guesses * 5
score_from_repeated = total_repeated_guesses * 2
final_score = score_from_wins - score_from_wrong - score_from_repeated

# --- Print Final Report ---
print("\n\n--- FINAL EVALUATION RESULTS ---")
print("----------------------------------")
print(f"Total Games Played: {num_test_games}")
print(f"Total Wins:         {total_wins}")
print(f"Success Rate:       {success_rate * 100:.2f}%")
print(f"Avg. Wrong Guesses:   {total_wrong_guesses / num_test_games:.2f}")
print(f"Avg. Repeated Guesses: {total_repeated_guesses / num_test_games:.2f}")
print("\n--- SCORING BREAKDOWN ---")
print(f"Score from Wins (+1 per win):      + {score_from_wins}")
print(f"Penalty from Wrong (-5 per):     - {score_from_wrong}")
print(f"Penalty from Repeated (-2 per):  - {score_from_repeated}")
print("----------------------------------")
print(f"FINAL SCORE: {final_score}")
print("----------------------------------")

--- Starting Final Evaluation ---
Loading test set and trained agent...
Agent loaded with pre-trained Q-table. Ready for evaluation.
Running agent against 2000 games from the test set...
  ... playing game 200/2000
  ... playing game 400/2000
  ... playing game 600/2000
  ... playing game 800/2000
  ... playing game 1000/2000
  ... playing game 1200/2000
  ... playing game 1400/2000
  ... playing game 1600/2000
  ... playing game 1800/2000
  ... playing game 2000/2000
...Evaluation Complete.


--- FINAL EVALUATION RESULTS ---
----------------------------------
Total Games Played: 2000
Total Wins:         326
Success Rate:       16.30%
Avg. Wrong Guesses:   5.67
Avg. Repeated Guesses: 0.00

--- SCORING BREAKDOWN ---
Score from Wins (+1 per win):      + 326
Penalty from Wrong (-5 per):     - 56730
Penalty from Repeated (-2 per):  - 0
----------------------------------
FINAL SCORE: -56404
----------------------------------
