# Notebook 3: The Reinforcement Learning (RL) Environment

### Mount Drive & Define Paths

In [5]:
import os
import json
import string
import random
import numpy as np
import pprint
from google.colab import drive

# --- Mount Drive ---
# All inputs and outputs of this notebook live on Google Drive, so it must be
# mounted before any path below can be used.
drive.mount('/content/drive')

# --- Configuration ---
# Project root on Drive, with one sub-folder for data and one for models.
BASE_PATH = '/content/drive/My Drive/ml-hackathon'
DATA_PATH, MODEL_PATH = (
    os.path.join(BASE_PATH, sub_dir) for sub_dir in ('data', 'models')
)

# --- Input Files (from Notebook 1 & 2) ---
CORPUS_JSON_PATH = os.path.join(DATA_PATH, 'corpus_by_length.json')
TEST_JSON_PATH = os.path.join(DATA_PATH, 'test_by_length.json')
HMM_MODEL_PATH = os.path.join(MODEL_PATH, 'hmm_probabilities.json')

# --- Output File (for our RL Agent) ---
Q_TABLE_PATH = os.path.join(MODEL_PATH, 'q_table.json')

# Echo the resolved locations so a wrong folder is spotted before loading.
print(f"Base path set to: {BASE_PATH}")
print("All required input files will be loaded from:")
print("\n".join(
    f" - {input_path}"
    for input_path in (CORPUS_JSON_PATH, TEST_JSON_PATH, HMM_MODEL_PATH)
))
print(f"The trained Q-table will be saved to: {Q_TABLE_PATH}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Base path set to: /content/drive/My Drive/ml-hackathon
All required input files will be loaded from:
 - /content/drive/My Drive/ml-hackathon/data/corpus_by_length.json
 - /content/drive/My Drive/ml-hackathon/data/test_by_length.json
 - /content/drive/My Drive/ml-hackathon/models/hmm_probabilities.json
The trained Q-table will be saved to: /content/drive/My Drive/ml-hackathon/models/q_table.json


### Load All Preprocessed Data & Models

In [6]:
def load_json_file(file_path):
    """Load and parse a JSON file.

    The file is always decoded as UTF-8 (the encoding mandated for JSON
    interchange) rather than the platform's default locale encoding, so
    non-ASCII content is read the same way on every machine.

    :param file_path: Path of the JSON file to read.
    :return: The parsed JSON content, or None if the file is missing or
             could not be read/parsed. Failures are reported with print()
             instead of being raised, so callers must check for None.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"ERROR: File not found at {file_path}")
        print("Please make sure your previous notebooks ran successfully.")
        return None
    except Exception as e:
        # Deliberately broad: any read/parse problem is reported and turned
        # into a None result so the calling cell can decide how to proceed.
        print(f"An error occurred loading {file_path}: {e}")
        return None

    print(f"Successfully loaded: {file_path}")
    return data

# --- Load Data ---
# Files are read in the order corpus, test, HMM. Each result is the parsed
# JSON content, or None when load_json_file could not read the file.
corpus_by_length, test_by_length, hmm_models = (
    load_json_file(json_path)
    for json_path in (CORPUS_JSON_PATH, TEST_JSON_PATH, HMM_MODEL_PATH)
)

# --- Data Correction ---
# JSON object keys are always strings, so the integer keys written by the
# earlier notebooks come back as text. Convert them back to int; anything
# that failed to load (or is empty) is left untouched.
corpus_by_length = (
    {int(key): value for key, value in corpus_by_length.items()}
    if corpus_by_length else corpus_by_length
)
test_by_length = (
    {int(key): value for key, value in test_by_length.items()}
    if test_by_length else test_by_length
)
hmm_models = (
    {int(key): value for key, value in hmm_models.items()}
    if hmm_models else hmm_models
)

# Report a summary only when every input is present and non-empty.
if not all((corpus_by_length, test_by_length, hmm_models)):
    print("\nError: Failed to load one or more essential files. Please check paths and previous notebooks.")
else:
    print("\nAll data and models loaded successfully.")
    print(f"Loaded {len(corpus_by_length)} corpus word groups.")
    print(f"Loaded {len(test_by_length)} test word groups.")
    print(f"Loaded {len(hmm_models)} HMM models.")

# --- Constants ---
ALPHABET = [*string.ascii_uppercase]  # the 26 uppercase letters 'A'..'Z'
LIVES_ALLOWED = 6  # As specified in the problem statement

Successfully loaded: /content/drive/My Drive/ml-hackathon/data/corpus_by_length.json
Successfully loaded: /content/drive/My Drive/ml-hackathon/data/test_by_length.json
Successfully loaded: /content/drive/My Drive/ml-hackathon/models/hmm_probabilities.json

All data and models loaded successfully.
Loaded 24 corpus word groups.
Loaded 21 test word groups.
Loaded 24 HMM models.


### The Hangman Environment Class

In [7]:
class HangmanEnvironment:
    """
    Hangman game environment for a reinforcement-learning agent.

    Usage: call reset() to start a game, then call step(letter) repeatedly
    until the returned game_over flag is True.

    Reward scheme (class attributes; override them to tune the rewards):
      - REWARD_CORRECT  (+1):  the guessed letter is in the secret word
      - REWARD_REPEATED (-2):  the letter was already guessed (no life lost)
      - REWARD_INVALID  (-2):  the guess is not a single character (no life lost)
      - REWARD_WRONG    (-5):  the letter is not in the secret word (one life lost)
      - REWARD_WIN      (+20): the guess revealed the last hidden letter
      - REWARD_LOSS     (-20): the guess used up the last life
    On the final step the win/loss reward replaces the per-guess reward;
    it is not added to it.
    """

    REWARD_CORRECT = 1
    REWARD_REPEATED = -2
    REWARD_INVALID = -2
    REWARD_WRONG = -5
    REWARD_WIN = 20
    REWARD_LOSS = -20

    def __init__(self, word_list, lives=6):
        """
        Initializes the environment. No game is running until reset() is called.
        :param word_list: A list of words to choose from for the game.
        :param lives: Number of wrong guesses allowed.
        """
        self.word_list = word_list
        self.total_lives = lives

        # Game state variables
        self.secret_word = ""
        # Defined here (not only in reset) so the state can be reported even
        # when step() is called before the first reset().
        self.word_length = 0
        self.masked_word = []
        self.lives_left = 0
        self.guessed_letters = set()
        self.game_over = True

    def reset(self):
        """
        Starts a new game of Hangman.
        - Picks a new secret word at random from word_list (upper-cased).
        - Resets all state variables.
        - Returns the initial state (see _get_current_state).
        random.choice raises IndexError if word_list is empty.
        """
        # 1. Pick a new secret word
        self.secret_word = random.choice(self.word_list).upper()
        self.word_length = len(self.secret_word)

        # 2. Reset state variables
        self.masked_word = ["_"] * self.word_length
        self.lives_left = self.total_lives
        self.guessed_letters = set()
        self.game_over = False

        # Return the initial state for the agent
        return self._get_current_state()

    def _get_current_state(self):
        """
        Packages the current game state for the RL agent.
        :return: dict with keys "masked_word" (str, "_" for hidden positions),
                 "word_length", "lives_left", "guessed_letters" (sorted list)
                 and "game_over".
        """
        return {
            "masked_word": "".join(self.masked_word),
            "word_length": self.word_length,
            "lives_left": self.lives_left,
            "guessed_letters": sorted(self.guessed_letters),
            "game_over": self.game_over
        }

    def step(self, action_letter):
        """
        Performs one "step" in the game by guessing a letter.
        :param action_letter: The letter to guess (e.g., 'A'); it is
                              upper-cased before being compared.
        :return: (next_state, reward, game_over, info)
                 info["guess_type"] is one of "repeated", "wrong", "correct"
                 or "invalid"; info["game_result"] ("win"/"loss") is added on
                 the step that ends the game; info["error"] is set when the
                 call could not be treated as a guess.
        """
        if self.game_over:
            # Should not happen if agent is coded correctly
            return self._get_current_state(), 0, True, {"error": "Game is already over."}

        info = {"guess_type": ""}

        # A guess must be exactly one character. Without this check the
        # substring tests below would accept "" or a fragment such as "PP"
        # as a correct guess and record it as a guessed letter.
        guess = action_letter.upper() if isinstance(action_letter, str) else None
        if guess is None or len(guess) != 1:
            info["guess_type"] = "invalid"
            info["error"] = "Guess must be a single character."
            return self._get_current_state(), self.REWARD_INVALID, self.game_over, info

        # Case 1: Repeated guess -- penalised, but costs no life
        if guess in self.guessed_letters:
            reward = self.REWARD_REPEATED
            info["guess_type"] = "repeated"

        # Case 2: Wrong guess -- costs one life
        elif guess not in self.secret_word:
            self.lives_left -= 1
            reward = self.REWARD_WRONG
            self.guessed_letters.add(guess)
            info["guess_type"] = "wrong"

        # Case 3: Correct guess -- reveal every occurrence of the letter
        else:
            reward = self.REWARD_CORRECT
            self.guessed_letters.add(guess)
            info["guess_type"] = "correct"
            self.masked_word = [
                guess if secret_char == guess else shown_char
                for secret_char, shown_char in zip(self.secret_word, self.masked_word)
            ]

        # --- Check for Game Over ---

        # Win: no more underscores in masked_word
        if "_" not in self.masked_word:
            self.game_over = True
            reward = self.REWARD_WIN
            info["game_result"] = "win"

        # Loss: ran out of lives
        elif self.lives_left <= 0:
            self.game_over = True
            reward = self.REWARD_LOSS
            info["game_result"] = "loss"

        return self._get_current_state(), reward, self.game_over, info

### Test the Environment

In [8]:
print("--- Testing Hangman Environment ---")

# Restrict this smoke test to the 7-letter bucket of the corpus.
word_list_7 = corpus_by_length[7]
print(f"Loaded {len(word_list_7)} 7-letter words for testing.")

# 1. Build the environment
env = HangmanEnvironment(word_list=word_list_7, lives=LIVES_ALLOWED)

# 2. Begin a fresh game
state = env.reset()
print(f"New Game Started. Secret Word: {env.secret_word}")  # peek at the answer for debugging
pprint.pprint(state)

# 3. Play a fixed sequence of guesses. 'E' appears twice, so its second
#    occurrence hits the repeated-guess branch if the game is still running.
dummy_guesses = list("ETAOISEXYZWQ")
game_reward = 0

for guess in dummy_guesses:
    if state["game_over"]:
        # The game has ended; the remaining guesses are skipped.
        continue

    print(f"\n--- Guessing: '{guess}' ---")
    state, reward, game_over, info = env.step(guess)

    print(f"Result: {info['guess_type']}")
    print(f"Reward: {reward}")
    pprint.pprint(state)
    game_reward += reward

# Summary of the finished (or abandoned) game
print("\n--- Final Game Status ---")
print(f"Secret Word was: {env.secret_word}")
print(f"Final Mask: {state['masked_word']}")
print(f"Total Game Reward: {game_reward}")
print("\nEnvironment test complete.")

--- Testing Hangman Environment ---
Loaded 5111 7-letter words for testing.
New Game Started. Secret Word: UPHEAVE
{'game_over': False,
 'guessed_letters': [],
 'lives_left': 6,
 'masked_word': '_______',
 'word_length': 7}

--- Guessing: 'E' ---
Result: correct
Reward: 1
{'game_over': False,
 'guessed_letters': ['E'],
 'lives_left': 6,
 'masked_word': '___E__E',
 'word_length': 7}

--- Guessing: 'T' ---
Result: wrong
Reward: -5
{'game_over': False,
 'guessed_letters': ['E', 'T'],
 'lives_left': 5,
 'masked_word': '___E__E',
 'word_length': 7}

--- Guessing: 'A' ---
Result: correct
Reward: 1
{'game_over': False,
 'guessed_letters': ['A', 'E', 'T'],
 'lives_left': 5,
 'masked_word': '___EA_E',
 'word_length': 7}

--- Guessing: 'O' ---
Result: wrong
Reward: -5
{'game_over': False,
 'guessed_letters': ['A', 'E', 'O', 'T'],
 'lives_left': 4,
 'masked_word': '___EA_E',
 'word_length': 7}

--- Guessing: 'I' ---
Result: wrong
Reward: -5
{'game_over': False,
 'guessed_letters': ['A', 'E', 'I',