In [7]:
import random

class HangmanAgent:
    """Tabular Q-learning agent that guesses letters in Hangman.

    A state is the masked word (e.g. ``'_pp__'``) and an action is a single
    lowercase letter.  ``q_table`` maps ``state -> {letter: Q-value}``; a
    letter that has never been tried in a state has an implicit Q-value of 0.
    """

    # Action space: the 26 lowercase ASCII letters.
    _LETTERS = "abcdefghijklmnopqrstuvwxyz"
    # Number of wrong guesses that end a training episode.
    _MAX_WRONG_GUESSES = 6

    def __init__(self, word_list):
        """Create an untrained agent.

        Args:
            word_list: Sequence of words that training episodes are drawn from.
        """
        self.word_list = word_list
        self.q_table = {}
        self.alpha = 0.1    # learning rate
        self.gamma = 0.9    # discount factor applied to the next state's value
        self.epsilon = 0.1  # probability of a random (exploratory) guess

    @staticmethod
    def _reveal(word, guessed_letters):
        """Return *word* with every letter not yet guessed replaced by '_'."""
        return ''.join(c if c in guessed_letters else '_' for c in word)

    def train(self, episodes=10):
        """Run epsilon-greedy Q-learning episodes on random words.

        Each episode ends when the word is fully revealed, when
        ``_MAX_WRONG_GUESSES`` wrong guesses have been made, or when no
        unguessed letter is left.

        Args:
            episodes: Number of episodes to play.
        """
        for _ in range(episodes):
            word = random.choice(self.word_list)
            state = '_' * len(word)
            attempts_left = self._MAX_WRONG_GUESSES
            guessed_letters = []

            while attempts_left > 0 and state != word:
                remaining = [c for c in self._LETTERS if c not in guessed_letters]
                if not remaining:
                    break

                if random.uniform(0, 1) < self.epsilon:
                    action = random.choice(remaining)  # explore
                else:
                    action = self.choose_action(state, guessed_letters)  # exploit

                guessed_letters.append(action)

                if action in word:
                    new_state = self._reveal(word, guessed_letters)
                    reward = 1 if new_state == word else 0
                else:
                    new_state = state
                    reward = -1
                    # Only a wrong guess costs an attempt; otherwise words with
                    # more distinct letters than attempts could never be solved.
                    attempts_left -= 1

                self.update_q_table(state, action, reward, new_state)
                state = new_state

    def choose_action(self, state, exclude=()):
        """Return the greedy letter for *state*, skipping letters in *exclude*.

        Letters without a Q-table entry count as Q = 0, so a letter that was
        tried and penalised is not preferred over untried ones.  Ties are
        broken at random.

        Args:
            state: Masked word used as the Q-table key.
            exclude: Letters that must not be returned (already guessed).

        Returns:
            A lowercase letter, or ``None`` if every letter is excluded.
        """
        candidates = [c for c in self._LETTERS if c not in exclude]
        if not candidates:
            return None

        q_values = self.q_table.get(state, {})
        best = max(q_values.get(c, 0) for c in candidates)
        return random.choice([c for c in candidates if q_values.get(c, 0) == best])

    def update_q_table(self, state, action, reward, new_state):
        """Apply one Q-learning update for the observed transition.

        Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max Q(s', .))

        Entries for *state* and *new_state* are created if missing.
        """
        state_q = self.q_table.setdefault(state, {})
        next_q = self.q_table.setdefault(new_state, {})

        current_q = state_q.setdefault(action, 0)
        max_future_q = max(next_q.values(), default=0)
        state_q[action] = (
            (1 - self.alpha) * current_q
            + self.alpha * (reward + self.gamma * max_future_q)
        )

    def play(self, word):
        """Guess *word* greedily, printing the masked word after every guess.

        A letter is never guessed twice, so the game ends after at most 26
        guesses: either the word is revealed, or no letter is left (which
        happens when *word* contains characters other than a-z).
        """
        state = '_' * len(word)
        guessed_letters = []

        while state != word:
            action = self.choose_action(state, guessed_letters)
            if action is None:
                print("No letters left to guess.")
                return

            guessed_letters.append(action)

            if action in word:
                state = self._reveal(word, guessed_letters)

            print("Word:", state)

        print("Congratulations! You guessed the word.")


# Words the agent trains on; the demo word below is drawn from the same list.
word_list = [
    'apple', 'banana', 'orange', 'grape',
    'strawberry', 'melon', 'peach', 'mango',
]

# Build the agent and train it with the default number of episodes.
agent = HangmanAgent(word_list)
agent.train()

# Pick a word at random, announce it, then let the agent guess it.
word_to_guess = random.choice(word_list)
print("Word to guess:", word_to_guess)
agent.play(word_to_guess)


Word to guess: melon
Word: ___o_
Word: __lo_
Word: __lo_
Word: __lo_
Word: __lo_
Word: __lo_
Word: __lo_
Word: __lo_
Word: __lo_
Word: __lo_
Word: __lo_
Word: m_lo_
Word: m_lo_
Word: m_lo_
Word: m_lo_
Word: m_lo_
Word: m_lo_
Word: m_lo_
Word: melo_
Word: melo_
Word: melo_
Word: melo_
Word: melo_
Word: melon
Congratulations! You guessed the word.
