# In [6]:
import json
import requests
import random
import string
import secrets
import time
import re
import collections
from sklearn.tree import DecisionTreeClassifier
import numpy as np


try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None):
        """Create a client, load the training word list and set up the model.

        Args:
            access_token: Token added to every request when it is truthy.
            session: Optional HTTP session to reuse; a new
                ``requests.Session`` is created when none is supplied.
            timeout: Timeout value handed to every HTTP request.
        """
        # Connection settings. The endpoint is chosen first, via
        # determine_hangman_url().
        self.hangman_url = self.determine_hangman_url()
        self.access_token = access_token
        self.timeout = timeout
        if session:
            self.session = session
        else:
            self.session = requests.Session()

        # Per-game state; start_game() resets both of these.
        self.guessed_letters = []
        self.current_dictionary = []

        # Training word list plus its letters ordered from most to least
        # frequent across all words.
        self.full_dictionary = self.build_dictionary("words_250000_train.txt")
        letter_counts = collections.Counter("".join(self.full_dictionary))
        self.full_dictionary_common_letter_sorted = letter_counts.most_common()

        # Initialize the Decision Tree classifier
        self.decision_tree = DecisionTreeClassifier()

    @staticmethod
    def determine_hangman_url():
        """Pick the faster of the known hosts and return its hangman URL.

        Every host receives one untimed request followed by ten timed
        requests. The host with the smallest total elapsed time is chosen;
        on a tie the host listed first wins.

        Returns:
            str: The chosen host with '/trexsim/hangman' appended.
        """
        links = ['https://trexsim.com', 'https://sg.trexsim.com']
        num_samples = 10

        data = {link: 0.0 for link in links}

        for link in links:
            # Untimed warm-up request.
            requests.get(link)

            for _ in range(num_samples):
                started = time.time()
                requests.get(link)
                # Accumulate the samples. Plain assignment here would keep
                # only the last measurement and make the other nine useless.
                data[link] += time.time() - started

        # min() returns the first minimal entry in list order, which matches
        # the tie-breaking of a stable sort over the insertion-ordered dict.
        fastest = min(links, key=lambda link: data[link])
        return fastest + '/trexsim/hangman'


    def preprocess_data(self, words, guessed_letters):
              
        num_words = len(words)
        num_guessed_letters = len(guessed_letters)
        num_features = 26 # 26 letters in the alphabet + num_guessed_letters

        # Create an empty numpy array to store the feature vectors
        feature_vectors = np.zeros((num_words, num_features), dtype=int)

        # Convert the words to feature vectors
        for i, word in enumerate(words):
            # Convert each character in the word to a one-hot encoded vector
            for j, letter in enumerate(word):
                if isinstance(letter, str) and 'a' <= letter <= 'z':
                    # If the letter is not a blank, set the corresponding index to 1
                    feature_vectors[i, ord(letter) - ord("a")] = 1

            # Set the guessed letters in the feature vector
            for j, letter in enumerate(guessed_letters):
                if isinstance(letter, str) and 'a' <= letter <= 'z':
                    feature_vectors[i, (26 + j)%num_guessed_letters] = ord(letter) - ord("a")

        return feature_vectors
       

    def train_decision_tree(self, words, guessed_letters):
        
        # Preprocess the data into feature vectors
        X_train = self.preprocess_data(words, guessed_letters)

        # Create labels for training the Decision Tree (target is the next letter to guess)
        y_train = [word.replace('_', '')[len(guessed_letters)+word.count('_')] for word in words]
        
        # Check if there are samples in the training data
        if X_train.shape[0] == 0:
            raise ValueError("No training samples available.")

        # Initialize the Decision Tree classifier
        decision_tree = DecisionTreeClassifier()

        # Train the Decision Tree classifier on the data
        decision_tree.fit(X_train, y_train)

        return decision_tree

    def predict(self, word, guessed_letters):
        
        # Preprocess the input word into a feature vector
        X_input = self.preprocess_data([word], [])

        # Use the trained Decision Tree classifier to make a prediction
        predicted_word = self.decision_tree.predict(X_input)[0]

        # Extract the next letter to guess from the predicted word
        guess_letter = predicted_word[len(guessed_letters)]

        return guess_letter




    def guess(self, word): # word input example: "_ p p _ e "
        # Clean the word so that we strip away the space characters
        # Replace "_" with "." as "." indicates any character in regular expressions
        clean_word = word[::2].replace("_", ".")

        # Find length of the passed word
        len_word = len(clean_word)

        # Grab the current dictionary of possible words from self object, initialize a new possible words dictionary to empty
        current_dictionary = self.current_dictionary
        new_dictionary = []

        # Iterate through all of the words in the old plausible dictionary
        for dict_word in current_dictionary:
            # Continue if the word is not of the appropriate length
            if len(dict_word) != len_word:
                continue

            # If dictionary word is a possible match then add it to the current dictionary
            if re.match(clean_word, dict_word):
                new_dictionary.append(dict_word)

        # Overwrite the old possible words dictionary with the updated version
        self.current_dictionary = new_dictionary

        # Train the Decision Tree on the current dictionary and guessed letters
        self.decision_tree = self.train_decision_tree(new_dictionary, self.guessed_letters)

        # Convert the input word to a feature vector
        input_word_feature = self.preprocess_data([word], [self.guessed_letters])
        
        

        # Use the Decision Tree to predict the most likely letter to guess
        predicted_word = self.decision_tree.predict(input_word_feature)[0]
        next_letter_index = len(self.guessed_letters)
        
        letter=predicted_word
        if letter not in self.guessed_letters:
            guess_letter = letter
        else:
            guess_letter='_'
            
            
        return guess_letter
    
    def build_dictionary(self, dictionary_file_location):
        text_file = open(dictionary_file_location,"r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary
                
    def start_game(self, practice=False, verbose=True):
        # reset guessed letters to an empty set and the current plausible dictionary to the full dictionary
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary

        response = self.request("/new_game", {"practice": practice})
        if response.get('status') == "approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))
        
            while tries_remains > 0:
                try:
                    # get guessed letter from user code
                    guess_letter = self.guess(word)
                except ValueError as ve:
                    print("ValueError:", ve)
                    print("Exiting game due to error. Continuing to the next game.")
                    break

            
                # append guessed letter to guessed letters field in the hangman object
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))
                    
                
            
                try:    
                    res = self.request("/guess_letter", {"request": "guess_letter", "game_id": game_id, "letter": guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print('Other exception caught on request.')
                    raise e

                if verbose:
                    print("Server response: {0}".format(res))
                status = res.get('status')
                tries_remains = res.get('tries_remains')
            
                if status == "failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False
                elif status == "ongoing":
                    word = res.get('word')
            else:
                if verbose:
                    print("Failed to start a new game")
            return status=="success"

        
    def my_status(self):
        return self.request("/my_status", {})
    
    def request(
            self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"

        # Add `access_token` to post_args or args if it has not already been
        # included.
        if self.access_token:
            # If post_args exists, we assume that args either does not exists
            # or it does not need `access_token`.
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token

        time.sleep(0.2)

        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)

        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')

        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result
    
    
class HangmanAPIError(Exception):
    """Error reported by, or while talking to, the hangman service.

    Attributes:
        result: The raw value the error was created from.
        type: ``result["error_code"]`` when available, else the "type" of a
            nested "error" entry, else "".
        code: The "code" of a nested "error" entry, else None.
        message: The first available of ``result["error_description"]``,
            ``result["error"]["message"]``, ``result["error_msg"]``;
            otherwise ``result`` itself.
    """

    # Marker for "this key could not be read", distinct from a stored None.
    _ABSENT = object()

    def __init__(self, result):
        absent = self._ABSENT
        self.result = result
        self.code = None

        error_code = self._lookup(result, "error_code")
        self.type = "" if error_code is absent else error_code

        description = self._lookup(result, "error_description")
        if description is not absent:
            self.message = description
        else:
            details = self._lookup(result, "error")
            # Indexing the marker itself fails, so a missing "error" entry
            # also ends up as "absent" here.
            nested_message = self._lookup(details, "message")
            if nested_message is not absent:
                self.message = nested_message
                self.code = details.get("code")
                if not self.type:
                    self.type = details.get("type", "")
            else:
                legacy_message = self._lookup(result, "error_msg")
                self.message = result if legacy_message is absent else legacy_message

        Exception.__init__(self, self.message)

    @classmethod
    def _lookup(cls, container, key):
        """Return ``container[key]``, or the absent marker if unreadable."""
        try:
            return container[key]
        except (KeyError, TypeError):
            return cls._ABSENT
        

# Create an instance of the HangmanAPI class with your access token
# NOTE(review): the access token is hard-coded in the source; consider loading
# it from configuration or the environment instead.
api = HangmanAPI(access_token="12534b1f466de51f28084866ccea14", timeout=2000)


# Determine the batch size
batch_size = 50  # Set your desired batch size

# Split the words into batches for training
# Integer division is used, so a trailing group of fewer than batch_size words
# is not part of any batch.
words=api.full_dictionary
num_batches = len(words) // batch_size
batches = [words[i * batch_size : (i + 1) * batch_size] for i in range(num_batches)]

# Train the Decision Tree classifier in batches
# The label of each sample is the word itself (with any '_' removed).
# NOTE(review): fit() is called on the same estimator in every iteration.
# Presumably each call replaces the previous fit rather than adding to it, so
# only the last batch would be retained -- confirm against the scikit-learn
# documentation. Also note that HangmanAPI.guess() assigns a newly trained
# tree to api.decision_tree, so the model fitted here is replaced during play.
for batch in batches:
    # Preprocess words to create feature vectors
    X_train = api.preprocess_data(batch, [])  # Feature vectors for training set
    y_train = [word.replace('_', '') for word in batch]  # Labels for training set
    # Train the Decision Tree classifier on the current batch
    api.decision_tree.fit(X_train, y_train)
    


# Split your dataset into training and validation sets
from sklearn.model_selection import train_test_split
X_train_combined = api.preprocess_data(words, [])  # Preprocess the whole dataset for splitting
y_train_combined = [word.replace('_', '') for word in words]  # Labels for the whole dataset
X_train, X_val, y_train, y_val = train_test_split(X_train_combined, y_train_combined, test_size=0.2, random_state=42)

# Make predictions on the validation set
# NOTE(review): y_pred, y_val and the split X_train/y_train are not read by
# any of the code visible after this point.
y_pred = api.decision_tree.predict(X_val)

# # Start the game and test the HangmanAPI with the decision tree-based guess function
# for i in range(1):
#     api.start_game(practice=True, verbose=True)

# Run 1000 games with practice=0, i.e. not in practice mode.
# NOTE(review): presumably these count as recorded games -- confirm before
# running. An IndexError raised inside a game skips to the next game.
for i in range(1000):
    print('Playing', i, 'th game')
    try:
        api.start_game(practice=0, verbose=False)
        time.sleep(0.5)
    except (IndexError):
        continue

# Get game statistics
[total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes] = api.my_status()

# Guard both ratios: a run counter of zero (for example no practice games
# played yet) would otherwise raise ZeroDivisionError.
practice_success_rate = total_practice_successes / total_practice_runs if total_practice_runs else 0.0
print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))

success_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs else 0.0
print('overall success rate = %.3f' % success_rate)

# Notebook output: KeyboardInterrupt