In [86]:
## From https://github.com/egeromin/mastermind/

import config
import numpy as np
import itertools
import random
from collections import Counter


class Episode:
    """Generate Mastermind episodes for an agent to learn from.

    A code is a string of four digits, each in ``'0'``..``'5'``.  An episode
    is a list of ``(guess, (hits, misses))`` tuples, where the feedback is
    computed against ``self.secret`` by :meth:`score`.
    """

    def __init__(self, secret):
        """Store the secret code.

        Args:
            secret: Either the code itself (a digit string) or an ``int``
                index, which is converted with :meth:`_number_from_index`.
        """
        if isinstance(secret, int):
            secret = self._number_from_index(secret)
        self.secret = secret

    @staticmethod
    def _index_from_number(number):
        """Convert a digit-string guess to its index between 0 and 6**4.

        Args:
            number: String of at most four characters drawn from '0'..'5'.

        Returns:
            The value of ``number`` read as a base-6 integer.
        """
        assert(len(number) <= 4)
        assert(set(number) <= set(map(str, range(6))))
        return int(number, base=6)

    @staticmethod
    def _number_from_index(index):
        """Convert an index to its base-6 digit string, zero-padded to 4.

        Args:
            index: Integer with ``0 <= index < config.max_guesses``
                (presumably 6**4 -- confirm against ``config``).

        Returns:
            The base-6 representation of ``index``, left-padded with '0'
            to at least four characters.
        """
        assert(0 <= index < config.max_guesses)
        digits = []
        # Collect base-6 digits least-significant first; index 0 yields no
        # digits and relies on zfill to produce '0000'.
        while index > 0:
            digits.append(str(index % 6))
            index = index // 6
        return "".join(reversed(digits)).zfill(4)

    @staticmethod
    def score(p, q):
        """Return the ``(hits, misses)`` feedback between two codes.

        ``hits`` counts positions where both codes hold the same symbol.
        ``misses`` counts symbols common to both codes (as multisets) that
        are not already counted as hits.
        """
        hits = sum(p_i == q_i for p_i, q_i in zip(p, q))
        misses = sum((Counter(p) & Counter(q)).values()) - hits
        return hits, misses

# NOTE: disabled sampler, kept as in the original for reference.
#     @staticmethod
#     def _select_next_action(action_distribution):
#         """Sample the next action based on the action distribution"""
#         action_dist = action_distribution.detach().numpy().reshape(-1)
#         # `action_distribution` is a tensor
#         return np.random.choice(action_dist.shape[0],
#                                 p=action_dist)

    def generate_best_patten(self):
        """Play one episode using consistency-based elimination.

        Starting from all 6**4 codes, repeatedly pick a random remaining
        candidate as the guess, score it against ``self.secret``, and keep
        only the candidates that would have produced the same feedback for
        that guess.

        Returns:
            List of ``(guess, (hits, misses))`` tuples in guessing order.
        """
        # Keep the candidates in a list: drawing a random element from a set
        # is not supported by the ``random`` module on recent Python versions.
        candidates = [
            ''.join(digits) for digits in itertools.product('012345', repeat=4)
        ]
        history = []

        while candidates:
            guess = random.choice(candidates)
            # ``score`` is a staticmethod of this class, so it must be
            # reached through ``self`` rather than as a bare global name.
            feedback = self.score(self.secret, guess)
            history.append((guess, feedback))
            # Drop the guess itself and every candidate that is inconsistent
            # with the feedback just observed.
            candidates = [
                candidate for candidate in candidates
                if candidate != guess
                and self.score(candidate, guess) == feedback
            ]
        return history

    def generate_random_episodes(self):
        """Play one episode using uniformly random guesses.

        Draws up to ``config.max_episode_length`` random codes, recording
        each with its feedback, and stops early once the secret is guessed.

        Returns:
            List of ``(guess, (hits, misses))`` tuples in guessing order.
        """
        history = []

        for _ in range(config.max_episode_length):
            guess = self._number_from_index(random.randint(0, 6**4 - 1))
            history.append((guess, self.score(self.secret, guess)))
            if guess == self.secret:
                break

        return history
    

Testing the Episode class

In [90]:
# Smoke test: for 50 random secrets, print the secret and the number of
# guesses the elimination strategy needed to finish the episode.
divider = '-'*30
for idx in np.random.randint(0, 6**4, 50):
    code = Episode._number_from_index(idx)
    print(code)
    n_guesses = len(Episode(code).generate_best_patten())
    print(n_guesses)
    print(divider)

0521
4
------------------------------
1203
4
------------------------------
0532
5
------------------------------
0254
4
------------------------------
1245
4
------------------------------
2245
6
------------------------------
2130
6
------------------------------
2232
5
------------------------------
3532
3
------------------------------
3031
5
------------------------------
4415
6
------------------------------
2331
5
------------------------------
1251
2
------------------------------
5001
4
------------------------------
4004
4
------------------------------
4145
4
------------------------------
5215
6
------------------------------
2213
3
------------------------------
4405
6
------------------------------
0333
4
------------------------------
1550
5
------------------------------
5251
5
------------------------------
0142
5
------------------------------
0330
4
------------------------------
0433
5
------------------------------
1040
4
------------------------------
4323
4
-----

In [91]:
# Smoke test: for 25 random secrets, print the secret and the length of an
# episode played with purely random guesses.
divider = '-'*30
for idx in np.random.randint(0, 6**4, 25):
    code = Episode._number_from_index(idx)
    print(code)
    n_guesses = len(Episode(code).generate_random_episodes())
    print(n_guesses)
    print(divider)

4424
30
------------------------------
1213
30
------------------------------
4354
30
------------------------------
3531
30
------------------------------
3051
30
------------------------------
1301
30
------------------------------
2441
30
------------------------------
2450
30
------------------------------
3114
30
------------------------------
2114
30
------------------------------
2514
30
------------------------------
3551
30
------------------------------
2051
30
------------------------------
1213
30
------------------------------
4524
30
------------------------------
4024
30
------------------------------
1255
30
------------------------------
4521
30
------------------------------
5245
30
------------------------------
4321
30
------------------------------
2010
30
------------------------------
0353
30
------------------------------
0454
30
------------------------------
3513
30
------------------------------
4502
30
------------------------------
