In [5]:
from mastermind import Mastermind
from DeepQNetwork import DQNAgent
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from keras.optimizers import RMSprop, Adam, Adamax, Adadelta, Adagrad, Nadam, Ftrl
from tensorflow import keras 
from tensorflow.keras.layers import Dense
import pygame
from collections import deque
import random
import numpy as np

In [2]:
# Train a DQN agent to break a fixed Mastermind code.
#
# This cell builds the environment, the Q-network and the agent, then runs
# up to EPISODES epochs of EPISODES_PER_EPOCH games each, printing the mean
# reward per epoch and stopping early once it exceeds WIN_MEAN_REWARD.

# --- Configuration ----------------------------------------------------------
code_to_decode = "RBGY"        # secret code handed to the environment
SEED = 42                      # seeds the RNGs imported by this notebook
learning_rate = 0.001          # passed to DQNAgent (the optimizer below uses its own rate)
batch_size = 24                # replay mini-batch size
EPISODES = 5                   # number of epochs (outer loop iterations)
EPISODES_PER_EPOCH = 100       # games played per epoch
MAX_STEPS_PER_EPISODE = 1000   # hard cap on guesses within one game
WIN_MEAN_REWARD = 100          # mean epoch reward above which training stops

# Seed every generator this notebook has in scope so a Restart & Run All is
# repeatable. Assumes DQNAgent / Mastermind draw from these generators --
# TODO confirm against their implementations.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Environment ------------------------------------------------------------
clock = pygame.time.Clock()    # used by the play loop in a later cell

mastermind = Mastermind(code_to_decode)
mastermind.reset()

state_size = mastermind.get_number_of_states()
action_size = len(mastermind.get_possible_actions(None))
states = mastermind.get_all_states()   # indexed by action id when stepping


def encode_state(env_state):
    """Return `env_state` one-hot encoded as a (1, state_size) network input."""
    return to_categorical(env_state, num_classes=state_size).reshape(1, state_size)


# --- Q-network --------------------------------------------------------------
# The input width follows the environment instead of a hard-coded 24, so the
# model and the encoding above cannot drift apart.
model = tf.keras.Sequential()
model.add(Dense(64, input_dim=state_size))
model.add(Dense(32, activation='relu'))
model.add(Dense(action_size, activation='linear'))

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = RMSprop(learning_rate=0.0002, decay=6e-8)
# NOTE(review): 'accuracy' is not a meaningful metric for an MSE regression
# target; kept unchanged in case DQNAgent reads it from the fit history.
model.compile(loss='mse', optimizer=opt, metrics=['accuracy'])
model.summary()

# --- Agent ------------------------------------------------------------------
agent = DQNAgent(action_size, learning_rate, model)

# Presumably the exploration rate used by agent.get_action -- verify in DQNAgent.
agent.epsilon = 0.75

# --- Training loop ----------------------------------------------------------
done = False
counter = 0   # not used in this cell; kept in case another cell reads it
for e in range(EPISODES):

    summary = []   # total reward of every game in this epoch
    for _ in range(EPISODES_PER_EPOCH):
        total_reward = 0
        env_state = mastermind.reset()

        # prepare appropriate format of the state for network
        state = encode_state(env_state)

        for _step in range(MAX_STEPS_PER_EPISODE):
            action = agent.get_action(state)
            # The fourth value returned by step() is not needed for training.
            next_state_env, reward, done, _info = mastermind.step(states[action])
            total_reward += reward

            # prepare appropriate format of the next state for network
            next_state = encode_state(next_state_env)

            # add to experience memory
            agent.remember(state.flatten(), action, reward, next_state.flatten(), done)
            state = next_state
            if done:
                break

        # train network if in the memory is more samples than size of the batch
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

        summary.append(total_reward)

    mean_reward = np.mean(summary)
    print("epoch #{}\tmean reward = {:.3f}\tepsilon = {:.3f}".format(e, mean_reward, agent.epsilon))
    agent.update_epsilon_value()
    if mean_reward > WIN_MEAN_REWARD:
        print("You Win!")
        break

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                1600      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 24)                792       
Total params: 4,472
Trainable params: 4,472
Non-trainable params: 0
_________________________________________________________________
epoch #0	mean reward = -350.100	epsilon = 0.750
epoch #1	mean reward = 437.770	epsilon = 0.712
You Win!


In [3]:
# Build a fresh environment for the play loop in the next cell and capture
# its initial state; `done` is the flag that loop runs on.
mastermind = Mastermind(code_to_decode)
state, done = mastermind.reset(), False


In [4]:
# Play one game with the trained agent, one guess per second, printing the
# fourth value returned by step() (named `score` here) after every guess.
while not done:
    # Leave immediately when the window is closed. Previously the QUIT flag
    # was set on `done` and then overwritten by the step() result below, so
    # closing the window had no effect.
    if any(event.type == pygame.QUIT for event in pygame.event.get()):
        done = True
        break

    # prepare appropriate format of the state for network
    # (kept in its own name so `state` always holds the raw environment state)
    state_input = to_categorical(state, num_classes=state_size).reshape(1, state_size)
    action = agent.get_action(state_input)
    state, reward, done, score = mastermind.step(states[action])
    print(score)
    clock.tick(1)   # cap the loop at one iteration per second

-21
-13
24
30
1030
