## Projektarbeit Mühle

### Imports

In [1]:
import PySimpleGUI as psGui
import matplotlib.pyplot as plt
import numpy as np
import time
import copy
from tensorflow import keras
import tensorflow as tf
import datetime as dt
import random
import sklearn.preprocessing as pre

### Class definitions

#### Environment

In [23]:
class MillEnv(object):
    """Game environment for Nine Men's Morris ("Mühle") on a 24-field board.

    Players are identified by ``1`` and ``-1``; player ``1`` starts. Every
    per-player list (``gamePhase``, ``inHand``, ``onBoard``,
    ``checkerPositions``) stores player ``1`` at index 1 and player ``-1``
    at index 0.

    ``moveNeeded`` tells the caller what the next ``makeMove`` argument means:

    * 0 -- field (0..23) on which a checker is put down
    * 1 -- field (0..23) of an own checker that is picked up
    * 2 -- direction (0 up, 1 right, 2 down, 3 left) for the picked checker
    * 3 -- field (0..23) of an opponent checker that is removed after a mill

    ``gamePhase`` per player: 0 placing, 1 moving, 2 flying (only three
    checkers left), 3 game over (set for both players).
    """

    def __init__(self):
        # All 16 possible mills: the 8 horizontal lines first, then the 8
        # vertical ones. getInRows()/_neighbours() rely on this order.
        self.columns: np.ndarray = np.array([[0, 1, 2],
                                             [3, 4, 5],
                                             [6, 7, 8],
                                             [9, 10, 11],
                                             [12, 13, 14],
                                             [15, 16, 17],
                                             [18, 19, 20],
                                             [21, 22, 23],
                                             [0, 9, 21],
                                             [3, 10, 18],
                                             [6, 11, 15],
                                             [1, 4, 7],
                                             [16, 19, 22],
                                             [8, 12, 17],
                                             [5, 13, 20],
                                             [2, 14, 23],
                                             ])
        self.reset()

    def reset(self):
        """Restore the complete start position (empty board, full hands)."""
        self.isPlaying: int = 1
        self.gamePhase: list = [0, 0]
        self.moveNeeded: int = 0
        self.inHand: list = [9, 9]
        self.onBoard: list = [0, 0]
        self.checkerPositions: list = [[], []]
        self.selected: int = -1
        self.board: np.ndarray = np.zeros(24)
        self.winner = 0

    def makeMove(self, move: int) -> bool:
        """Apply one action of the player whose turn it is.

        The meaning of ``move`` depends on ``self.moveNeeded`` (see the class
        docstring).

        Returns:
            True if the action was legal and has been applied, False if it
            was rejected (state unchanged) or the game is already over.
        """
        if self.winner != 0:
            return False
        if self.moveNeeded == 0:
            return self._placeChecker(move)
        if self.moveNeeded == 1:
            return self._selectChecker(move)
        if self.moveNeeded == 2:
            return self._slideChecker(move)
        if self.moveNeeded == 3:
            return self._removeChecker(move)
        return False

    def isFinished(self):
        """Return the winner (1 or -1), or 0 while the game is running."""
        return self.winner

    def getBoard(self) -> np.ndarray:
        """Return a copy of the board (0 empty, 1 / -1 checker of that player)."""
        return self.board.copy()

    def getInRows(self, pos: int) -> np.ndarray:
        """Return the lines through ``pos``: the horizontal one, then the vertical one."""
        arrayPos: np.ndarray = self.columns == pos
        return self.columns[arrayPos.any(axis=1)]

    def getSummary(self, player: int) -> tuple:
        """Count mills and line sums of two, seen from ``player``.

        Returns:
            ``(own mills, opponent mills, own lines summing to two,
            opponent lines summing to two)``.
        """
        lineSums: np.ndarray = self.board[self.columns].sum(axis=1)

        def count(value: int) -> int:
            return int((lineSums == value).sum())

        return count(3 * player), count(-3 * player), count(2 * player), count(-2 * player)

    def getMoveFields(self, pos: int) -> np.ndarray:
        """Describe the four neighbours of ``pos`` in the order up, right, down, left.

        Each entry is the board value of the neighbouring field (0 free,
        1 / -1 occupied) or 2 if the board has no connection in that direction.
        """
        moveFields: np.ndarray = np.full(4, 2.0)
        for direction, target in enumerate(self._neighbours(pos)):
            if target >= 0:
                moveFields[direction] = self.board[target]
        return moveFields

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _playerIndex(player: int) -> int:
        """Map a player id (1 / -1) to its index in the per-player lists."""
        return 1 if player == 1 else 0

    def _neighbours(self, pos: int) -> list:
        """Return the fields up, right, down, left of ``pos`` (-1 = no connection)."""
        horizontal, vertical = self.getInRows(pos)
        col: int = int(np.where(horizontal == pos)[0][0])
        row: int = int(np.where(vertical == pos)[0][0])
        return [
            int(vertical[row - 1]) if row > 0 else -1,
            int(horizontal[col + 1]) if col < 2 else -1,
            int(vertical[row + 1]) if row < 2 else -1,
            int(horizontal[col - 1]) if col > 0 else -1,
        ]

    def _inMill(self, pos: int) -> bool:
        """Tell whether the checker on ``pos`` is part of a complete mill."""
        lineSums: np.ndarray = self.board[self.getInRows(pos)].sum(axis=1)
        return bool((np.abs(lineSums) == 3).any())

    def _canMove(self, player: int) -> bool:
        """Tell whether ``player`` has a checker with a free neighbouring field."""
        positions: list = self.checkerPositions[self._playerIndex(player)]
        return any((self.getMoveFields(pos) == 0).any() for pos in positions)

    def _placeChecker(self, move: int) -> bool:
        """moveNeeded == 0: put a checker on ``move`` (from the hand, or flying)."""
        if not 0 <= move < 24 or self.board[move] != 0:
            return False
        player: int = self.isPlaying
        idx: int = self._playerIndex(player)
        if self.gamePhase[idx] == 2:
            # Flying: the checker picked in the selection step changes its field.
            if self.selected < 0:
                return False
            self.board[self.selected] = 0
            self.checkerPositions[idx].remove(self.selected)
        else:
            if self.inHand[idx] <= 0:
                return False
            self.inHand[idx] -= 1
            self.onBoard[idx] += 1
        self.board[move] = player
        self.checkerPositions[idx].append(int(move))
        self._completeTurn(int(move))
        return True

    def _selectChecker(self, move: int) -> bool:
        """moveNeeded == 1: pick the own checker on ``move`` for the next step."""
        if not 0 <= move < 24 or self.board[move] != self.isPlaying:
            return False
        flying: bool = self.gamePhase[self._playerIndex(self.isPlaying)] == 2
        # A sliding checker needs a free neighbour, a flying one can go anywhere.
        if not flying and not (self.getMoveFields(move) == 0).any():
            return False
        self.selected = int(move)
        self.moveNeeded = 0 if flying else 2
        return True

    def _slideChecker(self, move: int) -> bool:
        """moveNeeded == 2: slide the selected checker in direction ``move``."""
        if not 0 <= move < 4 or self.selected < 0:
            return False
        target: int = self._neighbours(self.selected)[move]
        if target < 0 or self.board[target] != 0:
            return False
        player: int = self.isPlaying
        positions: list = self.checkerPositions[self._playerIndex(player)]
        self.board[self.selected] = 0
        self.board[target] = player
        positions.remove(self.selected)
        positions.append(target)
        self._completeTurn(target)
        return True

    def _removeChecker(self, move: int) -> bool:
        """moveNeeded == 3: take the opponent checker on ``move`` off the board."""
        player: int = self.isPlaying
        if not 0 <= move < 24 or self.board[move] != -player:
            return False
        oppIdx: int = self._playerIndex(-player)
        # A checker inside a mill is protected unless every opponent checker is.
        if self._inMill(move) and not all(self._inMill(pos) for pos in self.checkerPositions[oppIdx]):
            return False
        self.board[move] = 0
        self.checkerPositions[oppIdx].remove(move)
        self.onBoard[oppIdx] -= 1
        if self.inHand[oppIdx] + self.onBoard[oppIdx] < 3:
            # With fewer than three checkers no mill can be built any more.
            self._endGame(player)
            return True
        if self.gamePhase[oppIdx] == 1 and self.onBoard[oppIdx] == 3:
            self.gamePhase[oppIdx] = 2
        self._passTurn()
        return True

    def _completeTurn(self, pos: int) -> None:
        """Finish a placement/slide that ended on ``pos``: mill check or turn change."""
        if sum(self.inHand) == 0 and 0 in self.gamePhase:
            # Last checker has been placed: both players leave the placing phase.
            self.gamePhase = [2 if count <= 3 else 1 for count in self.onBoard]
        opponentPositions: list = self.checkerPositions[self._playerIndex(-self.isPlaying)]
        # The checker has just arrived on pos, so any mill through pos is new.
        if self._inMill(pos) and opponentPositions:
            self.moveNeeded = 3
        else:
            self._passTurn()

    def _passTurn(self) -> None:
        """Hand the turn to the other player and set the action expected next."""
        self.isPlaying = -self.isPlaying
        self.selected = -1
        phase: int = self.gamePhase[self._playerIndex(self.isPlaying)]
        if phase == 0:
            self.moveNeeded = 0
            return
        self.moveNeeded = 1
        # A player in the moving phase who cannot slide any checker has lost.
        if phase == 1 and not self._canMove(self.isPlaying):
            self._endGame(-self.isPlaying)

    def _endGame(self, winner: int) -> None:
        """Record the winner; gamePhase stays a list so it can still be indexed."""
        self.winner = winner
        self.gamePhase = [3, 3]


#### Agent

In [3]:
class Agent(object):
    """Picks actions for a MillEnv, either at random or from a policy network."""

    def getPos(self, state: np.ndarray, temp: float, moveNeeded: int, network: "keras.models.Model" = None) -> int:
        """Choose the next action index.

        Args:
            state: board vector; it is flattened to shape ``(1, -1)`` before
                it is handed to ``network``.
            temp: softmax temperature. Values ``<= 0`` select the action with
                the highest network output instead of sampling.
            moveNeeded: action type requested by the environment; only used
                without a network (2 -> one of 4 directions, else one of 24
                fields).
            network: callable returning one row of action scores
                (assumed shape ``(1, n_actions)`` -- confirm against the model
                that is actually passed in). ``None`` means uniform random play.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if network is None:
            upper: int = 4 if moveNeeded == 2 else 24
            return int(np.random.randint(0, upper))

        # float64 keeps the probabilities within np.random.choice's sum-to-one tolerance
        logits: np.ndarray = np.asarray(network(state.reshape(1, -1)), dtype=np.float64)[0]
        if temp <= 0:
            # limit of the softmax for temp -> 0; also avoids a division by zero
            return int(np.argmax(logits))

        scaled: np.ndarray = logits / temp
        scaled = scaled - scaled.max()  # numerically stable softmax
        probabilities: np.ndarray = np.exp(scaled)
        probabilities /= probabilities.sum()
        # Sample the index itself so that equally likely actions are not
        # collapsed onto the first one of them.
        return int(np.random.choice(len(probabilities), p=probabilities))

### Tests

#### Environment

In [24]:
# Environment smoke test: place five checkers alternately and print the
# resulting state and the helper outputs.
env = MillEnv()
print(env.getInRows(2))

# Player 1 ends up on fields 0, 2 and 1, player -1 on fields 3 and 4.
for field in (0, 3, 2, 4, 1):
    env.makeMove(field)

print(env.getBoard())
print(env.moveNeeded)
# True for every line through field 0 that is completely owned by one player
ThreeInChosenRow = np.abs(env.board[env.getInRows(0)].sum(axis=1)) == 3
print(~ThreeInChosenRow.any())
print(np.nonzero(env.getInRows(1) == 1))
print(env.getMoveFields(2))
print(env.getSummary(1))

[[ 0  1  2]
 [ 2 14 23]]
[ 1.  1.  1. -1. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.]
3
False
(array([0, 1]), array([1, 0]))
[2. 2. 0. 1.]
(1, 0, 0, 1)


#### Agent

In [28]:
# Self-play smoke test: one Agent without a network drives both players
# until the environment reports a winner.
env = MillEnv()
ag = Agent()
env.reset()
while not env.isFinished():
    # no network is passed, so getPos answers with a random action
    valid = env.makeMove(ag.getPos(env.getBoard(), 0, env.moveNeeded))
    # show the board whenever an accepted action leaves a removal pending
    if valid and env.moveNeeded == 3:
        print(env.getBoard())
print(f"Gewonnen hat {env.isFinished()}")

[ 0.  0.  1.  0. -1.  1.  1.  0.  1. -1. -1. -1.  0.  0.  1.  1.  0.  0.
 -1.  0. -1.  0.  0.  0.]
[ 0.  1.  1.  0. -1.  1.  0.  0.  1. -1. -1. -1.  0. -1.  1.  1.  0.  1.
 -1. -1. -1.  0.  0.  0.]
[ 1.  1.  1.  0. -1.  1.  0.  0.  1. -1. -1. -1.  0. -1.  1.  1.  0.  0.
 -1. -1. -1.  0.  0.  0.]


KeyboardInterrupt: 