In [2]:
from Board import Board

In [39]:
# Scratch check: draw a single float from numpy's uniform generator on [0, 1).
# NOTE(review): relies on `np` already being imported in the running kernel.
np.random.random()

0.9022325403738445

In [3]:
# Build a fresh board and print its text rendering.
b = Board()
print(b)

    0 1 2 3 4 5 6 7

0  |▓| |▓| |▓| |▓| |
1  | |▓| |▓| |▓| |▓|
2  |▓| |▓| |▓| |▓| |
3  | | | | | | | | |
4  | | | | | | | | |
5  | |░| |░| |░| |░|
6  |░| |░| |░| |░| |
7  | |░| |░| |░| |░|



In [11]:
# Print the position tuple of every piece owned by player 1.
# `pieces` is a dict; only its values (the piece objects) are needed here.
for i in b.player1.pieces.values():
    print(i.pos)

(0, 0)
(1, 3)
(2, 6)
(0, 2)
(2, 2)
(0, 6)
(1, 5)
(2, 0)
(2, 4)
(0, 4)
(1, 1)
(1, 7)


In [1]:
import numpy as np
from Board import Board

class agent:
    """
    Learning class used to implement the components for reinforcement learning.

    The agent owns a ``Board`` and evaluates positions from the point of view
    of player 1 (a player-1 win yields ``rwin``, a player-2 win ``rloss``).
    """
    # set the reward for each state
    rwin = 0.0 # win reward
    rloss = -1000.0 # loss reward
    rtie = -50.0 # tie reward (not used by reward() yet)
    rside = -1.0 # reward for each piece on the side of the board
    rmiddle = -2.0 # reward for each piece in the middle
    # number of columns on the board; columns are indexed 0 .. boardSize - 1
    boardSize = 8

    def __init__(self, expRate = 0, discount = 1):
        """
        Initialize an agent object with the following properties
        board : a fresh Board the agent plays on
        states : dictionary of the past boards and the actions that led to them
        expRate : between 0 and 1, probability of taking a random move
        dc : the discount factor used to dampen the agent's rewards
        current : a copy of the board taken at construction time

        NOTE(review): earlier documentation mentioned a ``creward`` attribute
        (reward of the current state); it is not set anywhere in this class.
        """
        self.board = Board()
        # create a dictionary to keep track of all the past steps
        self.states = {}
        self.expRate = expRate
        self.dc = discount
        self.current = self.board.copy()

    def domain(self, verbose = False, showBoard = True):
        """
        Return a tuple of two numpy arrays, consisting of the possible moves
        for respectively player 1 and player 2.

        verbose : if True, print every possible move of both players
        showBoard : if True (default), print the board

        Side effects: stores the raw move sets in ``self.p1legal`` /
        ``self.p2legal`` and the arrays in ``self.p1moves`` / ``self.p2moves``;
        copies of the arrays are returned.
        """
        # first get the legal moves for both players as sets
        # this makes them more reusable for after
        # also add any possible captures to the moves
        self.p1legal = self.board.player1.legalMoves(self.board.player2)
        self.p1legal.update(self.board.player1.legalCaptures(self.board.player2))
        self.p2legal = self.board.player2.legalMoves(self.board.player1)
        self.p2legal.update(self.board.player2.legalCaptures(self.board.player1))
        if verbose:
            # go over all the specified legal moves for both players
            print('PLAYER 1: {}'.format(self.board.player1.color()))
            for move in self.p1legal:
                print('{} --> {}'.format(move[0].pos,move[1]))
            print('PLAYER 2: {}'.format(self.board.player2.color()))
            for move in self.p2legal:
                print('{} --> {}'.format(move[0].pos,move[1]))
        if showBoard: # print the current state of the board
            print(self.board)
        # now construct the possible moves arrays
        # each entry is a pair where entry[0] is the current position of the
        # piece and entry[1] is the position it would have after the move
        self.p1moves = np.array([(move[0].pos,move[1]) for move in self.p1legal])
        self.p2moves = np.array([(move[0].pos,move[1]) for move in self.p2legal])
        return (self.p1moves.copy(), self.p2moves.copy())

    @staticmethod
    def reward(board, inclOpposite = False):
        """
        Reward function that determines the reward for the given
        state -- from the point of view of player1.

        board : a board object exposing ``winner()``, ``player1`` and ``player2``
        inclOpposite : if True, also score the pieces of player2

        Returns ``agent.rwin`` if player1 won, ``agent.rloss`` if player2 won,
        otherwise the sum of the per-piece rewards (a float).
        """
        # ask for the winner once and reuse the answer
        winner = board.winner()
        if winner == board.player1:
            return agent.rwin # this means computer won
        if winner == board.player2:
            return agent.rloss # computer lost
        # the other state would be when a winner is not decided yet or we have a tie
        lastCol = agent.boardSize - 1

        def onSide(piece):
            # a piece is on the side when it sits in the first or last column
            return piece.pos[1] == 0 or piece.pos[1] == lastCol

        result = 0.0
        for piece in board.player1.pieces.values():
            # a piece on the side is at a more strategic point,
            # so it gets the (better) side reward
            result += agent.rside if onSide(piece) else agent.rmiddle
        # optionally do the same thing for the opposition:
        # each opposite side piece has -2.0 and middle ones have -1.0
        # (kept optional for testing)
        if inclOpposite:
            for piece in board.player2.pieces.values():
                result += agent.rmiddle if onSide(piece) else agent.rside
        return result

    def policy(self):
        """
        Method that chooses the best next move.

        NOTE(review): unfinished -- it currently only prints the set of legal
        moves and captures of player1 when the exploration roll succeeds, and
        always returns None. No move is selected or played yet.
        """
        # find all the possible legal moves and captures
        legalMoves = self.board.player1.legalMoves(self.board.player2)
        legalMoves.update(self.board.player1.legalCaptures(self.board.player2))
        # using the experimentation rate, randomize the next move (or not)
        # np.random.random returns numbers in [0,1) from a continuous distribution
        if np.random.random() < self.expRate:
            print(legalMoves)

    def saveState(self, action = None):
        """Save the state of the current board and the action
            leading to it inside the self.states dictionary

        action : optional, the move that produced the current board

        Each call stores ``(copy of the board, action)`` under the next
        integer step index (0, 1, 2, ...), so the history stays ordered.
        """
        # self.states is a dict, so entries are assigned by key
        # (the previous self.states.append() always raised AttributeError)
        self.states[len(self.states)] = (self.board.copy(), action)

Writing learning.py


In [80]:
# Build an agent with expRate=1; policy() compares np.random.random() (always < 1) against it.
l = agent(expRate=1)


In [78]:
# policy() only prints player 1's legal moves when its exploration roll succeeds; it returns nothing.
l.policy()

{(Piece((2, 6), white), (3, 5)), (Piece((2, 0), white), (3, 1)), (Piece((2, 6), white), (3, 7)), (Piece((2, 2), white), (3, 3)), (Piece((2, 4), white), (3, 5)), (Piece((2, 4), white), (3, 3)), (Piece((2, 2), white), (3, 1))}


In [81]:
# Unpack the move arrays for player 1 (a) and player 2 (b), printing every move and the board.
# NOTE(review): this rebinds `b`, which an earlier cell used for a Board instance.
(a,b) = l.domain(verbose = True)

PLAYER 1: white
(2, 2) --> (3, 1)
(2, 2) --> (3, 3)
(2, 0) --> (3, 1)
(2, 4) --> (3, 5)
(2, 4) --> (3, 3)
(2, 6) --> (3, 7)
(2, 6) --> (3, 5)
PLAYER 2: black
(5, 3) --> (4, 2)
(5, 5) --> (4, 4)
(5, 5) --> (4, 6)
(5, 7) --> (4, 6)
(5, 1) --> (4, 0)
(5, 3) --> (4, 4)
(5, 1) --> (4, 2)
    0 1 2 3 4 5 6 7

0  |▓| |▓| |▓| |▓| |
1  | |▓| |▓| |▓| |▓|
2  |▓| |▓| |▓| |▓| |
3  | | | | | | | | |
4  | | | | | | | | |
5  | |░| |░| |░| |░|
6  |░| |░| |░| |░| |
7  | |░| |░| |░| |░|



In [53]:
# set.update() mutates the set in place and returns None, so this prints None, not the set.
print(set(list(range(1,100,4))).update())

None


In [60]:
# Scratch set holding the integers 40..59; set() consumes the range directly.
a = set(range(40, 60))

In [65]:
# Display the set. It holds more than 40..59 because the update cell (In [64]) ran before this one.
a

{1,
 5,
 9,
 13,
 17,
 21,
 25,
 29,
 33,
 37,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 61,
 65,
 69,
 73,
 77,
 81,
 85,
 89,
 93,
 97}

In [64]:
# Merge 1, 5, 9, ..., 97 into `a` in place; update() accepts any iterable and returns None.
a.update(range(1, 100, 4))