In [82]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
plt.style.use('ggplot')


In [108]:
def Qhash(board):
    """Encode a board as a string key, one character per cell.

    Cells are visited row by row. A cell equal to 1 is written as 'x',
    a cell equal to -1 as 'o', and any other value as 'e'.

    Returns:
        The concatenated string (empty for an empty board).
    """
    return ''.join(
        'x' if cell == 1 else 'o' if cell == -1 else 'e'
        for line in board
        for cell in line
    )


def winner_check(board):
    """Report the state of a 3x3 board holding 1 (p1), -1 (p2) and 0 (empty).

    Lines are inspected in a fixed order: columns, rows, main diagonal,
    anti-diagonal. The first group containing a line that sums to 3 or -3
    decides the result; within a group a p1 line takes precedence.

    Returns:
        1 if p1 has a complete line, 2 if p2 has one, 3 if no line is
        complete and the absolute values of all cells sum to 9 (stalemate),
        0 otherwise.
    """

    def _line_owner(totals):
        # Map line sums to the player owning a complete line, if any.
        totals = np.asarray(totals)
        if np.any(totals == 3):
            return 1
        if np.any(totals == -3):
            return 2
        return None

    # Each entry is evaluated only when every earlier group found no winner.
    line_groups = (
        lambda: np.sum(board, axis=0),                      # columns
        lambda: np.sum(board, axis=1),                      # rows
        lambda: board[0, 0] + board[1, 1] + board[2, 2],    # main diagonal
        lambda: board[0, 2] + board[1, 1] + board[2, 0],    # anti-diagonal
    )
    for compute_totals in line_groups:
        owner = _line_owner(compute_totals())
        if owner is not None:
            return owner

    # No complete line: a board with every cell occupied is a stalemate.
    return 3 if np.sum(np.abs(board)) == 9 else 0

    
class Player:
    """Tic-tac-toe agent that keeps a table of the board states it has seen.

    Attributes:
        Q: list of ``[board_snapshot, action_table]`` pairs. The snapshot is
            an independent copy of the board; the action table is a 3x3 float
            array with NaN on occupied cells and 0 on empty ones when created.
        hashes: list of ``Qhash`` strings; ``hashes[i]`` is the key of ``Q[i]``.
        moves: empty list, not used by any method of this class.
        piece: the value this player writes on the board (1 or -1).
        e, a: numeric settings that no method reads yet.
    """

    def __init__(self, pieces:int):
        self.Q = []
        self.hashes = []
        self.moves = []
        self.piece = pieces # 1 or -1 
        self.e = 1    # NOTE(review): presumably the exploration rate -- confirm
        self.a = 0.1  # NOTE(review): presumably the learning rate -- confirm

    def _remember(self, board):
        """Register `board` as a visited state unless its hash is already known.

        Returns the index of the state's entry in ``self.Q`` / ``self.hashes``.
        """
        h = Qhash(board)
        if h in self.hashes:
            return self.hashes.index(h)
        A = np.zeros((3, 3))
        A[board != 0] = np.nan  # occupied cells are not playable actions
        self.hashes.append(h)
        # Store a copy: the caller keeps mutating `board` in place, and a
        # shared reference would make every stored state of a game identical.
        self.Q.append([np.array(board), A])
        return len(self.Q) - 1

    def greedy_move(self, board):
        """Play one move on `board` in place and return it.

        If the current state has not been seen before it is recorded and a
        random move is played. Otherwise the empty cell with the highest
        stored value is played; ties are broken uniformly at random. The
        state reached after the move is recorded as well.

        Raises:
            ValueError: if the board has no empty cell.
        """
        # Check if the state has been visited before
        h = Qhash(board)
        if h not in self.hashes:
            self._remember(board)
            return self.random_move(board)

        print('entered greedy move')
        A = self.Q[self.hashes.index(h)][1]

        # Only empty cells with a real (non-NaN) value are candidates.
        # A plain argmax would return a NaN cell, i.e. an occupied one.
        playable = (board == 0) & ~np.isnan(A)
        if not playable.any():
            return self.random_move(board)

        best_value = A[playable].max()
        candidates = np.argwhere(playable & (A == best_value))
        action = tuple(candidates[np.random.randint(len(candidates))])
        board[action] = self.piece
        self._remember(board)
        return board

    def random_move(self, board):
        """Place this player's piece on a uniformly chosen empty cell.

        The board is modified in place and returned; the resulting state is
        recorded if it has not been seen before.

        Raises:
            ValueError: if the board has no empty cell.
        """
        # Find all possible moves
        rows, cols = np.where(board == 0)
        if len(rows) == 0:
            raise ValueError('no empty cells left on the board')
        # Choose a random move
        pick = np.random.randint(len(rows))
        board[rows[pick], cols[pick]] = self.piece
        # Add state to Q
        self._remember(board)
        return board

    def updateQ(self, reward):
        """Adjust the stored action tables after a game.

        Args:
            reward: 1 for a win, -1 for a loss. Any other value is ignored.

        For a loss, every cell where the last two stored action tables differ
        is set to -1 in the most recent table. Nothing happens when fewer
        than two states are stored.
        """
        if reward == 1:
            # TODO: the update for a win is not implemented yet.
            return

        if reward == -1:
            if len(self.Q) < 2:
                return
            A2 = self.Q[-1][1]
            A1 = self.Q[-2][1]
            # NaN - x is NaN and NaN != 0 is True, so cells that are NaN in
            # either table are selected too.
            # NOTE(review): confirm whether only the last move was intended.
            changed = (A2 - A1) != 0
            # A boolean mask selects individual cells. Indexing with a list
            # of index arrays is read as an array index by current NumPy and
            # would overwrite whole rows.
            A2[changed] = -1

In [109]:
# Two fresh agents: `arg` plays the 1 pieces, `ledsen` plays the -1 pieces.
arg, ledsen = Player(1), Player(-1)
    

In [110]:


# Play 100 games of self-play. `arg` always moves first and `ledsen` second.
# Each game stops at the first win by the player who just moved or at a
# stalemate. No Q update is performed inside this loop.
for _ in range(100):

    board = np.zeros((3,3))
    run = True
    while run:
        # One round: each player moves once, in order, and the board is
        # checked right after every move.
        for agent, win_code, banner in ((arg, 1, 'Player 1 won'),
                                        (ledsen, 2, 'Player 2 won')):
            board = agent.greedy_move(board)
            winner = winner_check(board)

            if winner == win_code or winner == 3:
                print(banner if winner == win_code else 'Stalemate')
                print(board)
                run = False
                break
            
            

Player 1 won
[[ 1.  1.  1.]
 [-1.  0.  0.]
 [-1. -1.  1.]]
entered greedy move
Player 1 won
[[ 1. -1.  0.]
 [ 0.  1.  1.]
 [-1. -1.  1.]]
entered greedy move
entered greedy move
Player 1 won
[[ 1.  1.  1.]
 [ 1. -1. -1.]
 [-1. -1.  1.]]
entered greedy move
entered greedy move
Player 1 won
[[ 1.  1.  1.]
 [ 0.  0.  0.]
 [-1. -1.  0.]]
entered greedy move
entered greedy move
Player 1 won
[[ 1. -1.  1.]
 [ 1.  1. -1.]
 [ 1. -1. -1.]]
entered greedy move
entered greedy move
entered greedy move
entered greedy move
entered greedy move
Player 2 won
[[ 1.  1. -1.]
 [ 1.  1.  0.]
 [-1. -1. -1.]]
entered greedy move
entered greedy move
Stalemate
[[ 1.  1. -1.]
 [-1.  1.  1.]
 [ 1. -1. -1.]]
entered greedy move
entered greedy move
entered greedy move
entered greedy move
Stalemate
[[ 1. -1.  1.]
 [ 1. -1.  1.]
 [-1.  1. -1.]]
entered greedy move
entered greedy move
Player 1 won
[[ 1. -1.  0.]
 [ 1. -1. -1.]
 [ 1.  1.  0.]]
entered greedy move
entered greedy move
entered greedy move
Player 1 won
[[

In [105]:
# Invoke updateQ with the win reward (1) on player `arg`.
arg.updateQ(1)

[array([[ 1.,  1., -1.],
       [-1., -1.,  1.],
       [ 1., -1.,  1.]]), array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]
[array([[ 1.,  1., -1.],
       [-1., -1.,  1.],
       [ 1., -1.,  1.]]), array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]
[array([[ 1.,  1., -1.],
       [-1., -1.,  1.],
       [ 1., -1.,  1.]]), array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]
[array([[ 1.,  1., -1.],
       [-1., -1.,  1.],
       [ 1., -1.,  1.]]), array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]
[array([[ 1.,  1., -1.],
       [-1., -1.,  1.],
       [ 1., -1.,  1.]]), array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]
[array([[ 1.,  1., -1.],
       [-1.,  1., -1.],
       [ 1., -1.,  1.]]), array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]
[array([[ 1.,  1., -1.],
       [-1.,  1., -1.],
       [ 1., -1.,  1.]]), array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]
[array([[ 1.,  1., -

In [39]:

    


# Smoke test for winner_check: the first column is filled with -1 pieces.
a = np.array([[-1.0, 0.0, 0.0]] * 3)
# A fully occupied board; defined here but not evaluated below.
t = np.array([
    [1, -1, 1],
    [-1, -1, 1],
    [-1, 1, -1],
])
b = winner_check(a)
print(b)

2
