# In [None]:
import random
import time
import math
from queue import *
import numpy as np
import matplotlib.pyplot as plt

# In [None]:
# board = np.zeros((3,3), dtype=int)

def printBoard(board):
    """Print the board as a text grid with row and column indices.

    A header line labels columns 0-2; each row is printed as its index
    followed by the three cell values, with a dashed rule after every row.

    Args:
        board: 2-D array indexable as ``board[row, col]``; only columns
            0, 1 and 2 of each row are shown.
    """
    rule = "  ------------"
    print("   0    1    2")
    print(rule)
    for rowIndex in range(len(board)):
        cells = " | ".join(str(board[rowIndex, col]) for col in range(3))
        print(f"{rowIndex} | {cells} |")
        print(rule)

# In [None]:
class Agent:
    """Tabular utility learner for tic-tac-toe on a square numpy board.

    Cells hold 0 (empty), 1 or 2 (the player who owns the cell).

    Attributes set in ``__init__``:
        utilities: dict mapping a state key (see ``hashState``) to the
            current utility estimate of that state.
        ns: dict mapping a state key to how many times that state was the
            "previous state" in an update; drives the learning-rate decay.
        alpha: learning rate, recomputed on every update.
        epsilon: exploration probability, recomputed on every learning move.
        gamma: discount factor.
        plays / totalPlays / gameCount: move and game counters.
        playerNumber: the chip value (1 or 2) this agent places.
        lastState: private copy of the last board this agent moved from,
            or None at the start of a game.
        stateQueue: FIFO of private copies of the boards this agent has
            moved from during the current game.
    """

    def __init__(self, playerNumber):
        self.utilities = {}
        self.ns = {}
        self.alpha = 0.0001
        self.epsilon = 0
        self.gamma = 1
        self.plays = 0
        self.totalPlays = 0
        self.gameCount = 0
        self.playerNumber = playerNumber
        self.lastState = None
        self.stateQueue = Queue()

    def setPlayerNumber(self, n):
        """Change the chip value this agent places on the board."""
        self.playerNumber = n

    def _findWinner(self, board):
        """Return 1 or 2 if that player owns a full row, column or diagonal, else 0."""
        lines = list(board)
        lines.extend(board[:, col] for col in range(len(board)))
        lines.append(board.diagonal())
        # Reversing the rows turns the anti-diagonal into the main diagonal.
        lines.append(board[::-1].diagonal())
        winner = 0
        for line in lines:
            for player in (1, 2):
                if all(item == player for item in line):
                    winner = player
        return winner

    def reward(self, playerNumber, board):
        """Score ``board`` from ``playerNumber``'s point of view.

        Returns:
            1 if ``playerNumber`` has won, -1 if the other player has won,
            -0.5 if the board is full with no winner, and 0 otherwise.
        """
        winner = self._findWinner(board)
        if winner == playerNumber:
            return 1
        if winner != 0:
            return -1
        if not self.checkPossibleActions(board):
            return -0.5
        return 0

    def checkPossibleActions(self, state):
        """Return the ``[row, col]`` coordinates of every empty cell."""
        return [
            [i, j]
            for i in range(len(state))
            for j in range(len(state[i]))
            if state[i, j] == 0
        ]

    def placeChip(self, tempState, chipLocation):
        """Write this agent's chip into ``tempState`` in place and return it.

        Args:
            tempState: board to modify (it is mutated, not copied).
            chipLocation: ``[row, col]`` of the target cell.
        """
        tempState[chipLocation[0], chipLocation[1]] = self.playerNumber
        return tempState

    def policy(self, state, utilities, epsilon, playerNumber):
        """Choose a move for ``state``.

        With probability ``epsilon`` a random legal move is returned.
        Otherwise each legal move is tried on a copy of the board and the
        move whose resulting board has the highest known utility wins
        (ties go to the later move). If no resulting board is in
        ``utilities`` a random legal move is returned.

        Args:
            state: current board.
            utilities: mapping from state key to utility.
            epsilon: exploration probability in [0, 1].
            playerNumber: unused here; the chip placed on the trial boards
                is always ``self.playerNumber``.

        Returns:
            A ``[row, col]`` list, or the integer 0 when the board has no
            empty cell.
        """
        actions = self.checkPossibleActions(state)
        if not actions:
            return 0
        if random.random() < epsilon:
            return random.choice(actions)
        bestValue = -2  # below every possible reward, so any known state beats it
        bestAction = None
        for action in actions:
            afterKey = self.hashState(self.placeChip(np.copy(state), action))
            if afterKey in utilities and utilities[afterKey] >= bestValue:
                bestAction = action
                bestValue = utilities[afterKey]
        if bestAction is None:
            return random.choice(actions)
        return bestAction

    def hashState(self, board):
        """Return a string key for ``board`` (cells formatted as ``%0.3f``)."""
        return '\n'.join('\t'.join('%0.3f' % x for x in y) for y in board)

    def _backUp(self, targetKey):
        """Move the utility of every queued state toward ``utilities[targetKey]``.

        The learning rate is derived from the visit count of ``lastState``.
        The queue is drained and refilled in the same order, so its content
        is unchanged afterwards.
        """
        lastKey = self.hashState(self.lastState)
        self.ns[lastKey] = self.ns.get(lastKey, 0) + 1
        self.alpha = 60 / (59 + self.ns[lastKey])
        # NOTE(review): the divisor doubles per queued state (1, 2, 4, ...)
        # starting with the OLDEST state, so earlier states receive larger
        # updates than recent ones -- confirm this weighting is intended.
        divisor = 1
        for _ in range(self.stateQueue.qsize()):
            queuedState = self.stateQueue.get()
            queuedKey = self.hashState(queuedState)
            if queuedKey not in self.utilities:
                self.utilities[queuedKey] = self.reward(self.playerNumber, queuedState)
            tdError = (
                self.reward(self.playerNumber, queuedState)
                + self.gamma * self.utilities[targetKey]
                - self.utilities[queuedKey]
            )
            self.utilities[queuedKey] += (self.alpha * tdError) / divisor
            divisor += divisor
            self.stateQueue.put(queuedState)

    def learningAgent(self, state):
        """Observe ``state``, update utilities, and pick the next move.

        If ``state`` is terminal (win, loss or full board) its reward is
        stored as its utility and backed up into the states visited during
        this game before the per-game bookkeeping is reset; without that
        backup no non-zero value would ever enter the table.

        Otherwise the state is registered, the previously visited states
        are updated toward its utility, and a private copy of ``state`` is
        remembered. A copy is required because callers mutate the board in
        place through ``placeChip``; storing the reference would make every
        remembered state identical to the current board.

        Returns:
            1 when ``state`` is terminal, otherwise the move returned by
            ``policy`` for this state.
        """
        outcome = self.reward(self.playerNumber, state)
        if outcome != 0:
            terminalKey = self.hashState(state)
            self.utilities[terminalKey] = outcome
            if self.lastState is not None:
                self._backUp(terminalKey)
            self.gameCount += 1
            self.totalPlays += self.plays
            self.plays = 0
            self.stateQueue.queue.clear()
            self.lastState = None
            return 1

        self.plays += 1
        self.epsilon = max(1 - (0.000001 * self.totalPlays), 0.05)
        key = self.hashState(state)
        if key not in self.ns:
            self.ns[key] = 0
            self.utilities[key] = outcome  # always 0 for a non-terminal state
        if self.lastState is not None:
            self._backUp(key)

        snapshot = np.copy(state)
        self.stateQueue.put(snapshot)
        self.lastState = snapshot
        return self.policy(state, self.utilities, self.epsilon, self.playerNumber)

    def playGame(self, playerNumber, state):
        """Play one greedy (epsilon = 0) move on ``state`` and return the board.

        The board is modified in place. If it has no empty cell it is
        returned unchanged instead of failing on the 0 that ``policy``
        returns in that case.
        """
        if not self.checkPossibleActions(state):
            return state
        return self.placeChip(state, self.policy(state, self.utilities, 0, playerNumber))



# In [None]:
def checkWins(board):
    """Report the status of a 3x3 board.

    Rows are examined first, then columns, then the main diagonal and
    finally the anti-diagonal; the first completed line decides the result.

    Returns:
        1 or 2 if that player owns a complete line, 3 if there is no
        winner and no empty cell, and 0 if the game is still open.
    """
    def candidateLines():
        # Yield lines lazily, in the order in which they must be checked.
        yield from board
        for col in range(len(board)):
            yield board[:, col]
        yield board.diagonal()
        yield (board[2, 0], board[1, 1], board[0, 2])

    for cells in candidateLines():
        for player in (1, 2):
            if all(cell == player for cell in cells):
                return player

    hasEmptyCell = any(cell == 0 for row in board for cell in row)
    return 0 if hasEmptyCell else 3



def gameTurn(player, board=None):
    """Prompt a human for a move, place it, and return the game status.

    The prompt repeats until the input is a pair of integers in 0..2 that
    addresses an empty cell.

    Args:
        player: chip value (1 or 2) to write into the chosen cell.
        board: board to play on. When omitted, the module-level ``board``
            is used, which is what this function relied on before the
            parameter existed.

    Returns:
        The result of ``checkWins`` on the board after the move.

    Raises:
        NameError: if ``board`` is omitted and no module-level ``board``
            exists.
    """
    if board is None:
        try:
            board = globals()["board"]
        except KeyError:
            raise NameError("name 'board' is not defined") from None
    while True:
        print("Player", player)
        try:
            # The first prompt asks for the vertical position, i.e. the row.
            row = int(input("Choose y position: (0,1,2) "))
            col = int(input("Choose x position: (0,1,2) "))
        except ValueError:
            print("Please enter a whole number")
            continue
        # Reject negatives too: numpy would silently wrap them around.
        if not (0 <= row <= 2 and 0 <= col <= 2):
            print("Out of bounds")
            continue
        if board[row, col] == 0:
            board[row, col] = player
            return checkWins(board)
        print("There is already a chip in this place")
    
    
    
def endGame(board, player1, player2):
    """Show the final board to both agents, player1 first.

    Each agent's ``learningAgent`` is called once with ``board``; the
    return values are discarded.
    """
    for agent in (player1, player2):
        agent.learningAgent(board)

def game(numGames=100000, verbose=True):
    """Train two agents against each other by self-play.

    Every game starts from an empty 3x3 board and player 1 always moves
    first. After each move the board is checked with ``checkWins``; once
    the game is over both agents are shown the final board via ``endGame``.

    Args:
        numGames: number of games to play (default 100000, the value that
            used to be hard-coded).
        verbose: when True (default, the previous behaviour) the board is
            printed after every move and a "Game: n" line after every
            game. Pass False to train without console output.

    Returns:
        The tuple ``(player1, player2)`` of trained agents.
    """
    player1 = Agent(1)
    player2 = Agent(2)
    for gamesDone in range(1, numGames + 1):
        board = np.zeros((3, 3), dtype=int)
        finished = False
        while not finished:
            for agent in (player1, player2):
                move = agent.learningAgent(board)
                board = agent.placeChip(board, move)
                if verbose:
                    printBoard(board)
                if checkWins(board) != 0:
                    endGame(board, player1, player2)
                    finished = True
                    break
        if verbose:
            print("Game:", gamesDone)
    return player1, player2



def _readHumanMove(board):
    """Prompt until the user enters the (x, y) of an empty cell on ``board``."""
    rows, cols = board.shape
    while True:
        try:
            x = int(input("X: "))
            y = int(input("Y: "))
        except ValueError:
            print("Please enter a whole number")
            continue
        # Negative values are rejected because numpy would wrap them around.
        if not (0 <= x < cols and 0 <= y < rows):
            print("Out of bounds")
            continue
        if board[y, x] != 0:
            print("There is already a chip in this place")
            continue
        return x, y


def playVsBot(player, showBoard=False):
    """Let a human (chip 1) play console games against ``player`` (chip 2).

    The human moves first in every game. ``player`` is switched to chip 2
    with ``setPlayerNumber`` and answers each human move through
    ``playGame``. A game ends as soon as ``checkWins`` reports a non-zero
    status; the user is then asked whether to quit.

    Human input is validated: non-integers, coordinates outside the board
    and occupied cells are rejected and the prompt is repeated, so a move
    can no longer overwrite an existing chip.

    Args:
        player: agent object providing ``setPlayerNumber`` and ``playGame``.
        showBoard: when True the board is printed after every move.
            Defaults to False, matching the previous silent behaviour.
    """
    player.setPlayerNumber(2)
    answer = "n"
    while answer != "y":
        board = np.zeros((3, 3), dtype=int)
        while True:
            x, y = _readHumanMove(board)
            board[y, x] = 1
            if showBoard:
                printBoard(board)
            if checkWins(board) != 0:
                print("Game Over")
                break
            board = player.playGame(2, board)
            if showBoard:
                printBoard(board)
            if checkWins(board) != 0:
                print("Game Over")
                break
        answer = input("Quit (y/n)")
            
# Run training and the interactive match only when executed as a script or
# notebook cell, not when this file is imported as a module.
if __name__ == "__main__":
    start = time.time()
    player1, player2 = game()
    end = time.time()
    print("Time:", end - start)
    # NOTE(review): player1 is created as Agent(1) in game() but playVsBot
    # switches it to chip 2 -- confirm its learned utilities are meant to be
    # reused from the other side of the board.
    playVsBot(player1)


    
