<a href="https://colab.research.google.com/github/IGieckI/TrAIs/blob/main/TrAIs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# Tris environment
class Tris:
    """Tic-tac-toe ("tris") board used as the game environment.

    The board is a 3x3 list of one-character strings. ' ' marks an empty
    cell; any other character is the sign of the player occupying it.
    Cells can also be addressed by a flat, row-major id in 0..8
    (id 0 is the top-left cell, id 8 the bottom-right one).
    """

    def __init__(self):
        self.board = [[' ', ' ', ' '], [' ', ' ', ' '], [' ', ' ', ' ']]

    def __str__(self):
        """Return the board drawn as text, with A-C column and 1-3 row labels."""
        rows = [
            "{} {}|{}|{}".format(number, row[0], row[1], row[2])
            for number, row in enumerate(self.board, start=1)
        ]
        return "  A B C\n" + "\n  -+-+-\n".join(rows)

    def get_cell_by_id(self, cell_id):
        """Return the content of the cell with flat id `cell_id` (0..8)."""
        return self.board[cell_id // 3][cell_id % 3]

    def set_cell_by_id(self, cell_id, new_value):
        """Overwrite the cell with flat id `cell_id` (0..8) with `new_value`."""
        self.board[cell_id // 3][cell_id % 3] = new_value

    def check_winner(self):
        """Return the sign that completed a line, or ' ' if nobody has won.

        A line only counts when its three cells are equal AND non-empty.
        Without the non-empty check an untouched row or column would be
        reported as a "win" for ' ' and hide a real win elsewhere.
        """
        b = self.board
        lines = [[b[i][0], b[i][1], b[i][2]] for i in range(3)]   # rows
        lines += [[b[0][i], b[1][i], b[2][i]] for i in range(3)]  # columns
        lines.append([b[0][0], b[1][1], b[2][2]])                 # main diagonal
        lines.append([b[0][2], b[1][1], b[2][0]])                 # anti-diagonal

        for first, second, third in lines:
            if first != ' ' and first == second == third:
                return first
        return ' '

    def available_actions(self):
        """Return the flat ids of all empty cells, in ascending order."""
        return [i for i in range(9) if self.get_cell_by_id(i) == ' ']

    def step(self, action, sign):
        """Place `sign` on cell `action` and report the outcome.

        The board is modified in place; no check is made that the cell was
        empty. Returns a tuple `(self, reward)` where reward is 1 if `sign`
        now has a winning line, -1 if a different sign has one, 0 otherwise.
        Note that the returned board is this very object, not a copy.
        """
        self.set_cell_by_id(action, sign)

        winner = self.check_winner()

        if winner == ' ':
            reward = 0
        elif winner == sign:
            reward = 1
        else:
            reward = -1

        return self, reward

    def get_hash(self):
        """Return the nine cells concatenated row by row as a 9-char string."""
        return ''.join(cell for row in self.board for cell in row)

    def is_playing(self):
        """Return True while nobody has won and at least one cell is empty."""
        return self.check_winner() == ' ' and len(self.available_actions()) > 0

In [None]:
# The AI model itself
class TrAIs:
    """Tabular Q-learning agent that plays Tris with a fixed sign.

    `qTable` maps a board hash to a NumPy array of nine action values, one
    per cell id. `history` collects `[state, action, next_state]` entries
    for the current game; `train` consumes and then empties it.
    """

    def __init__(self, learning_rate, gamma, sign):
        self.sign = sign
        self.learning_rate = learning_rate
        self.gamma = gamma

        self.qTable = {}
        self.history = []

    def get_qRow(self, board_hash):
        """Return the action-value row for `board_hash`, creating it if new.

        New rows start with every value at 0.6. The returned array is the
        one stored in `qTable`, so writing to it updates the table.
        """
        if board_hash not in self.qTable:
            self.qTable[board_hash] = np.full(9, 0.6)
        return self.qTable[board_hash]

    def get_action(self, board) -> int:
        """Return the legal cell id with the highest action value.

        Occupied cells get the value -1.0 written into the table row, and
        the choice is made among the free cells only. This keeps the search
        finite even when every legal value has dropped below -1.0. Ties go
        to the first cell in `board.available_actions()` order.

        Raises:
            ValueError: if the board has no available action.
        """
        available = board.available_actions()
        if not available:
            raise ValueError("no available actions on this board")

        qRow = self.get_qRow(board.get_hash())
        legal = set(available)
        for cell in range(len(qRow)):
            if cell not in legal:
                qRow[cell] = -1.0

        return int(max(available, key=lambda cell: qRow[cell]))

    # TODO: try running the training backwards (from the last move to the first).
    # TODO: try using the expected value <--- useful optimization
    def train(self, final_reward):
        """Apply a Q-learning update to every recorded move, then reset.

        For each `[state, action, next_state]` entry in `history`:
            Q(state, action) = (1 - lr) * Q(state, action)
                             + lr * (final_reward + gamma * max(Q(next_state)))
        The same `final_reward` is used for every move of the game.
        """
        for state, action, next_state in self.history:
            state_row = self.get_qRow(state.get_hash())
            next_row = self.get_qRow(next_state.get_hash())
            old_value = (1 - self.learning_rate) * state_row[action]
            new_value = self.learning_rate * (final_reward + self.gamma * max(next_row))
            state_row[action] = old_value + new_value
        # Reset the instance attribute (not a local) so the next game starts
        # with an empty history instead of replaying all previous games.
        self.history = []


In [None]:
# Training parameters
# Number of self-play games run by the training loop.
NUM_EPISODES = 500
# Learning rate and discount factor handed to both TrAIs agents.
LEARNING_RATE = 0.5
GAMMA = 0.99

# Exploration schedule: EPSILON is the starting probability of playing a
# random move, DECAY_RATE is subtracted from it after every episode, and
# MIN_EPSILON is the floor it is clamped to.
# NOTE(review): with these values epsilon drops by only
# NUM_EPISODES * DECAY_RATE = 0.05 over the whole run, so almost every
# training move is random - confirm this is intended.
EPSILON = 1.0
MIN_EPSILON = 0.01
DECAY_RATE = 0.0001

In [None]:
import copy
import random

# Self-play training: model1 ('X') always moves first, model2 ('O') second.
model1 = TrAIs(LEARNING_RATE, GAMMA, 'X')
model2 = TrAIs(LEARNING_RATE, GAMMA, 'O')

current_episode = 0
current_epsilon = EPSILON

m1_wins = 0
m2_wins = 0
ties = 0

while current_episode < NUM_EPISODES:
    board = Tris()
    total_reward1, total_reward2 = 0, 0
    turn = True  # True: model1 to move, False: model2 to move
    while board.is_playing():
        mover = model1 if turn else model2

        # Epsilon-greedy: explore with a random legal move, otherwise exploit.
        if random.uniform(0, 1) < current_epsilon:
            action = random.choice(board.available_actions())
        else:
            action = mover.get_action(board)

        # step() modifies the board in place and returns that same object,
        # so the states must be snapshotted; otherwise every history entry
        # would end up pointing at the final position of the game.
        state_before = copy.deepcopy(board)
        new_board, reward = board.step(action, mover.sign)
        mover.history.append([state_before, action, copy.deepcopy(new_board)])

        if turn:
            total_reward1 += reward
        else:
            total_reward2 += reward

        board = new_board
        turn = not turn

    # A player that did not win receives the negated reward of the opponent.
    model1.train(total_reward1 if total_reward2 == 0 else -total_reward2)
    model2.train(total_reward2 if total_reward1 == 0 else -total_reward1)
    # Each game must be trained on exactly once: drop the recorded moves.
    model1.history.clear()
    model2.history.clear()

    winner = board.check_winner()

    if winner == ' ':
        ties += 1
    elif winner == model1.sign:
        m1_wins += 1
    else:
        m2_wins += 1

    print("N.", current_episode, " | ", m1_wins, " - ", ties, " - ", m2_wins)
    current_episode += 1
    # Linear epsilon decay, clamped at MIN_EPSILON.
    current_epsilon = max(MIN_EPSILON, current_epsilon - DECAY_RATE)


N. 0  |  1  -  0  -  0
N. 1  |  1  -  1  -  0
N. 2  |  1  -  2  -  0
N. 3  |  1  -  2  -  1
N. 4  |  2  -  2  -  1
N. 5  |  3  -  2  -  1
N. 6  |  3  -  2  -  2
N. 7  |  3  -  3  -  2
N. 8  |  3  -  4  -  2
N. 9  |  3  -  4  -  3
N. 10  |  3  -  4  -  4
N. 11  |  3  -  5  -  4
N. 12  |  4  -  5  -  4
N. 13  |  4  -  5  -  5
N. 14  |  5  -  5  -  5
N. 15  |  6  -  5  -  5
N. 16  |  7  -  5  -  5
N. 17  |  8  -  5  -  5
N. 18  |  9  -  5  -  5
N. 19  |  10  -  5  -  5
N. 20  |  11  -  5  -  5
N. 21  |  12  -  5  -  5
N. 22  |  13  -  5  -  5
N. 23  |  14  -  5  -  5
N. 24  |  14  -  5  -  6
N. 25  |  15  -  5  -  6
N. 26  |  16  -  5  -  6
N. 27  |  16  -  6  -  6
N. 28  |  16  -  7  -  6
N. 29  |  17  -  7  -  6
N. 30  |  17  -  7  -  7
N. 31  |  18  -  7  -  7
N. 32  |  18  -  7  -  8
N. 33  |  19  -  7  -  8
N. 34  |  19  -  7  -  9
N. 35  |  19  -  7  -  10
N. 36  |  20  -  7  -  10
N. 37  |  21  -  7  -  10
N. 38  |  22  -  7  -  10
N. 39  |  23  -  7  -  10
N. 40  |  23  -  8  -  1

In [None]:
# To play against the bot

# convert player input into board cell
def convert_input(input: str) -> int:
    """Translate a cell name such as "a1" into a flat board cell id.

    The first character selects the column (a, b, c -> 0, 1, 2) and the
    second the row (1, 2, 3 -> offsets 0, 3, 6); the id is their sum.
    Surrounding whitespace is ignored and letters are case-insensitive, so
    the upper-case labels printed on the board ("A1") work too. Characters
    after the first two are ignored.

    Returns:
        The cell id in 0..8, or -1 if the text is too short or does not
        name a valid column and row.
    """
    column_offsets = {'a': 0, 'b': 1, 'c': 2}
    row_offsets = {'1': 0, '2': 3, '3': 6}

    text = input.strip().lower()
    if len(text) < 2:
        # Too short to hold both a column and a row.
        return -1

    column = column_offsets.get(text[0])
    row = row_offsets.get(text[1])
    if column is None or row is None:
        return -1
    return column + row

board = Tris()
# Debug aid: show the values model1 holds for the empty board.
print(model1.get_qRow(board.get_hash()))
# Choose who plays first (50/50).
player_turn = random.uniform(0, 1) < 0.5

# Start the game: the human plays "O", the bot plays with model1's sign.
while board.is_playing():
    print(board)
    if player_turn:
        print("Player turn:")
        print("Seleziona la casella(ex:\"a1\"):")
        try:
            action = convert_input(input())
        except IndexError:
            # Treat an entry that is too short to parse as invalid.
            action = -1
        # Reject unparsable input (-1) and cells that are already taken,
        # so the player can never overwrite an existing mark.
        if action not in board.available_actions():
            print("Invalid or occupied cell, try again.")
            continue
        new_board, reward = board.step(action, "O")
    else:
        print("TrAIs turn:")
        action = model1.get_action(board)
        new_board, reward = board.step(action, model1.sign)

    board = new_board
    player_turn = not player_turn

print(board)
winner = board.check_winner()

if winner == ' ':
    print("It's a draw!")
elif winner == model1.sign:
    print("TrAIs wins!")
else:
    print("Player wins!")

[0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6]
  A B C
1  | | 
  -+-+-
2  | | 
  -+-+-
3  | | 
TrAIs turn:
  A B C
1 X| | 
  -+-+-
2  | | 
  -+-+-
3  | | 
Player turn:
Seleziona la casella(ex:"a1"):
