In [1]:
import numpy as np
import pickle

# Load the pre-computed Tic-Tac-Toe model from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so 'TTT_model.pkl' must only ever come from a trusted source.
with open('TTT_model.pkl','rb') as f:
    model = pickle.load(f)

    

In [2]:
# Show which top-level entries the loaded model contains.
model.keys()

dict_keys(['states', 'connections', 'valuetable'])

In [3]:
# Pull the three model tables out into module-level names; the helper
# functions below read `states`, `connections` and `V` as globals.
states, connections, V = (
    model[key] for key in ('states', 'connections', 'valuetable')
)

In [4]:
def identify_state(board):
    """Find ``board`` inside the global ``states`` table.

    The turn number is the count of non-zero cells on ``board``; the board is
    then looked up with ``.index`` among the entries of ``states[turn]``.

    Args:
        board: sequence of cell values, where 0 marks an empty cell.

    Returns:
        ``(turn, state_index)`` - the number of occupied cells and the
        position of ``board`` within ``states[turn]``.

    Raises:
        Whatever ``states[turn].index`` raises when the board is not found
        (``ValueError`` for a plain list).
    """
    occupied = np.array(board) != 0
    turn = np.sum(occupied)
    return turn, states[turn].index(board)

def get_best_action(turn, state_index):
    """Return the successor board with the best stored value.

    Reads the global tables: ``connections[turn][state_index]`` lists indices
    into ``states[turn + 1]``, and ``V[turn + 1]`` holds the value of each of
    those successor states. The list of candidate values is printed.

    Args:
        turn: index of the current turn in the tables.
        state_index: index of the current board within ``states[turn]``.

    Returns:
        The entry of ``states[turn + 1]`` whose value is largest on even
        turns and smallest on odd turns (first one on ties).
    """
    candidates = connections[turn][state_index]
    following = turn + 1
    v_arr = [V[following][candidate] for candidate in candidates]
    print(v_arr)
    # Even turns maximise the stored value, odd turns minimise it.
    pick = np.argmax if turn % 2 == 0 else np.argmin
    return states[following][candidates[pick(v_arr)]]

def translate_action(board_state, next_state):
    """Return the index of the first cell in which two boards differ.

    Only the first nine positions are compared. When no compared cell
    differs the result is ``None``.
    """
    differing = (
        position
        for position, before, after in zip(range(9), board_state, next_state)
        if before != after
    )
    return next(differing, None)

# One 0/1 mask per winning line (three rows, three columns, two diagonals):
# a 1 marks each of the three cells belonging to that line.
v_states = [
    [int(cell in line) for cell in range(9)]
    for line in (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )
]

def print_board(s):
    """Print a nine-cell board as three bracketed rows.

    Cell values index into the symbols ``'-'``, ``'x'`` and ``'o'``
    (0, 1 and 2 respectively). A blank line follows the board.
    """
    symbols = ['-', 'x', 'o']
    cells = [symbols[s[position]] for position in range(9)]
    rows = [' [' + ' '.join(cells[start:start + 3]) + '] ' for start in (0, 3, 6)]
    print('\n'.join(rows) + '\n')

def check_victory(s):
    """Score a board: 1 if mark 1 owns a full line, -1 if mark 2 does, else 0.

    Args:
        s: length-9 NumPy array of cell values (element-wise ``==`` is used,
           so a plain list does not work here).

    Returns:
        1, -1 or 0. Mark 1 is checked first.
    """
    # Multiplying the line masks in the global `v_states` by a boolean
    # occupancy vector counts how many cells of each line a player holds.
    x_counts = v_states @ (s == 1)
    o_counts = v_states @ (s == 2)
    if x_counts.max() == 3:
        return 1
    if o_counts.max() == 3:
        return -1
    return 0

# Discount factor applied to the stored value of a non-terminal successor.
gamma = 0.5


def calc_V(turn, i, verbose = False):
    """Choose the successor board with the best one-step look-ahead value.

    Every successor listed in the global ``connections[turn][i]`` is scored:
    the result of ``check_victory`` is used directly when it is non-zero or
    when ``turn`` is 8; otherwise the score is ``gamma * V[turn + 1][a]``.
    Even turns take the largest score, odd turns the smallest. When several
    successors share the best score one of them is drawn at random.

    Args:
        turn: index of the current turn in the global tables.
        i: index of the current board within ``states[turn]``.
        verbose: when truthy, print every candidate board and the score list.

    Returns:
        The chosen entry of ``states[turn + 1]``.

    Raises:
        ValueError: if the state has no successors.
    """
    actions = connections[turn][i]
    if len(actions) == 0:
        raise ValueError(f"state {i} at turn {turn} has no successor states")

    V_new = []
    for a in actions:
        s = states[turn + 1][a]
        # Board dumps are diagnostics, so they respect the verbose flag.
        if verbose:
            print_board(s)
        reward = check_victory(np.array(s))
        if reward == 0 and turn != 8:
            V_new.append(gamma * V[turn + 1][a])
        else:
            V_new.append(reward)

    if verbose:
        print(V_new)

    if turn % 2 == 0:
        V_best = max(V_new)
        a = int(np.argmax(V_new))
    else:
        V_best = min(V_new)
        a = int(np.argmin(V_new))

    # Randomise only on a genuine tie: count the scores equal to the best
    # one instead of measuring the length of the comparison array.
    tied = [idx for idx, value in enumerate(V_new) if value == V_best]
    if len(tied) > 1:
        a = int(np.random.choice(tied))

    return states[turn + 1][actions[a]]



In [5]:
# Demo lookup: a board with mark 1 in cell 2 and mark 2 in cell 8.
board_state = [0, 0, 1, 0, 0, 0, 0, 0, 2]
print(board_state)
# Occupancy mask; identify_state counts its True entries to get the turn.
print(np.not_equal(np.array(board_state), 0))
turn, state_index = identify_state(board_state)
print(state_index)

[0, 0, 1, 0, 0, 0, 0, 0, 2]
[False False  True False False False False False  True]
23


In [6]:
import tkinter as tk
import random

def check_winner(board):
    """Return the mark that fills a complete line, or 0 when nobody has one.

    Args:
        board: indexable with nine cells, where 0 marks an empty cell.

    Returns:
        The value stored in the first winning line found (rows are checked
        first, then columns, then the two diagonals), otherwise 0.
    """
    lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
        (0, 4, 8), (2, 4, 6),              # diagonals
    )
    for first, second, third in lines:
        if board[first] == board[second] == board[third] != 0:
            return board[first]
    return 0

class Computer_AI:
    """Move selector driven by a pre-computed state table and value table.

    Args:
        value_table: ``value_table[turn][k]`` is the stored value of
            ``states[turn][k]``.
        states: ``states[turn]`` lists the boards known for that turn.
        connections: ``connections[turn][k]`` lists indices into
            ``states[turn + 1]`` reachable from ``states[turn][k]``.
        gamma: discount applied to the stored value of a non-terminal
            successor. Defaults to 0.5, the value this notebook uses.
    """

    def __init__(self, value_table, states, connections, gamma=0.5):
        self.V = value_table
        self.states = states
        self.connections = connections
        self.gamma = gamma

    def identify_state(self, board):
        """Return ``(turn, state_index)`` for ``board``.

        ``turn`` is the number of non-zero cells; ``state_index`` is the
        position of ``board`` within ``self.states[turn]``.
        """
        turn = np.sum(np.array(board) != 0)
        state_index = self.states[turn].index(board)
        return turn, state_index

    def translate_action(self, board_state, next_state):
        """Return the first cell index (0-8) where the boards differ, else None."""
        for i, b, n in zip(range(9), board_state, next_state):
            if b != n:
                return i
        return None

    def get_reward(self, winner):
        """Map a winner mark to a reward: 1 -> 1, 2 -> -1, anything else -> 0."""
        if winner == 1:
            return 1
        elif winner == 2:
            return -1
        else:
            return 0

    def calc_V(self, turn, i, verbose = False):
        """Choose the successor board with the best one-step look-ahead value.

        Each successor in ``self.connections[turn][i]`` is scored with the
        reward of ``check_winner`` when that reward is non-zero or ``turn`` is
        8, and with ``self.gamma * self.V[turn + 1][a]`` otherwise. Even turns
        take the largest score, odd turns the smallest; ties are broken at
        random.

        Args:
            turn: index of the current turn in the tables.
            i: index of the current board within ``self.states[turn]``.
            verbose: when truthy, print each candidate board and the scores.

        Returns:
            The chosen entry of ``self.states[turn + 1]``.

        Raises:
            ValueError: if the state has no successors.
        """
        actions = self.connections[turn][i]
        if len(actions) == 0:
            raise ValueError(f"state {i} at turn {turn} has no successor states")

        V_new = []
        for a in actions:
            s = self.states[turn + 1][a]
            # Board dumps are diagnostics, so they respect the verbose flag.
            if verbose:
                print_board(s)
            winner = check_winner(np.array(s))
            reward = self.get_reward(winner)

            if reward == 0 and turn != 8:
                V_new.append(self.gamma * self.V[turn + 1][a])
            else:
                V_new.append(reward)

        if verbose:
            print(V_new)

        if turn % 2 == 0:
            V_best = max(V_new)
            a = int(np.argmax(V_new))
        else:
            V_best = min(V_new)
            a = int(np.argmin(V_new))

        # Randomise only on a genuine tie: count the scores equal to the best
        # one instead of measuring the length of the comparison array.
        tied = [idx for idx, value in enumerate(V_new) if value == V_best]
        if len(tied) > 1:
            a = int(np.random.choice(tied))

        # Use the instance's own table, not a notebook-level global.
        return self.states[turn + 1][actions[a]]

    def calc_computer_action(self, board):
        """Return the cell index the computer should play on ``board``."""
        turn, state_index = self.identify_state(board)
        next_state = self.calc_V(turn, state_index, verbose = True)
        next_move = self.translate_action(board, next_state)
        return next_move



class TicTacToe:
    """Tkinter Tic-Tac-Toe board on which a human plays against ``AI``.

    Constructing the object builds the window, asks who starts and then runs
    the Tk main loop, so the constructor only returns once the window is
    closed.

    Args:
        AI: object with a ``calc_computer_action(board)`` method that returns
            the cell index (0-8) the computer wants to play.
    """

    def __init__(self, AI):
        self.root = tk.Tk()
        self.root.title("TicTacToe Board")

        self.AI = AI

        self.button_size = 5
        self.board_state = [0] * 9
        self.buttons = []
        self.player_action = 1
        self.computer_action = 2

        # Input guards: clicks count only once the starting player is chosen,
        # while the game is running, and while no computer move is pending.
        self.game_started = False
        self.game_over = False
        self.awaiting_computer = False

        self.choose_starting_player()
        self.create_board()

        self.root.mainloop()

    def display_winner_message(self, winner):
        """Print who won, based on which mark belongs to the human player."""
        if winner == self.player_action:
            print("Spieler gewinnt!")
        else:
            print("Computer gewinnt!")

    def _finish_if_decided(self):
        """End the game on a win or a full board; return True if it ended."""
        winner = check_winner(self.board_state)
        if winner:
            self.game_over = True
            self.display_winner_message(winner)
            return True
        if 0 not in self.board_state:
            self.game_over = True
            print("Unentschieden!")
            return True
        return False

    def on_button_click(self, button, index):
        """Handle a human click on cell ``index``.

        Clicks are ignored before the game has started, after it has ended,
        while the computer's reply is still pending, and on occupied cells.
        """
        if not self.game_started or self.game_over or self.awaiting_computer:
            return
        if self.board_state[index] != 0:
            return

        button.config(text="X")
        self.board_state[index] = self.player_action

        if self._finish_if_decided():
            return

        # Lock the board until the delayed computer move has run; a second
        # human move would produce a position missing from the state table.
        self.awaiting_computer = True
        button.after(500, self.make_computer_move)

    def make_computer_move(self):
        """Ask the AI for a move, apply it to board and buttons, check the result."""
        self.awaiting_computer = False
        if self.game_over:
            return

        free_indices = [i for i, val in enumerate(self.board_state) if val == 0]
        if not free_indices:
            return

        computer_choice = self.AI.calc_computer_action(self.board_state)
        self.buttons[computer_choice].config(text="O")
        self.board_state[computer_choice] = self.computer_action
        print_board(self.board_state)

        self._finish_if_decided()

    def start_game(self, starting_player):
        """Begin play; when ``starting_player`` is "Computer" it moves first as mark 1."""
        if self.game_started:
            return
        self.game_started = True
        if starting_player == "Computer":
            self.player_action = 2
            self.computer_action = 1
            self.make_computer_move()

    def choose_starting_player(self):
        """Open a small dialog asking whether the player or the computer starts."""
        choice_window = tk.Toplevel(self.root)
        choice_window.title("Wer beginnt?")

        def choose(starting_player):
            choice_window.destroy()
            self.start_game(starting_player)

        tk.Label(choice_window, text="Wähle den Startspieler:").pack()
        tk.Button(choice_window, text="Spieler", command=lambda: choose("Spieler")).pack()
        tk.Button(choice_window, text="Computer", command=lambda: choose("Computer")).pack()
        # Closing the dialog without choosing lets the human start, so the
        # board does not stay locked forever.
        choice_window.protocol("WM_DELETE_WINDOW", lambda: choose("Spieler"))

    def create_board(self):
        """Create the 3x3 grid of buttons and wire each to ``on_button_click``."""
        for index in range(9):
            btn = tk.Button(self.root, text="-", width=self.button_size, height=self.button_size)
            # Default arguments bind this iteration's button and index.
            btn.config(command=lambda b=btn, i=index: self.on_button_click(b, i))
            btn.grid(row=index//3, column=index%3, padx=0, pady=0)
            self.buttons.append(btn)

# Start the game: hand the loaded model tables to the AI, then open the board.
AI = Computer_AI(
    value_table=model['valuetable'],
    states=model['states'],
    connections=model['connections'],
)
TicTacToe(AI)

 [x - -] 
 [- - -] 
 [- - -] 

 [- x -] 
 [- - -] 
 [- - -] 

 [- - x] 
 [- - -] 
 [- - -] 

 [- - -] 
 [x - -] 
 [- - -] 

 [- - -] 
 [- x -] 
 [- - -] 

 [- - -] 
 [- - x] 
 [- - -] 

 [- - -] 
 [- - -] 
 [x - -] 

 [- - -] 
 [- - -] 
 [- x -] 

 [- - -] 
 [- - -] 
 [- - x] 

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 [- - x] 
 [- - -] 
 [- - -] 

 [x - x] 
 [- - -] 
 [- - o] 

 [- x x] 
 [- - -] 
 [- - o] 

 [- - x] 
 [x - -] 
 [- - o] 

 [- - x] 
 [- x -] 
 [- - o] 

 [- - x] 
 [- - x] 
 [- - o] 

 [- - x] 
 [- - -] 
 [x - o] 

 [- - x] 
 [- - -] 
 [- x o] 

[0.0625, 0.0625, 0.0, 0.0, 0.0, 0.0625, 0.0]
 [x - x] 
 [- - -] 
 [- - o] 

 [x o x] 
 [x - -] 
 [- - o] 

 [x o x] 
 [- x -] 
 [- - o] 

 [x o x] 
 [- - x] 
 [- - o] 

 [x o x] 
 [- - -] 
 [x - o] 

 [x o x] 
 [- - -] 
 [- x o] 

[0.0, 0.0, -0.125, 0.25, 0.0]
 [x o x] 
 [- - -] 
 [x - o] 

 [x o x] 
 [x o -] 
 [x - o] 

 [x o x] 
 [- o x] 
 [x - o] 

 [x o x] 
 [- o -] 
 [x x o] 

[1, -0.5, 0.0]
 [x o x] 
 [x o -] 
 [x - o

<__main__.TicTacToe at 0x7b859b5bbe60>