In [1]:
import numpy as np
import pickle

with open('TTT_model.pkl','rb') as f:
    model = pickle.load(f)

    

In [2]:
model.keys()

dict_keys(['states', 'connections', 'valuetable'])

In [3]:
states = model['states']
connections = model['connections']
V = model['valuetable']

In [32]:
def identify_state(board):
    turn = np.sum(np.array(board) != 0)
    state_index = states[turn].index(board)
    return turn, state_index

def get_best_action(turn, state_index):
    actions = connections[turn][state_index]
    v_arr = [V[turn + 1][a] for a in actions]
    print(v_arr)
    if turn%2==0:    
        a = np.argmax(v_arr)
    else: 
        a = np.argmin(v_arr)
    best_action = actions[a] 
    next_state = states[turn + 1][best_action]
    return next_state

def translate_action(board_state, next_state):
    for i, b , n in zip(range(9), board_state, next_state):
        if b != n:
            return i

v_states = [
            [1,1,1,0,0,0,0,0,0],
            [0,0,0,1,1,1,0,0,0],
            [0,0,0,0,0,0,1,1,1],
            [1,0,0,1,0,0,1,0,0],
            [0,1,0,0,1,0,0,1,0],
            [0,0,1,0,0,1,0,0,1],
            [1,0,0,0,1,0,0,0,1],
            [0,0,1,0,1,0,1,0,0],
            ]

def print_board(s):
    x = ['-', 'x', 'o']
    print(f' [{x[s[0]]} {x[s[1]]} {x[s[2]]}] \n [{x[s[3]]} {x[s[4]]} {x[s[5]]}] \n [{x[s[6]]} {x[s[7]]} {x[s[8]]}] \n')

def check_victory(s):
    # s is a length 9 array
    x = s == 1
    o = s == 2
    sx = v_states @ x
    so = v_states @ o
    if sx.max() == 3:
        return 1
    elif so.max() == 3:
        return -1
    else:
        return 0

gamma = 0.5
def calc_V(turn, i, verbose = False):

    actions = connections[turn][i] 
    V_new = []
    for a in actions:
        s = states[turn + 1][a]
        print_board(s)
        reward = check_victory(np.array(s))
        if reward == 0 and turn != 8:
            V_new.append( gamma * V[turn + 1][a])

        else:
            V_new.append(reward)
    
    if verbose == True:
        print(V_new)

    if turn%2 == 0:
        V_max = max(V_new)
        a = np.argmax(V_new)
    if turn%2 != 0:
        V_max = min(V_new)
        a = np.argmin(V_new) 

    next_state = states[turn + 1][actions[a]]
    return next_state



In [5]:
board_state = [0] * 9
board_state[2] = 1
board_state[8] = 2
print(board_state)
print(np.array(board_state) != 0)
turn, state_index = identify_state(board_state)
print(state_index)

[0, 0, 1, 0, 0, 0, 0, 0, 2]
[False False  True False False False False False  True]
23


In [6]:
import tkinter as tk
import random

def check_winner(board_state):
    reward = (1,-1)

    winning_combinations = [
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6)
    ]
    for combo in winning_combinations:
        if board_state[combo[0]] == board_state[combo[1]] == board_state[combo[2]] != 0:
            return reward[board_state[combo[0]] - 1] # return 1 for first player and -1 for second player
    return 0

def on_button_click(button, index, board_state, buttons):
    if board_state[index] == 0:
        button.config(text="X")
        board_state[index] = 1

        winner = check_winner(board_state)
        if winner:
            print("Spieler gewinnt!") if winner == 1 else print("Computer gewinnt!")
            return

        button.after(500, lambda: make_computer_move(board_state, buttons))

def calc_computer_action(board_state):
    turn, state_index = identify_state(board_state)
    next_state = calc_V(turn, state_index)
    next_move = translate_action(board_state, next_state)
    return next_move

def make_computer_move(board_state, buttons):
    free_indices = [i for i, val in enumerate(board_state) if val == 0]
    if free_indices:
        computer_choice = calc_computer_action(board_state)
        buttons[computer_choice].config(text="O")
        board_state[computer_choice] = 2

        winner = check_winner(board_state)
        if winner:
            print("Spieler gewinnt!") if winner == 1 else print("Computer gewinnt!")

def start_game(starting_player, board_state, buttons):
    if starting_player == "Computer":
        make_computer_move(board_state, buttons)

def choose_starting_player(root, board_state, buttons):
    choice_window = tk.Toplevel(root)
    choice_window.title("Wer beginnt?")
    
    tk.Label(choice_window, text="Wähle den Startspieler:").pack()

    tk.Button(choice_window, text="Spieler", command=lambda: (choice_window.destroy(), start_game("Spieler", board_state, buttons))).pack()
    tk.Button(choice_window, text="Computer", command=lambda: (choice_window.destroy(), start_game("Computer", board_state, buttons))).pack()

def create_tictactoe_board():
    root = tk.Tk()
    root.title("TicTacToe Board")

    button_size = 5
    board_state = [0] * 9
    buttons = []
    player_action = 1
    computer_action = 2

    choose_starting_player(root, board_state, buttons)

    for index in range(9):
        btn = tk.Button(root, text="-", width=button_size, height=button_size)
        btn.config(command=lambda b=btn, i=index: on_button_click(b, i, board_state, buttons))
        btn.grid(row=index//3, column=index%3, padx=0, pady=0)
        buttons.append(btn)

    root.mainloop()

create_tictactoe_board()

 [x - -] 
 [- - -] 
 [- - -] 

 [- x -] 
 [- - -] 
 [- - -] 

 [- - x] 
 [- - -] 
 [- - -] 

 [- - -] 
 [x - -] 
 [- - -] 

 [- - -] 
 [- x -] 
 [- - -] 

 [- - -] 
 [- - x] 
 [- - -] 

 [- - -] 
 [- - -] 
 [x - -] 

 [- - -] 
 [- - -] 
 [- x -] 

 [- - -] 
 [- - -] 
 [- - x] 

 [o x -] 
 [- x -] 
 [- - -] 

 [o - x] 
 [- x -] 
 [- - -] 

 [o - -] 
 [x x -] 
 [- - -] 

 [o - -] 
 [- x x] 
 [- - -] 

 [o - -] 
 [- x -] 
 [x - -] 

 [o - -] 
 [- x -] 
 [- x -] 

 [o - -] 
 [- x -] 
 [- - x] 

 [o o x] 
 [- x x] 
 [- - -] 

 [o o -] 
 [x x x] 
 [- - -] 

 [o o -] 
 [- x x] 
 [x - -] 

 [o o -] 
 [- x x] 
 [- x -] 

 [o o -] 
 [- x x] 
 [- - x] 

 [o o x] 
 [o x x] 
 [x - -] 

 [o o x] 
 [o x x] 
 [- x -] 

 [o o x] 
 [o x x] 
 [- - x] 

Computer gewinnt!


In [35]:
import tkinter as tk
import random

class TicTacToe:
    def __init__(self):
        self.root = tk.Tk()
        self.root.title("TicTacToe Board")

        self.button_size = 5
        self.board_state = [0] * 9
        self.buttons = []
        self.player_action = 1
        self.computer_action = 2

        self.choose_starting_player()
        self.create_board()

        self.root.mainloop()

    def check_winner(self):

        winning_combinations = [
            (0, 1, 2), (3, 4, 5), (6, 7, 8),
            (0, 3, 6), (1, 4, 7), (2, 5, 8),
            (0, 4, 8), (2, 4, 6)
        ]
        for combo in winning_combinations:
            if self.board_state[combo[0]] == self.board_state[combo[1]] == self.board_state[combo[2]] != 0:
                return self.board_state[combo[0]] 
        return 0


    def display_winner_message(self, winner):
        print("Spieler gewinnt!") if winner == self.player_action else print("Computer gewinnt!")


    def on_button_click(self, button, index):
        if self.board_state[index] == 0:
            button.config(text="X")
            self.board_state[index] = self.player_action

            winner = self.check_winner()
            if winner:
                self.display_winner_message(winner)
                return

            button.after(500, self.make_computer_move)

    def calc_computer_action(self):
        turn, state_index = identify_state(self.board_state)
        next_state = calc_V(turn, state_index, verbose = True)
        next_move = translate_action(self.board_state, next_state)
        return next_move

    def make_computer_move(self):
        free_indices = [i for i, val in enumerate(self.board_state) if val == 0]
        if free_indices:
            computer_choice = self.calc_computer_action()
            self.buttons[computer_choice].config(text="O")
            self.board_state[computer_choice] = self.computer_action

            winner = self.check_winner()
            if winner:
                self.display_winner_message(winner)

    def start_game(self, starting_player):
        if starting_player == "Computer":
            self.player_action = 2
            self.computer_action = 1
            self.make_computer_move()

    def choose_starting_player(self):
        choice_window = tk.Toplevel(self.root)
        choice_window.title("Wer beginnt?")
        
        tk.Label(choice_window, text="Wähle den Startspieler:").pack()
        tk.Button(choice_window, text="Spieler", command=lambda: (choice_window.destroy(), self.start_game("Spieler"))).pack()
        tk.Button(choice_window, text="Computer", command=lambda: (choice_window.destroy(), self.start_game("Computer"))).pack()

    def create_board(self):
        for index in range(9):
            btn = tk.Button(self.root, text="-", width=self.button_size, height=self.button_size)
            btn.config(command=lambda b=btn, i=index: self.on_button_click(b, i))
            btn.grid(row=index//3, column=index%3, padx=0, pady=0)
            self.buttons.append(btn)

# Das Spiel starten
TicTacToe()

 [x o -] 
 [- - -] 
 [- - -] 

 [x - o] 
 [- - -] 
 [- - -] 

 [x - -] 
 [o - -] 
 [- - -] 

 [x - -] 
 [- o -] 
 [- - -] 

 [x - -] 
 [- - o] 
 [- - -] 

 [x - -] 
 [- - -] 
 [o - -] 

 [x - -] 
 [- - -] 
 [- o -] 

 [x - -] 
 [- - -] 
 [- - o] 

[0.03125, 0.03125, 0.03125, 0.0, 0.03125, 0.03125, 0.03125, 0.03125]
 [x o -] 
 [x o -] 
 [- - -] 

 [x - o] 
 [x o -] 
 [- - -] 

 [x - -] 
 [x o o] 
 [- - -] 

 [x - -] 
 [x o -] 
 [o - -] 

 [x - -] 
 [x o -] 
 [- o -] 

 [x - -] 
 [x o -] 
 [- - o] 

[0.5, 0.5, 0.5, 0.0, 0.5, 0.5]
 [x o -] 
 [x o -] 
 [o x -] 

 [x - o] 
 [x o -] 
 [o x -] 

 [x - -] 
 [x o o] 
 [o x -] 

 [x - -] 
 [x o -] 
 [o x o] 

[0.0, -1, 0.0, 0.0]
Computer gewinnt!


<__main__.TicTacToe at 0x7e7336b39b20>